| #ifndef Py_INTERNAL_CODE_H |
| #define Py_INTERNAL_CODE_H |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| /* PEP 659 |
| * Specialization and quickening structs and helper functions |
| */ |
| |
/* Cache entry layout holding an entry count.
 * NOTE(review): presumably stored as the zeroth cache entry and holding
 * the total number of cache entries -- confirm against the quickening code.
 */
typedef struct {
    int32_t cache_count;
    int32_t _;  /* Padding: forces the struct to be 8 bytes in size. */
} _PyEntryZero;
| |
/* Per-instruction bookkeeping for adaptive (self-specializing) instructions. */
typedef struct {
    /* NOTE(review): presumably the oparg the instruction had before
     * quickening replaced it -- confirm against the quickening code. */
    uint8_t original_oparg;
    /* Hit/miss counter; updated through record_cache_hit(),
     * record_cache_miss() and cache_backoff(). */
    uint8_t counter;
    uint16_t index;
} _PyAdaptiveEntry;
| |
| |
/* Cache entry layout for attribute access: two 32-bit values. */
typedef struct {
    /* NOTE(review): presumably the type's version tag -- confirm. */
    uint32_t tp_version;
    /* Either a dict-keys version or a lookup hint, as the name says;
     * which one applies is decided by the user of the entry. */
    uint32_t dk_version_or_hint;
} _PyAttrCache;
| |
/* Cache entry layout for global loads: one keys version each for the
 * module namespace and the builtins namespace. */
typedef struct {
    uint32_t module_keys_version;
    uint32_t builtin_keys_version;
} _PyLoadGlobalCache;
| |
| typedef struct { |
| /* Borrowed ref in LOAD_METHOD */ |
| PyObject *obj; |
| } _PyObjectCache; |
| |
| /* Add specialized versions of entries to this union. |
| * |
| * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 |
| * Preserving this invariant is necessary because: |
| - If any one form uses more space, then all must and on 64 bit machines |
| this is likely to double the memory consumption of caches |
| - The function for calculating the offset of caches assumes a 4:1 |
| cache:instruction size ratio. Changing that would need careful |
| analysis to choose a new function. |
| */ |
| typedef union { |
| _PyEntryZero zero; |
| _PyAdaptiveEntry adaptive; |
| _PyAttrCache attr; |
| _PyLoadGlobalCache load_global; |
| _PyObjectCache obj; |
| } SpecializedCacheEntry; |
| |
/* Number of code units that occupy the same space as one cache entry. */
#define INSTRUCTIONS_PER_ENTRY \
    (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000
| |
| typedef union _cache_or_instruction { |
| _Py_CODEUNIT code[1]; |
| SpecializedCacheEntry entry; |
| } SpecializedCacheOrInstruction; |
| |
| /* Get pointer to the nth cache entry, from the first instruction and n. |
| * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last. |
| * The zeroth entry immediately precedes the instructions. |
| */ |
| static inline SpecializedCacheEntry * |
| _GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n) |
| { |
| SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr; |
| assert(&last_cache_plus_one->code[0] == first_instr); |
| return &last_cache_plus_one[-1-n].entry; |
| } |
| |
/* Following two functions form a pair.
 *
 * oparg_from_offset_and_nexti() is used to compute the oparg
 * when quickening, so that offset_from_oparg_and_nexti()
 * can be used at runtime to compute the offset.
 *
 * The relationship between the three values is currently
 *     offset == (nexti>>1) + oparg
 * This relation is chosen based on the following observations:
 * 1. typically 1 in 4 instructions need a cache
 * 2. instructions that need a cache typically use 2 entries
 * These observations imply: offset ≈ nexti/2
 * We use the oparg to fine tune the relation to avoid wasting space
 * and allow consecutive instructions to use caches.
 *
 * If the number of cache entries < number of instructions/2 we will waste
 * some small amount of space.
 * If the number of cache entries > (number of instructions/2) + 255, then
 * some instructions will not be able to use a cache.
 * In practice, we expect some small amount of wasted space in shorter
 * functions, and only functions of 1000 lines or more not to have enough
 * cache space.
 */
/* Compute the oparg that encodes a cache offset for the instruction whose
 * following index is nexti. Inverse of offset_from_oparg_and_nexti():
 * oparg == offset - (nexti>>1).
 */
static inline int
oparg_from_offset_and_nexti(int offset, int nexti)
{
    const int half_nexti = nexti >> 1;
    return offset - half_nexti;
}
| |
/* Recover the cache offset from an oparg and the index of the next
 * instruction. Inverse of oparg_from_offset_and_nexti():
 * offset == (nexti>>1) + oparg.
 */
static inline int
offset_from_oparg_and_nexti(int oparg, int nexti)
{
    const int half_nexti = nexti >> 1;
    return oparg + half_nexti;
}
| |
| /* Get pointer to the cache entry associated with an instruction. |
| * nexti is the index of the instruction plus one. |
| * nexti is used as it corresponds to the instruction pointer in the interpreter. |
| * This doesn't check that an entry has been allocated for that instruction. */ |
| static inline SpecializedCacheEntry * |
| _GetSpecializedCacheEntryForInstruction(const _Py_CODEUNIT *first_instr, int nexti, int oparg) |
| { |
| return _GetSpecializedCacheEntry( |
| first_instr, |
| offset_from_oparg_and_nexti(oparg, nexti) |
| ); |
| } |
| |
/* Number of warmup increments needed before the warmup counter,
 * starting from its initial value, reaches zero. */
#define QUICKENING_WARMUP_DELAY 8

/* We want to compare to zero for efficiency, so we offset values accordingly */
#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)
/* NOTE(review): usage is not visible in this header -- confirm meaning
 * at the point of use. */
#define QUICKENING_WARMUP_COLDEST 1
| |
| static inline void |
| PyCodeObject_IncrementWarmup(PyCodeObject * co) |
| { |
| co->co_warmup++; |
| } |
| |
| /* Used by the interpreter to determine when a code object should be quickened */ |
| static inline int |
| PyCodeObject_IsWarmedUp(PyCodeObject * co) |
| { |
| return (co->co_warmup == 0); |
| } |
| |
| int _Py_Quicken(PyCodeObject *code); |
| |
| extern Py_ssize_t _Py_QuickenedCount; |
| |
| |
| /* "Locals plus" for a code object is the set of locals + cell vars + |
| * free vars. This relates to variable names as well as offsets into |
| * the "fast locals" storage array of execution frames. The compiler |
| * builds the list of names, their offsets, and the corresponding |
| * kind of local. |
| * |
| * Those kinds represent the source of the initial value and the |
| * variable's scope (as related to closures). A "local" is an |
| * argument or other variable defined in the current scope. A "free" |
| * variable is one that is defined in an outer scope and comes from |
| * the function's closure. A "cell" variable is a local that escapes |
| * into an inner function as part of a closure, and thus must be |
| * wrapped in a cell. Any "local" can also be a "cell", but the |
| * "free" kind is mutually exclusive with both. |
| */ |
| |
// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_LOCAL   0x20
#define CO_FAST_CELL    0x40
#define CO_FAST_FREE    0x80

// One kind value (a combination of the CO_FAST_* flags above) per variable.
typedef unsigned char _PyLocals_Kind;
| |
| static inline _PyLocals_Kind |
| _PyLocals_GetKind(PyObject *kinds, int i) |
| { |
| assert(PyBytes_Check(kinds)); |
| assert(0 <= i && i < PyBytes_GET_SIZE(kinds)); |
| char *ptr = PyBytes_AS_STRING(kinds); |
| return (_PyLocals_Kind)(ptr[i]); |
| } |
| |
| static inline void |
| _PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind) |
| { |
| assert(PyBytes_Check(kinds)); |
| assert(0 <= i && i < PyBytes_GET_SIZE(kinds)); |
| char *ptr = PyBytes_AS_STRING(kinds); |
| ptr[i] = (char) kind; |
| } |
| |
| |
| struct _PyCodeConstructor { |
| /* metadata */ |
| PyObject *filename; |
| PyObject *name; |
| PyObject *qualname; |
| int flags; |
| |
| /* the code */ |
| PyObject *code; |
| int firstlineno; |
| PyObject *linetable; |
| PyObject *endlinetable; |
| PyObject *columntable; |
| |
| /* used by the code */ |
| PyObject *consts; |
| PyObject *names; |
| |
| /* mapping frame offsets to information */ |
| PyObject *localsplusnames; // Tuple of strings |
| PyObject *localspluskinds; // Bytes object, one byte per variable |
| |
| /* args (within varnames) */ |
| int argcount; |
| int posonlyargcount; |
| // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount). |
| int kwonlyargcount; |
| |
| /* needed to create the frame */ |
| int stacksize; |
| |
| /* used by the eval loop */ |
| PyObject *exceptiontable; |
| }; |
| |
| // Using an "arguments struct" like this is helpful for maintainability |
| // in a case such as this with many parameters. It does bear a risk: |
| // if the struct changes and callers are not updated properly then the |
| // compiler will not catch problems (like a missing argument). This can |
| // cause hard-to-debug problems. The risk is mitigated by the use of |
| // check_code() in codeobject.c. However, we may decide to switch |
| // back to a regular function signature. Regardless, this approach |
| // wouldn't be appropriate if this weren't a strictly internal API. |
| // (See the comments in https://github.com/python/cpython/pull/26258.) |
| PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *); |
| PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *); |
| |
| |
| /* Private API */ |
| |
| /* Getters for internal PyCodeObject data. */ |
| PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *); |
| PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *); |
| PyAPI_FUNC(PyObject *) _PyCode_GetFreevars(PyCodeObject *); |
| |
| |
| /* Cache hits and misses */ |
| |
/* Increment the counter: shift left by one, dropping the top bit.
 * Counting up from saturating_zero() (255) gives 254, 252, ... and
 * saturates at 0, which then stays 0.
 */
static inline uint8_t
saturating_increment(uint8_t c)
{
    const unsigned int shifted = (unsigned int)c << 1;
    return (uint8_t)shifted;
}
| |
/* Decrement the counter: shift right by one and set the top bit.
 * Saturates at 255, the value returned by saturating_zero().
 */
static inline uint8_t
saturating_decrement(uint8_t c)
{
    /* c>>1 is at most 127, so OR-ing in 0x80 equals adding 128. */
    return (uint8_t)((c >> 1) | 0x80);
}
| |
/* The counter value that represents zero: all eight bits set. */
static inline uint8_t
saturating_zero(void)
{
    return UINT8_MAX;
}
| |
/* Starting value for saturating counter.
 * Technically this should be 1, but that is likely to
 * cause a bit of thrashing when we optimize then get an immediate miss.
 * We want to give the counter a chance to stabilize, so we start at 3,
 * i.e. the zero value shifted left three times (248 after truncation).
 */
static inline uint8_t
saturating_start(void)
{
    const unsigned int shifted = (unsigned int)saturating_zero() << 3;
    return (uint8_t)shifted;
}
| |
| static inline void |
| record_cache_hit(_PyAdaptiveEntry *entry) { |
| entry->counter = saturating_increment(entry->counter); |
| } |
| |
| static inline void |
| record_cache_miss(_PyAdaptiveEntry *entry) { |
| entry->counter = saturating_decrement(entry->counter); |
| } |
| |
| static inline int |
| too_many_cache_misses(_PyAdaptiveEntry *entry) { |
| return entry->counter == saturating_zero(); |
| } |
| |
| #define ADAPTIVE_CACHE_BACKOFF 64 |
| |
| static inline void |
| cache_backoff(_PyAdaptiveEntry *entry) { |
| entry->counter = ADAPTIVE_CACHE_BACKOFF; |
| } |
| |
| /* Specialization functions */ |
| |
| int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); |
| int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); |
| int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); |
| int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); |
| int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); |
| int _Py_Specialize_BinaryAdd(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); |
| |
/* Switches for printing specialization statistics; all off by default. */
#define PRINT_SPECIALIZATION_STATS 0
#define PRINT_SPECIALIZATION_STATS_DETAILED 0
#define PRINT_SPECIALIZATION_STATS_TO_FILE 0

/* Debug builds always collect statistics; other builds collect them
 * only when the corresponding PRINT_* switch is enabled. */
#ifdef Py_DEBUG
#  define COLLECT_SPECIALIZATION_STATS 1
#  define COLLECT_SPECIALIZATION_STATS_DETAILED 1
#else
#  define COLLECT_SPECIALIZATION_STATS PRINT_SPECIALIZATION_STATS
#  define COLLECT_SPECIALIZATION_STATS_DETAILED PRINT_SPECIALIZATION_STATS_DETAILED
#endif

/* Length of the per-opcode specialization_failure_kinds array. */
#define SPECIALIZATION_FAILURE_KINDS 20
| |
#if COLLECT_SPECIALIZATION_STATS

/* Counters kept for one opcode. */
typedef struct _stats {
    uint64_t specialization_success;
    uint64_t specialization_failure;
    uint64_t hit;
    uint64_t deferred;
    uint64_t miss;
    uint64_t deopt;
    uint64_t unquickened;
#if COLLECT_SPECIALIZATION_STATS_DETAILED
    uint64_t specialization_failure_kinds[SPECIALIZATION_FAILURE_KINDS];
#endif
} SpecializationStats;

/* One SpecializationStats record per opcode value (0..255). */
extern SpecializationStats _specialization_stats[256];
/* Bump / lower the counter `name` of opcode `opname`. */
#define STAT_INC(opname, name) _specialization_stats[opname].name++
#define STAT_DEC(opname, name) _specialization_stats[opname].name--
void _Py_PrintSpecializationStats(void);

PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);

#else
/* Statistics disabled: the counter macros expand to no-ops. */
#define STAT_INC(opname, name) ((void)0)
#define STAT_DEC(opname, name) ((void)0)
#endif
| |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| #endif /* !Py_INTERNAL_CODE_H */ |