| /* ********************************************************** |
| * Copyright (c) 2012-2025 Google, Inc. All rights reserved. |
| * **********************************************************/ |
| |
| /* Dr. Memory: the memory debugger |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; |
| * version 2.1 of the License, and no later version. |
| |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Library General Public License for more details. |
| |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
| */ |
| |
| /*************************************************************************** |
| * alloc_replace.c: application allocator replacement routines for both |
| * Dr. Memory and Dr. Heapstat |
| */ |
| |
| /* Requirements beyond regular allocator: |
| * + add redzones (configurable) |
| * + delay frees (configurable): thus unlike most allocators we do |
| * not want to re-use a block immediately even with same-size |
| * repeated alloc-free in order to detect use-after-free |
| * + callbacks for custom actions like updating shadow memory |
| * or heap profiling |
| * + provide iterator over all chunks |
| * + given pointer, know whether the start of a live chunk, |
| * the start of a freed chunk, or neither |
| * + store both requested size and allocated size |
| * + store type: malloc, new or new[] |
| * + store custom flags per chunk (for use during leak scan) |
| * + store callstack |
| * + optional: given pointer, know whether *inside* a live chunk, |
| * a freed chunk, or neither. required during leak scan, but can |
| * build new data structure at that point. |
| * nice-to-have when reporting neighbors of unaddr, and can |
| * use shadow mem heuristics instead. |
| * |
| * Differences vs wrap-based implementation wrt client_ callouts: |
| * + redzones are built-in rather than added by the client, to |
| * facilitate both storing headers in them and sharing adjacent |
| * + delay free lists are built-in rather than maintained by client |
| * |
| * Design: |
| * + for !alloc_ops.external_headers, header sits inside redzone; |
| * for alloc_ops.external_headers, header is in a hashtable |
| * + redzones are shared among adjacent allocs and are centered to |
| * reduce the likelihood of corruption from over/underflow: |
| * |
| * | request sz| | redzone size | request size | | redzone size | |
| * | app chunk | pad | rz | header | rz | app chunk |pad| rz | header | rz | |
| * ^ |
| * next_chunk _| |
| * |
| * + for !alloc_ops.shared_redzones, there are two redzones in between |
| * each chunk, with the header in between and separate from the redzones |
| * (geared toward modes that want to fill the redzones) |
| * + arena->next_chunk always has a redzone + header space (if co-located, i.e., |
| * !alloc_ops.external_headers) to its left |
| * + free lists are kept in buckets by size. larger is preferred over |
| * searching. final bucket is var-sized and is always searched. |
| * frees are appended to make the lists FIFO for better delaying |
| * (though worse alloc re-use), and searches start at the front and |
| * take the first fit. |
| * we can add fancier algorithms in the future. |
| * + for alloc_ops.external_headers, free list entries use headers that |
| * are co-located with the chunk headers |
| * + for !alloc_ops.external_headers, free list entry headers begin where |
| * regular headers begin, in the middle of the redzone. |
| */ |
| |
| #include "dr_api.h" |
| #include "drwrap.h" |
| #include "drmgr.h" |
| #include "utils.h" |
| #include "asm_utils.h" |
| #include "alloc.h" |
| #include "alloc_private.h" |
| #include "heap.h" |
| #include "drsymcache.h" |
| #include <string.h> /* memcpy */ |
| |
| #ifdef MACOS |
| # include <sys/syscall.h> |
| # include <sys/mman.h> |
| # include <malloc/malloc.h> |
| #elif defined(LINUX) |
| # include "sysnum_linux.h" |
| # define __USE_GNU /* for mremap */ |
| # include <sys/mman.h> |
| #else |
| # include "../wininc/crtdbg.h" |
| # pragma warning(disable : 5286) /* "implicit conversion from enum type" */ |
| # pragma warning(disable : 5287) /* "operands are different enum types" */ |
| #endif |
| |
| #ifdef UNIX |
| # include <errno.h> |
| #endif |
| |
| /*************************************************************************** |
| * header and free list data structures |
| */ |
| |
| /* 64-bit malloc impls generally align to 16, and in fact some Windows code |
| * assumes this (i#1219). |
| */ |
| #define CHUNK_ALIGNMENT IF_X64_ELSE(16, 8) |
| #define CHUNK_MIN_SIZE IF_X64_ELSE(16, 8) |
| #define CHUNK_MIN_MMAP 128*1024 |
| /* initial commit on linux has to hold at least one non-mmap chunk */ |
| #define ARENA_INITIAL_COMMIT CHUNK_MIN_MMAP |
| #define ARENA_INITIAL_SIZE 4*1024*1024 |
| |
| #define REQUEST_DIFF_MAX UINT_MAX |
| #define PREV_SIZE_MAX UINT_MAX |
| |
| /* we only support allocation sizes under 4GB */ |
| typedef uint heapsz_t; |
| |
| /* each free list bucket contains freed chunks of at least its bucket size |
| * XXX: add stats on searches to help in tuning these |
| */ |
| static const uint free_list_sizes[] = { |
| IF_NOT_X64_(8) 16, 24, 32, 40, 64, 96, 128, 192, 256, 384, 512, 1024, 2048, |
| 4096, 8192, 16384, 32768, |
| }; |
| #define NUM_FREE_LISTS (sizeof(free_list_sizes)/sizeof(free_list_sizes[0])) |
| |
| /* Values stored in chunk header flags */ |
| enum { |
| CHUNK_FREED = MALLOC_RESERVED_1, /* 0x0001 */ |
| CHUNK_MMAP = MALLOC_RESERVED_2, /* 0x0002 */ |
| /* MALLOC_RESERVED_{3,4} are used for types */ /* 0x000C */ |
| CHUNK_PRE_US = MALLOC_RESERVED_5, /* 0x0100 */ |
| CHUNK_PREV_FREE = MALLOC_RESERVED_6, /* 0x0200 */ |
| CHUNK_DELAY_FREE = MALLOC_RESERVED_7, /* 0x0400 */ |
| #ifdef WINDOWS |
| CHUNK_LAYER_RTL = MALLOC_RESERVED_8, /* 0x0800 */ |
| #endif |
| /* i#1532: only check for non-static libc. This is Windows-only but it's |
| * cleaner to avoid all the ifdefs down below. |
| */ |
| CHUNK_LAYER_NOCHECK = MALLOC_RESERVED_9, |
| CHUNK_SKIP_ITER = MALLOC_RESERVED_10, |
| |
| /* meta-flags */ |
| #ifdef WINDOWS |
| ALLOCATOR_TYPE_FLAGS = (MALLOC_ALLOCATOR_FLAGS | CHUNK_LAYER_RTL | |
| CHUNK_LAYER_NOCHECK), |
| #else |
| ALLOCATOR_TYPE_FLAGS = (MALLOC_ALLOCATOR_FLAGS), |
| #endif |
| }; |
| |
| #define HEADER_MAGIC 0x5244 /* "DR" */ |
| |
| /* This header struct is used in both a traditional co-located header |
| * and as a hashtable payload (for alloc_ops.external_headers). Note |
| * that when using redzones there's no problem with a large header as |
| * it sits inside the redzone. But with the hashtable, and for |
| * pattern mode with co-located headers, and for Dr. Heapstat where we |
| * have no redzone, we want to make the header as compact as is |
| * reasonable. |
| */ |
| typedef struct _chunk_header_t { |
| void *user_data; |
| /* If we wanted to save space we could hand out sizes only equal to the buckets |
| * and shrink the alloc_size field. We'd use a separate header for the largest |
| * bucket that had the alloc_size. |
| */ |
| heapsz_t alloc_size; |
| /* Bitmask of CHUNK_ flags */ |
| ushort flags; |
| /* Put magic last for a greater chance of surviving underflow, esp when our |
| * header has no redzone buffer (when redzone_size <= HEADER_SIZE, which |
| * unfortunately is true by default as both are 16 for 32-bit). |
| */ |
| ushort magic; |
| union { |
| /* A live or delay-free chunk does not need a prev pointer, while a truly |
| * free chunk does not need the request size nor the prev size (b/c |
| * we always coalesce, and we don't set prev size if prev is delay-free). |
| */ |
| struct { |
| /* Difference between alloc_size and requested size. We currently always |
| * split re-used large free chunks, but for mmaps this can be > 64K. |
| */ |
| uint request_diff; |
| /* The size of the previous free chunk / CHUNK_MIN_SIZE (i.e., >>3). Only |
| * valid if CHUNK_PREV_FREE is set in flags. We get away with only a 512KB |
| * max because larger elements, which are always mmaps, are not put on the |
| * free list or coalesced. We assert on the various constants all lining up |
| * in our init routine. After coalescing we can reach a larger size than |
| * 512KB, in which case we place 0 here and store the size immediately |
| * prior to the redzone. |
| * |
| * If CHUNK_MMAP is set in flags, this holds the padding at the start |
| * of the mmap base put in place for alignment of the returned alloc. |
| */ |
| uint prev_size_shr; |
| #ifdef X64 |
| /* Compiler will add anyway: just making explicit. we need the header |
| * size to be aligned to 8 so we can't pack. for alloc_ops.external_headers |
| * we eat this overhead to provide runtime flexibility w/ the same |
| * data struct as we don't need it there. |
| * Update: actually we need to align to 16. |
| */ |
| uint pad; |
| #endif |
| } unfree; |
| struct _free_header_t *prev; |
| } u; |
| } chunk_header_t; |
| |
| /* Header at the top of an mmap used for large allocs. If we didn't need to |
| * support memalign() & co, we could get away without this. |
| */ |
| typedef struct _mmap_header_t { |
| chunk_header_t *head; |
| size_t map_size; |
| } mmap_header_t; |
| |
| /* To support pattern mode, which wants to fill the redzone with its pattern, |
| * we don't want the next pointer in the redzone. For now we pay the cost |
| * of extra memory rather than complicate the interface to pattern mode |
| * to have it skip the next pointer (we'd need a call when we move from delay |
| * queueu to free lists, and we'd need to adjust real_base on several calls: |
| * and ensure client isn't storing things by real base!). |
| * Thus, we indirect the live header size through here. |
| */ |
| static heapsz_t header_size; |
| |
| /* if redzone is too small, header sticks beyond it */ |
| static heapsz_t header_beyond_redzone; |
| /* we place header in the middle */ |
| static heapsz_t redzone_beyond_header; |
| |
| /* Free list header for both regular and var-size chunk. Each chunk |
| * is at least 8 bytes so we can fit the next pointer here even for |
| * x64. We squish the prev pointer into fields of the chunk header we |
| * no longer need, for a true free; for a delay free we don't use a |
| * prev pointer. |
| * |
| * FIXME: for alloc_ops.external_headers do we need a chunk pointer |
| * here? or will it be in the head struct? |
| */ |
| typedef struct _free_header_t { |
| chunk_header_t head; |
| struct _free_header_t *next; |
| } free_header_t; |
| |
| typedef struct _free_lists_t { |
| /* Delayed frees are kept here for more fair delaying across sizes |
| * than if we put them into the per-size lists. |
| */ |
| free_header_t *delay_front; |
| free_header_t *delay_last; |
| /* The delay threshold is per-arena */ |
| uint delayed_chunks; |
| size_t delayed_bytes; |
| /* A normal free list can be LIFO, but for more effective delayed frees |
| * we want FIFO. FIFO-per-bucket-size is sufficient. |
| */ |
| free_header_t *front[NUM_FREE_LISTS]; |
| free_header_t *last[NUM_FREE_LISTS]; |
| } free_lists_t; |
| |
| #ifdef LINUX |
| /* we assume we're the sole users of the brk (after pre-us allocs) */ |
| static byte *pre_us_brk; |
| static byte *cur_brk; |
| #endif |
| |
| #ifdef WINDOWS |
| /* For alloc_ops.global_lock (xref i#949). Each arena's dr_lock points |
| * at this lock when alloc_ops.global_lock is true. |
| */ |
| static void *global_lock; |
| #endif |
| |
| /* header at the top of each arena (an "arena" for this code is a contiguous |
| * piece of memory parceled out into individual malloc "chunks") |
| */ |
| typedef struct _arena_header_t { |
| #ifdef MACOS |
| /* Placed at the start for easy conversion back and forth. |
| * We ignore the function pointers inside here. |
| * Xref i#1699. |
| */ |
| malloc_zone_t zone_inlined; |
| /* Some apps write to zone_inlined.zone_name and then mark the page read-only. */ |
| char padding[PAGE_SIZE-sizeof(malloc_zone_t)]; |
| /* For child arenas to point at the parent */ |
| malloc_zone_t *zone; |
| #endif |
| byte *start_chunk; |
| byte *next_chunk; |
| byte *commit_end; |
| byte *reserve_end; |
| free_lists_t *free_list; |
| #ifdef WINDOWS |
| /* i#949: We need two locks. The lock field is the app lock, which can |
| * be acquired while in app code. This field is a pure DR lock, and |
| * it's used to synchronize free chunk splitting and coalescing with |
| * malloc iteration. (Regular mallocs and frees that do not split |
| * or coalesce do not need to synchronize with malloc iteration.) |
| * We always acquire the app lock first if we acquire both. |
| */ |
| void *dr_lock; |
| #endif |
| void *lock; /* app lock for Windows */ |
| uint flags; |
| /* If we free the final chunk before the brk we need to know to mark the |
| * next carved-out chunk w/ the prev free size. |
| */ |
| heapsz_t prev_free_sz; |
| uint magic; |
| #ifdef WINDOWS |
| /* A member of the alloc set for which this arena is the default heap */ |
| app_pc alloc_set_member; |
| /* Base of the module for which this is the default Heap */ |
| app_pc modbase; |
| /* HANDLE of Heap, for pre-us Heap */ |
| HANDLE handle; |
| #endif |
| /* we need to iterate arenas belonging to one (non-default) Heap */ |
| struct _arena_header_t *next_arena; |
| /* for main arena of each Heap, we inline free_lists_t here */ |
| } arena_header_t; |
| |
| #ifdef WINDOWS |
| /* pick a flag that can't be passed on the Heap level to identify whether |
| * a Heap or a regular arena |
| */ |
| # define ARENA_MAIN HEAP_ZERO_MEMORY /* 0x8 */ |
| # define ARENA_PRE_US_MAPPED 0x100 /* unused by Windows */ |
| /* another non-Heap flag to identify libc-default Heaps (i#939) */ |
| # define ARENA_LIBC_DEFAULT HEAP_REALLOC_IN_PLACE_ONLY /* 0x10 */ |
| /* identify whether a static libc heap is the process heap (i#1223) */ |
| # define ARENA_LIBC_SPECULATIVE 0x200 /* unused by Windows */ |
| /* flags that we support being passed to HeapCreate: |
| * HEAP_CREATE_ENABLE_EXECUTE | HEAP_GENERATE_EXCEPTIONS | HEAP_NO_SERIALIZE | |
| * HEAP_GROWABLE |
| */ |
| # define HEAP_CREATE_POSSIBLE_FLAGS 0x40007 |
| static HANDLE process_heap; |
| /* i#1754: for pre-us mapped memory, in particular the shared-memory CsrPortHeap, |
| * we do not attempt to detect uninitialized reads as it very difficult to |
| * track writes by csrss. The simplest way to accomplish this is to mark |
| * all allocs as defined by zeroing them. |
| */ |
| # define WINDOWS_ZERO_MEMORY(arena, alloc_flags) \ |
| (TEST(ARENA_PRE_US_MAPPED, (arena)->flags) || TEST(HEAP_ZERO_MEMORY, (alloc_flags))) |
| #else |
| # define ARENA_MAIN 0x0001 |
| #endif |
| |
| /* Linux current arena, or Windows default Heap. We always use this main |
| * pointer as the arena, even though there can be extra sub-arena regions that |
| * belong to this Heap linked in the next_arena field. |
| */ |
| static arena_header_t *cur_arena; |
| |
| /* For handling pre-us mallocs for non-earliest injection or delayed/attach |
| * instrumentation. Contains chunk_header_t entries. |
| * We assume this table is only added to at init and only removed from |
| * at exit time and thus needs no external lock. |
| */ |
| #define PRE_US_TABLE_HASH_BITS 8 |
| static hashtable_t pre_us_table; |
| |
| /* XXX i#879: for pattern mode we ideally don't want any co-located |
| * headers and instead want a hashtable of live allocs (free are in |
| * free lists and/or rbtree). |
| * Cleaner to have own table here and not try to use the alloc.c malloc-wrap table |
| * though we do want the same hash tuning. |
| * Currently we have a much simpler implementation for pattern mode |
| * that uses non-shared redzones and a header in between (so it looks |
| * like wrapping, and like wrapping won't detect a bug that clobbers |
| * the header prior to corruption and possible crash). |
| */ |
| |
| #ifdef STATISTICS |
| static uint heap_capacity; |
| static uint peak_heap_capacity; |
| static uint num_arenas; |
| static uint peak_num_arenas; |
| static uint num_splits; |
| static uint num_coalesces; |
| static uint num_dealloc; |
| static uint dbgcrt_mismatch; |
| static uint allocs_left_native; |
| #endif |
| |
| #ifdef DEBUG |
| /* used to allow use of app stack on abort */ |
| static bool aborting; |
| #endif |
| |
| /* Indicates whether process initialization is fully complete, including |
| * iteration of modules. Thus, we don't set this until we get the |
| * first bb event. |
| */ |
| static bool process_initialized; |
| |
| #ifdef WINDOWS |
| static app_pc executable_base; |
| |
| static arena_header_t * |
| check_libc_vs_process_heap(alloc_routine_entry_t *e, arena_header_t *arena); |
| #endif |
| |
| #ifdef MACOS |
| static void |
| malloc_zone_init(arena_header_t *arena); |
| #endif |
| |
| /* Flags controlling allocation behavior */ |
| typedef enum { |
| ALLOC_SYNCHRONIZE = 0x0001, /* malloc, free, and realloc */ |
| ALLOC_ZERO = 0x0002, /* malloc and realloc */ |
| ALLOC_IS_REALLOC = 0x0004, /* malloc and free */ |
| /* Routines that free the client_data (client_malloc_data_free(), |
| * client_handle_free_reuse()) and routines reporting on invalid |
| * heap args or OOM are called regardless of these flags' values. |
| */ |
| /* Whether to invoke client_{add,remove}_malloc_{pre,post} */ |
| ALLOC_INVOKE_CLIENT_DATA = 0x0008, /* malloc and free */ |
| /* Whether to invoke client_handle_{malloc,free} */ |
| ALLOC_INVOKE_CLIENT_ACTION = 0x0010, /* malloc and free */ |
| ALLOC_INVOKE_CLIENT = ALLOC_INVOKE_CLIENT_DATA | ALLOC_INVOKE_CLIENT_ACTION, |
| ALLOC_IN_PLACE_ONLY = 0x0020, /* realloc */ |
| ALLOC_ALLOW_NULL = 0x0040, /* realloc: do not fail on NULL */ |
| ALLOC_ALLOW_EMPTY = 0x0080, /* realloc: size==0 does re-allocate */ |
| ALLOC_IGNORE_MISMATCH = 0x0100, /* free, realloc, size */ |
| ALLOC_IS_QUERY = 0x0200, /* check_type_match */ |
| } alloc_flags_t; |
| |
| /*************************************************************************** |
| * utility routines |
| */ |
| |
| #define DR_STATE_TO_SWAP (DR_STATE_ALL & (~DR_STATE_STACK_BOUNDS)) |
| |
| #ifdef WINDOWS |
| static inline const char * |
| malloc_layer_name(uint flags) |
| { |
| if (TEST(CHUNK_LAYER_RTL, flags)) |
| return "Windows API layer"; |
| else |
| return "C library layer"; |
| } |
| #endif |
| |
| static inline void * |
| enter_client_code(void) |
| { |
| void *drcontext = dr_get_current_drcontext(); |
| |
| /* For our callstack walk we need the frame ptr of our replacement |
| * functions to be marked defined. By using our replace xbp we |
| * have the malloc frame in the callstack (i#639). |
| * Note that we do not want to, say, pass in the mcontext and |
| * mark defined through get_stack_registers()'s xsp, as that |
| * will mark a bunch of uninitialized slots on the stack. |
| */ |
| byte *final_app_xsp = (byte *) |
| dr_read_saved_reg(drcontext, DRWRAP_REPLACE_NATIVE_SP_SLOT); |
| client_stack_alloc((byte *)final_app_xsp - sizeof(void*), (byte *)final_app_xsp, |
| true/*defined*/); |
| |
| /* while we are using the app's stack and registers, we need to |
| * switch to the private peb/teb to avoid asserts in symbol |
| * routines. |
| * XXX: is it safe to do away w/ this and relax the asserts? |
| * if perf becomes an issue we could do a lazy swap on symbol |
| * queries (and hope no other private lib calls occur). |
| * |
| * On Linux we don't need to swap b/c we (and our priv libs) won't |
| * examine the selectors or descriptors: -mangle_app_seg ensures |
| * we don't need to swap. Which is good b/c a swap involves a |
| * system call which kills performance: i#941. |
| */ |
| #ifdef WINDOWS |
| dr_switch_to_dr_state_ex(drcontext, DR_STATE_TO_SWAP); |
| #endif |
| return drcontext; |
| } |
| |
| static void |
| exit_client_code(void *drcontext, bool in_app_mode) |
| { |
| byte *final_app_xsp = (byte *) |
| dr_read_saved_reg(drcontext, DRWRAP_REPLACE_NATIVE_SP_SLOT); |
| client_stack_dealloc((byte *)final_app_xsp - sizeof(void*), (byte *)final_app_xsp); |
| |
| #if WINDOWS |
| if (!in_app_mode) |
| dr_switch_to_app_state_ex(drcontext, DR_STATE_TO_SWAP); |
| #endif |
| |
| drwrap_replace_native_fini(drcontext); |
| |
| /* i#1217: yet another point where we zero out data to avoid stale retaddrs |
| * on our callstacks. For 32-bit, dr_write_saved_reg() called by |
| * drwrap_replace_native_fini() has the app retaddr on the stack. We clear |
| * it here. |
| * For 32-bit, we assume it's safe to write beyond TOS. |
| * For 64-bit, this is not a leaf routine, so we similarly assume it's safe: |
| * but it's more fragile (xref i#1278). |
| * drwrap_replace_native_fini() currently uses 12 bytes of stack for 32-bit |
| * and 56 for 64-bit (and dr_write_saved_reg() uses 32, but we |
| * only care about its param slots). |
| * |
| * XXX: if we knew whether we had DrMem definedness info we could avoid |
| * this work for full mode. |
| */ |
| # define ZERO_APP_STACK_SZ IF_X64_ELSE(64, 32) |
| /* We can't call memset() or any regular function b/c it will clobber its |
| * own stack, nor can we have a loop here as we can clobber our own locals. |
| * Thus we must use an asm routine. |
| */ |
| zero_pointers_on_stack(ZERO_APP_STACK_SZ); |
| } |
| |
| /* i#900: we need to mark an app lock acquisition as a safe spot. |
| * This is made possible by drwrap_replace_native() using a continuation |
| * strategy rather than returning to the code cache. |
| * N.B.: no DR lock can be held by the caller! |
| */ |
| static void |
| app_heap_lock(void *drcontext, void *recur_lock) |
| { |
| dr_mark_safe_to_suspend(drcontext, true/*enter safe region*/); |
| dr_recurlock_lock(recur_lock); |
| dr_mark_safe_to_suspend(drcontext, false/*exit safe region*/); |
| } |
| |
| static void |
| app_heap_unlock(void *drcontext, void *recur_lock) |
| { |
| /* Nothing special, just for symmetry */ |
| dr_recurlock_unlock(recur_lock); |
| } |
| |
| /* Locking for any alloc or free operation */ |
| static void |
| arena_lock(void *drcontext, arena_header_t *arena, bool app_synch) |
| { |
| /* XXX i#948: use per-thread free lists to avoid lock in common case, |
| * for Linux or Windows libc at least (where heap synch is not part |
| * of app API), and when !alloc_ops.global_lock. |
| */ |
| if (app_synch) |
| app_heap_lock(drcontext, arena->lock); |
| #ifdef WINDOWS |
| /* i#949: regardless of app synch, we need to synchronize our own |
| * operations. We must grab this after the app lock. We don't need |
| * this to be a safe spot as it's only grabbed in our own code. |
| */ |
| if (alloc_ops.global_lock) |
| dr_recurlock_lock(arena->dr_lock); |
| #else |
| /* We assume every top-level caller synchronizes (can't check here b/c |
| * this can be called via realloc calling free or malloc). |
| * If synch becomes optional on Linux, need to use dr_lock too. |
| */ |
| #endif |
| } |
| |
| static void |
| arena_unlock(void *drcontext, arena_header_t *arena, bool app_synch) |
| { |
| #ifdef WINDOWS |
| if (alloc_ops.global_lock) |
| dr_recurlock_unlock(arena->dr_lock); |
| #else |
| /* We assume every top-level caller synchronizes (can't check here b/c |
| * this can be called via realloc calling free or malloc). |
| * If synch becomes optional on Linux, need to use dr_lock too. |
| */ |
| #endif |
| if (app_synch) |
| app_heap_unlock(drcontext, arena->lock); |
| } |
| |
| /* i#949: locking for alloc or free operations that affect concurrent |
| * iteration: splitting or coalescing of free chunks. Changing header |
| * flags concurrently with iteration is ok. If the iterator wants to |
| * look for certain flags across multiple iterations, the user needs |
| * to set alloc_ops.global_lock. |
| */ |
| static void |
| iterator_lock(arena_header_t *arena, bool in_alloc) |
| { |
| /* We could blindly lock (it's a recursive lock) but more performant this way */ |
| #ifdef WINDOWS |
| if (!in_alloc || !alloc_ops.global_lock) |
| dr_recurlock_lock(arena->dr_lock); |
| else |
| ASSERT(dr_recurlock_self_owns(arena->dr_lock), "lock error"); |
| #else |
| if (!in_alloc) |
| dr_recurlock_lock(arena->lock); |
| else |
| ASSERT(dr_recurlock_self_owns(arena->lock), "lock error"); |
| #endif |
| } |
| |
| static void |
| iterator_unlock(arena_header_t *arena, bool in_alloc) |
| { |
| #ifdef WINDOWS |
| ASSERT(dr_recurlock_self_owns(arena->dr_lock), "lock error"); |
| if (!in_alloc || !alloc_ops.global_lock) |
| dr_recurlock_unlock(arena->dr_lock); |
| #else |
| ASSERT(dr_recurlock_self_owns(arena->lock), "lock error"); |
| if (!in_alloc) |
| dr_recurlock_unlock(arena->lock); |
| #endif |
| } |
| |
| #if defined(WINDOWS) && defined(X64) |
| static app_pc |
| get_replace_native_caller(void *drcontext) |
| { |
| /* drwrap saved the retaddr slot for us */ |
| byte *app_xsp = (byte *) dr_read_saved_reg(drcontext, DRWRAP_REPLACE_NATIVE_SP_SLOT); |
| return *(app_pc *)app_xsp; |
| } |
| #endif |
| |
| /* This must be inlined to get an xsp that's in the call chain */ |
| #define INITIALIZE_MCONTEXT_FOR_REPORT(mc) do { \ |
| /* Assumption: we only need xsp, xbp, and pc initialized. */ \ |
| (mc)->size = sizeof(*(mc)); \ |
| (mc)->flags = DR_MC_CONTROL | DR_MC_INTEGER; \ |
| get_unwind_registers(&MC_SP_REG(mc), &MC_FP_REG(mc), &((mc)->pc)); \ |
| } while (0) |
| |
| #ifdef WINDOWS |
| static inline uint |
| arena_page_prot(uint flags) |
| { |
| return DR_MEMPROT_READ | DR_MEMPROT_WRITE | |
| (TEST(HEAP_CREATE_ENABLE_EXECUTE, flags) ? DR_MEMPROT_EXEC : 0); |
| } |
| #endif |
| |
| /* We used to call raw_syscall() and virtual_alloc(), but for DRi#199 we |
| * now have DR routines we can use, which avoids DR asserts (mainly on |
| * Linux allmem, but possible to have problems everywhere if the app |
| * puts code in the heap). |
| */ |
| static byte * |
| os_large_alloc(size_t commit_size _IF_WINDOWS(size_t reserve_size) _IF_WINDOWS(uint prot)) |
| { |
| #ifdef UNIX |
| byte *map = (byte *) |
| dr_raw_mem_alloc(commit_size, DR_MEMPROT_READ | DR_MEMPROT_WRITE, NULL); |
| ASSERT(ALIGNED(commit_size, PAGE_SIZE), "must align to at least page size"); |
| /* dr_raw_mem_alloc returns NULL on failure, but I'm keeping the range for |
| * raw syscall. |
| */ |
| if ((ptr_int_t)map <= 0 && (ptr_int_t)map > -PAGE_SIZE) { |
| LOG(2, "os_large_alloc FAILED with return value "PFX"\n", map); |
| return NULL; |
| } |
| LOG(3, "%s commit="PIFX" => "PFX"\n", __FUNCTION__, commit_size, map); |
| return map; |
| #else |
| byte *loc = NULL; |
| ASSERT(ALIGNED(commit_size, PAGE_SIZE), "must align to at least page size"); |
| ASSERT(ALIGNED(reserve_size, PAGE_SIZE), "must align to at least page size"); |
| ASSERT(reserve_size >= commit_size, "must reserve more than commit"); |
| loc = dr_custom_alloc(NULL, DR_ALLOC_NON_HEAP | DR_ALLOC_NON_DR | |
| DR_ALLOC_RESERVE_ONLY, reserve_size, |
| DR_MEMPROT_NONE, NULL); |
| if (loc == NULL) |
| return NULL; |
| loc = dr_custom_alloc(NULL, DR_ALLOC_NON_HEAP | DR_ALLOC_NON_DR | |
| DR_ALLOC_COMMIT_ONLY | DR_ALLOC_FIXED_LOCATION, commit_size, |
| prot, loc); |
| if (loc == NULL) { |
| dr_custom_free(NULL, DR_ALLOC_NON_HEAP | DR_ALLOC_NON_DR, loc, reserve_size); |
| return NULL; |
| } |
| LOG(3, "%s commit="PIFX" reserve="PIFX" prot=0x%x => "PFX"\n", |
| __FUNCTION__, commit_size, reserve_size, prot, loc); |
| return loc; |
| #endif |
| } |
| |
| /* For Windows, up to caller to ensure new_commit_size <= previously reserved size */ |
| static bool |
| os_large_alloc_extend(byte *map, size_t cur_commit_size, size_t new_commit_size |
| _IF_WINDOWS(uint prot)) |
| { |
| ASSERT(ALIGNED(cur_commit_size, PAGE_SIZE), "must align to at least page size"); |
| ASSERT(ALIGNED(new_commit_size, PAGE_SIZE), "must align to at least page size"); |
| ASSERT(new_commit_size > cur_commit_size, "this routine does not support shrinking"); |
| #ifdef LINUX |
| byte *newmap = (byte *) dr_raw_mremap(map, cur_commit_size, new_commit_size, |
| 0/*can't move*/, NULL/*ignored*/); |
| if ((ptr_int_t)newmap <= 0 && (ptr_int_t)newmap > -PAGE_SIZE) |
| return false; |
| return true; |
| #elif defined(MACOS) |
| /* There is no mremap on Mac so we try to do a new mmap at the right spot. |
| * We can still free both with one munmap. |
| * We don't dare do DR_ALLOC_FIXED_LOCATION as it may clobber something. |
| */ |
| byte *newmap = (byte *) |
| dr_raw_mem_alloc(new_commit_size - cur_commit_size, |
| DR_MEMPROT_READ | DR_MEMPROT_WRITE, |
| map + cur_commit_size); |
| if ((ptr_int_t)newmap <= 0 && (ptr_int_t)newmap > -PAGE_SIZE) |
| return false; |
| if (newmap != map + cur_commit_size) { |
| /* Didn't get the subsequent spot: bail. */ |
| dr_raw_mem_free(newmap, new_commit_size - cur_commit_size); |
| return false; |
| } |
| return true; |
| #else /* WINDOWS */ |
| /* i#1258: we have to tweak [map + cur_commit_size, map + new_commit_size) |
| * and not re-commit [map, map + new_commit_size) b/c the latter will |
| * modify the prot bits on existing pages, which the app might have |
| * changed from the arena default! |
| */ |
| return (dr_custom_alloc(NULL, DR_ALLOC_NON_HEAP | DR_ALLOC_NON_DR | |
| DR_ALLOC_COMMIT_ONLY | DR_ALLOC_FIXED_LOCATION, |
| new_commit_size - cur_commit_size, prot, |
| map + cur_commit_size) != NULL); |
| #endif |
| } |
| |
| /* For Windows, map_size is ignored and the whole allocation is freed */ |
| static bool |
| os_large_free(byte *map, size_t map_size) |
| { |
| #ifdef UNIX |
| bool success; |
| ASSERT(ALIGNED(map, PAGE_SIZE), "invalid mmap base"); |
| ASSERT(ALIGNED(map_size, PAGE_SIZE), "invalid mmap size"); |
| success = dr_raw_mem_free(map, map_size); |
| LOG(3, "%s "PFX" size="PIFX" => %d\n", __FUNCTION__, map, map_size, success); |
| return success; |
| #else |
| LOG(3, "%s "PFX" size="PIFX"\n", __FUNCTION__, map, map_size); |
| return dr_custom_free(NULL, DR_ALLOC_NON_HEAP | DR_ALLOC_NON_DR, map, map_size); |
| #endif |
| } |
| |
| static inline heapsz_t |
| chunk_request_size(chunk_header_t *head) |
| { |
| return (head->alloc_size - head->u.unfree.request_diff); |
| } |
| |
| static void |
| notify_client_alloc(void *drcontext, byte *ptr, chunk_header_t *head, |
| alloc_flags_t flags, dr_mcontext_t *mc, app_pc caller) |
| { |
| malloc_info_t info = { sizeof(info), ptr, chunk_request_size(head), |
| head->alloc_size, false/*!pre_us*/, true/*redzone*/, |
| TEST(ALLOC_ZERO, flags), TEST(ALLOC_IS_REALLOC, flags), |
| 0, head->user_data }; |
| if (TEST(ALLOC_INVOKE_CLIENT_DATA, flags)) { |
| head->user_data = client_add_malloc_pre(&info, mc, caller); |
| info.client_data = head->user_data; |
| client_add_malloc_post(&info); |
| } |
| if (TEST(ALLOC_INVOKE_CLIENT_ACTION, flags)) { |
| ASSERT(drcontext != NULL, "invalid arg"); |
| client_handle_malloc(drcontext, &info, mc); |
| } |
| } |
| |
| /*************************************************************************** |
| * core allocation routines |
| */ |
| |
| static inline chunk_header_t * |
| header_from_ptr(const void *ptr) |
| { |
| if (alloc_ops.external_headers) { |
| /* XXX i#879: hashtable lookup */ |
| ASSERT(false, "NYI"); |
| return NULL; |
| } else { |
| if ((ptr_uint_t)ptr < header_size) |
| return NULL; |
| else { |
| return (chunk_header_t *) ((byte *)ptr - redzone_beyond_header - header_size); |
| } |
| } |
| } |
| |
| static inline byte * |
| ptr_from_header(chunk_header_t *head) |
| { |
| if (alloc_ops.external_headers) { |
| /* XXX i#879: hashtable lookup */ |
| ASSERT(false, "NYI"); |
| return NULL; |
| } else { |
| ASSERT(!TEST(CHUNK_PRE_US, head->flags), "caller must handle pre-us"); |
| return (byte *)head + redzone_beyond_header + header_size; |
| } |
| } |
| |
| static inline chunk_header_t * |
| header_from_mmap_base(void *map) |
| { |
| if (alloc_ops.external_headers) { |
| /* XXX i#879: hashtable lookup */ |
| ASSERT(false, "NYI"); |
| return NULL; |
| } else { |
| if ((ptr_uint_t)map < header_size) |
| return NULL; |
| else { |
| mmap_header_t *mhead = (mmap_header_t *) map; |
| return mhead->head; |
| } |
| } |
| } |
| |
| /* Distance from the end of one chunk (its start pointer plus alloc_size) to |
| * the start of the user memory for the subsequent chunk |
| */ |
| static inline size_t |
| inter_chunk_space(void) |
| { |
| return alloc_ops.redzone_size + header_beyond_redzone + |
| (alloc_ops.shared_redzones ? 0 : alloc_ops.redzone_size); |
| } |
| |
| /* Pass in result of header_from_ptr() as 2nd arg, but don't de-reference it! |
| * Returns true for both live mallocs and chunks in delay free lists |
| */ |
| static inline bool |
| is_valid_chunk(const void *ptr, chunk_header_t *head) |
| { |
| /* Note that we can't be sure w/o using a hashtable, but for performance |
| * it's worth it to risk not identifying an invalid free so we use |
| * heuristics. |
| * XXX improvements: |
| * + should we have an option of using a hashtable to be sure, |
| * even when !alloc_ops.external_headers? |
| * app corrupting our allocator would be bad. |
| * + check whether in heap memory region(s) if that's cheap: if |
| * need rbtree lookup then don't |
| * + could check that next header is a real header, or at end of arena |
| * + could have client_ callout that checks shadow memory |
| */ |
| if (alloc_ops.external_headers) { |
| /* XXX i#879: need to look in delay free rbtree too */ |
| return head != NULL; |
| } else { |
| /* Unlike a regular malloc library, we cannot afford to crash on |
| * a bogus arg from the app b/c Dr. Memory is supposed to detect |
| * invalid args and crashes. We use DR's new, fast dr_safe_read() |
| * (via safe_read()) to have low overhead yet stability. |
| * An alternative might be a top-level crash handler |
| * that bails out w/ an error report about invalid args. |
| */ |
| ushort magic; |
| /* App heap corruption can touch our magic field (deliberately |
| * nearest the app alloc), causing us to report as an invalid |
| * heap arg (after reporting the unaddr access) and later as a |
| * leak, which doesn't seem ideal: but it's hard to do better. |
| * Xref i#950. |
| */ |
| return (ptr != NULL && |
| ALIGNED(ptr, CHUNK_ALIGNMENT) && |
| safe_read(&head->magic, sizeof(magic), &magic) && |
| magic == HEADER_MAGIC); |
| } |
| } |
| |
| /* This is called on every free, so keep it efficient. |
| * However, esp on Windows, we must pay the overhead to avoid crashes |
| * from callers causing us to mix our free lists across Heaps. |
| * |
| * Up to caller to check for large allocs, which are not inside arenas! |
| * (Yes, this means that on Windows the app can pass any Heap it likes: so |
| * far that hasn't been an issue but one could imagine a Heap flag that |
| * needs to apply to a large alloc free or size query.) |
| */ |
| static inline bool |
| ptr_is_in_arena(byte *ptr, arena_header_t *arena) |
| { |
| arena_header_t *a; |
| for (a = arena; a != NULL; a = a->next_arena) { |
| if (ptr >= a->start_chunk && ptr < a->commit_end) |
| return true; |
| } |
| LOG(2, "%s: "PFX" not found in arena "PFX"\n", __FUNCTION__, ptr, arena); |
| return false; |
| } |
| |
| /* Returns true iff ptr is a live alloc inside arena. Thus, will return |
| * false for pre-us allocs from other arenas. |
| */ |
| static bool |
| is_live_alloc(void *ptr, arena_header_t *arena, chunk_header_t *head) |
| { |
| bool live = false; |
| if (alloc_ops.external_headers) { |
| live = (head != NULL); |
| } else { |
| live = (is_valid_chunk(ptr, head) && |
| !TEST(CHUNK_FREED, head->flags)); |
| } |
| return (live && |
| /* large allocs are their own arenas */ |
| (TEST(CHUNK_MMAP, head->flags) || ptr_is_in_arena(ptr, arena))); |
| } |
| |
| /* returns NULL if an invalid ptr, but will return a freed chunk */ |
| static inline chunk_header_t * |
| header_from_ptr_include_pre_us(void *ptr) |
| { |
| chunk_header_t *head = header_from_ptr(ptr); |
| if (!is_valid_chunk(ptr, head)) |
| head = hashtable_lookup(&pre_us_table, (void *)ptr); |
| return head; |
| } |
| |
| /* The base param must be non-NULL for pre-us; else, it can be NULL */ |
| static inline void |
| header_to_info(chunk_header_t *head, malloc_info_t *info, byte *pre_us_base, |
| alloc_flags_t flags /* pass 0 if not a new alloc notification */) |
| { |
| info->struct_size = sizeof(*info); |
| info->pre_us = TEST(CHUNK_PRE_US, head->flags); |
| info->base = (info->pre_us ? pre_us_base : ptr_from_header(head)); |
| ASSERT(!info->pre_us || pre_us_base != NULL, "need base for pre-us!"); |
| info->request_size = chunk_request_size(head); |
| info->pad_size = head->alloc_size; |
| info->has_redzone = !info->pre_us; |
| info->zeroed = TEST(ALLOC_ZERO, flags); |
| info->realloc = TEST(ALLOC_IS_REALLOC, flags); |
| info->client_flags = head->flags & MALLOC_POSSIBLE_CLIENT_FLAGS; |
| info->client_data = head->user_data; |
| } |
| |
| /* Assumes caller zeroed the full struct and initialized the commit_end and |
| * reserve_end fields. |
| */ |
| static void |
| arena_init(arena_header_t *arena, arena_header_t *parent) |
| { |
| size_t header_size = sizeof(*arena); |
| if (parent != NULL) { |
| /* XXX: maybe we should have two different headers for parents vs children */ |
| arena->flags = (parent->flags & (~ARENA_MAIN)); |
| arena->lock = parent->lock; |
| #ifdef WINDOWS |
| arena->dr_lock = parent->dr_lock; |
| #endif |
| arena->free_list = parent->free_list; |
| #ifdef WINDOWS |
| arena->alloc_set_member = parent->alloc_set_member; |
| arena->modbase = parent->modbase; |
| arena->handle = parent->handle; |
| #endif |
| #ifdef MACOS |
| arena->zone = parent->zone; |
| #endif |
| } else { |
| arena->flags = ARENA_MAIN; |
| arena->lock = dr_recurlock_create(); |
| /* We only grab this DR lock as the app and we mark it with |
| * dr_recurlock_mark_as_app(), as well as using dr_mark_safe_to_suspend(), |
| * to ensure proper DR behavior |
| */ |
| dr_recurlock_mark_as_app(arena->lock); |
| #ifdef WINDOWS |
| if (alloc_ops.global_lock) |
| arena->dr_lock = global_lock; |
| else |
| arena->dr_lock = dr_recurlock_create(); |
| #endif |
| /* to avoid complications of storing and freeing DR heap we inline these |
| * in the main arena's header |
| */ |
| arena->free_list = (free_lists_t *) ((byte *)arena + header_size); |
| header_size += sizeof(*arena->free_list); |
| #ifdef WINDOWS |
| arena->alloc_set_member = NULL; |
| arena->modbase = NULL; |
| arena->handle = NULL; |
| #endif |
| #ifdef MACOS |
| malloc_zone_init(arena); |
| #endif |
| } |
| /* need to start with a redzone */ |
| arena->start_chunk = (byte *)arena + |
| /* XXX: this wastes the initial redzone for !shared_redzones */ |
| ALIGN_FORWARD(header_size, CHUNK_ALIGNMENT) + inter_chunk_space(); |
| arena->next_chunk = arena->start_chunk; |
| arena->magic = HEADER_MAGIC; |
| arena->next_arena = NULL; |
| arena->prev_free_sz = 0; |
| STATS_ADD(heap_capacity, (uint)(arena->commit_end - (byte *)arena)); |
| STATS_PEAK(heap_capacity); |
| STATS_INC(num_arenas); |
| STATS_PEAK(num_arenas); |
| if (parent != NULL) { |
| ASSERT(parent->next_arena == NULL, "should only append to end"); |
| parent->next_arena = arena; |
| } |
| } |
| |
| /* up to caller to call heap_region_remove() */ |
| static void |
| arena_deallocate(arena_header_t *arena) |
| { |
| #ifdef LINUX |
| if (arena->reserve_end != cur_brk) |
| #elif defined(WINDOWS) |
| /* For pre-us mapped we just never free */ |
| if (!TEST(ARENA_PRE_US_MAPPED, arena->flags)) |
| #endif |
| os_large_free((byte *)arena, arena->reserve_end - (byte *)arena); |
| } |
| |
| /* up to caller to call heap_region_remove() before calling here, |
| * as we can't call it here b/c we're invoked from heap_region_iterate() |
| */ |
| static void |
| arena_free(arena_header_t *arena) |
| { |
| if (TEST(ARENA_MAIN, arena->flags)) { |
| dr_recurlock_destroy(arena->lock); |
| #ifdef WINDOWS |
| if (!alloc_ops.global_lock) |
| dr_recurlock_destroy(arena->dr_lock); |
| #endif |
| } |
| arena_deallocate(arena); |
| } |
| |
| static arena_header_t * |
| arena_create(arena_header_t *parent, size_t initial_size) |
| { |
| size_t init_size = (initial_size == 0) ? ARENA_INITIAL_SIZE : initial_size; |
| arena_header_t *new_arena = (arena_header_t *) |
| os_large_alloc(IF_WINDOWS_(ARENA_INITIAL_COMMIT) init_size |
| _IF_WINDOWS(arena_page_prot(parent->flags))); |
| if (new_arena == NULL) |
| return NULL; |
| #ifdef UNIX |
| new_arena->commit_end = (byte *)new_arena + init_size; |
| #else |
| new_arena->commit_end = (byte *)new_arena + ARENA_INITIAL_COMMIT; |
| #endif |
| new_arena->reserve_end = (byte *)new_arena + init_size; |
| heap_region_add((byte *)new_arena, new_arena->reserve_end, HEAP_ARENA, NULL); |
| arena_init(new_arena, parent); |
| return new_arena; |
| } |
| |
| /* Either extends arena in-place and returns it, or allocates a new arena |
| * and returns that. Returns NULL on failure to do either. |
| * Expects to be passed the final sub-arena, not the master arena. |
| */ |
| static arena_header_t * |
| arena_extend(arena_header_t *arena, heapsz_t add_size) |
| { |
| heapsz_t aligned_add = (heapsz_t) ALIGN_FORWARD(add_size, PAGE_SIZE); |
| arena_header_t *new_arena; |
| #ifdef LINUX |
| if (arena->commit_end == cur_brk) { |
| byte *new_brk = set_brk(cur_brk + aligned_add); |
| if (new_brk >= cur_brk + add_size) { |
| LOG(2, "\tincreased brk from "PFX" to "PFX"\n", cur_brk, new_brk); |
| STATS_ADD(heap_capacity, (uint)(new_brk - cur_brk)); |
| STATS_PEAK(heap_capacity); |
| cur_brk = new_brk; |
| arena->commit_end = new_brk; |
| arena->reserve_end = arena->commit_end; |
| heap_region_adjust((byte *)arena, new_brk); |
| return arena; |
| } else { |
| LOG(1, "brk @"PFX"-"PFX" cannot expand: switching to mmap\n", |
| pre_us_brk, cur_brk); |
| } |
| } else |
| #else |
| if (arena->commit_end + aligned_add <= arena->reserve_end) |
| #endif |
| { /* here to not confuse brace matching */ |
| size_t cur_size = arena->commit_end - (byte *)arena; |
| size_t new_size = cur_size + aligned_add; |
| if (os_large_alloc_extend((byte *)arena, cur_size, new_size |
| _IF_WINDOWS(arena_page_prot(arena->flags)))) { |
| LOG(2, "\textended arena to "PFX"-"PFX"\n", arena, (byte*)arena + new_size); |
| STATS_ADD(heap_capacity, (uint)(new_size - cur_size)); |
| STATS_PEAK(heap_capacity); |
| arena->commit_end = (byte *)arena + new_size; |
| #ifdef UNIX /* windows already added whole reservation */ |
| arena->reserve_end = arena->commit_end; |
| heap_region_adjust((byte *)arena, (byte *)arena + new_size); |
| #endif |
| return arena; |
| } |
| } |
| #ifdef WINDOWS |
| if (!TEST(HEAP_GROWABLE, arena->flags)) |
| return NULL; |
| #endif |
| /* XXX: add stranded space at end of arena to free list */ |
| new_arena = arena_create(arena, 0/*default*/); |
| LOG(1, "cur arena "PFX"-"PFX" out of space: created new one @"PFX"\n", |
| (byte *)arena, arena->reserve_end, new_arena); |
| return new_arena; |
| } |
| |
| static inline bool |
| arena_delayed_list_full(arena_header_t *arena) |
| { |
| return (arena->free_list->delayed_chunks >= alloc_ops.delay_frees || |
| arena->free_list->delayed_bytes >= alloc_ops.delay_frees_maxsz); |
| } |
| |
| static inline chunk_header_t * |
| next_chunk_forward(arena_header_t *arena, chunk_header_t *head, |
| arena_header_t **container_out DR_PARAM_OUT) |
| { |
| arena_header_t *container; |
| byte *start = ptr_from_header(head); |
| /* XXX: this arena walk is showing up in too many places. We may need |
| * to optimize this. |
| */ |
| for (container = arena; container != NULL; container = container->next_arena) { |
| if (start >= container->start_chunk && start < container->commit_end) { |
| start += head->alloc_size + inter_chunk_space(); |
| if (start < container->next_chunk) { |
| chunk_header_t *next = header_from_ptr(start); |
| ASSERT(is_valid_chunk(start, next), "next_chunk_forward error"); |
| return next; |
| } else if (container_out != NULL) |
| *container_out = container; |
| break; |
| } |
| } |
| return NULL; |
| } |
| |
| /* updates the prev size field of the next chunk, if any */ |
| static void |
| set_prev_size_field(arena_header_t *arena, chunk_header_t *head) |
| { |
| arena_header_t *container = NULL; |
| chunk_header_t *next = next_chunk_forward(arena, head, &container); |
| ASSERT(!TEST(CHUNK_DELAY_FREE, head->flags), "no need/room for prev size for delay"); |
| if (next != NULL) { |
| ASSERT(!TEST(CHUNK_FREED, next->flags) || TEST(CHUNK_DELAY_FREE, next->flags), |
| "can't set prev size on true free"); |
| next->flags |= CHUNK_PREV_FREE; |
| if (head->alloc_size / CHUNK_MIN_SIZE <= USHRT_MAX) { |
| next->u.unfree.prev_size_shr = head->alloc_size / CHUNK_MIN_SIZE; |
| LOG(3, "set prev_size_shr of "PFX" to "PIFX"\n", |
| next, next->u.unfree.prev_size_shr); |
| } else { |
| /* We don't want to increase the header size, so we store |
| * in the prev chunk. This takes away one slot from pattern |
| * mode but we can live with that. |
| */ |
| byte *redzone_start = (byte *)next - inter_chunk_space(); |
| next->u.unfree.prev_size_shr = 0; |
| LOG(3, "writing prev size "PIFX" to "PFX"\n", head->alloc_size, |
| redzone_start - sizeof(heapsz_t)); |
| *(heapsz_t*)(redzone_start - sizeof(heapsz_t)) = head->alloc_size; |
| } |
| } else { |
| ASSERT(container != NULL, "couldn't find containing sub-arena"); |
| container->prev_free_sz = head->alloc_size; |
| } |
| } |
| |
| static heapsz_t |
| get_prev_size_field(chunk_header_t *head) |
| { |
| ASSERT(TEST(CHUNK_PREV_FREE, head->flags), "only call if prev free exists"); |
| if (head->u.unfree.prev_size_shr == 0) { |
| byte *redzone_start = (byte *)head - inter_chunk_space(); |
| LOG(3, "reading prev size "PIFX" from "PFX"\n", |
| *(heapsz_t*)(redzone_start - sizeof(heapsz_t)), |
| redzone_start - sizeof(heapsz_t)); |
| return *(heapsz_t*)(redzone_start - sizeof(heapsz_t)); |
| } else |
| return head->u.unfree.prev_size_shr * CHUNK_MIN_SIZE; |
| } |
| |
| #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) |
| |
| /* XXX: i#1269 index above array bounds warning on x64 build using gcc 4.8.1 */ |
| #if defined(X64) && GCC_VERSION > 40801 |
| # define IF_GCC_WARN(x) |
| #else |
| # define IF_GCC_WARN(x) x |
| #endif |
| |
| static inline uint |
| bucket_index(chunk_header_t *head) |
| { |
| uint bucket; |
| /* pivot around small vs large first to avoid walking whole list for small: */ |
| uint start = (head->alloc_size > free_list_sizes[6]) ? (NUM_FREE_LISTS - 1) : 6; |
| /* our buckets guarantee that all allocs in that bucket have at least that size */ |
| for (bucket = start; head->alloc_size < free_list_sizes[bucket] |
| /* if bucket is 0 this cond breaks to avoid free_list_sizes[-1] */ |
| IF_GCC_WARN(&& bucket > 0); |
| bucket--) |
| ; /* nothing */ |
| ASSERT(head->alloc_size >= free_list_sizes[bucket], "bucket invariant violated"); |
| return bucket; |
| } |
| |
| /* Pass UINT_MAX if the bucket is not known */ |
| static void |
| remove_from_free_list(arena_header_t *arena, free_header_t *target, uint bucket) |
| { |
| if (target->head.u.prev == NULL) { |
| if (bucket == UINT_MAX) |
| bucket = bucket_index(&target->head); |
| ASSERT(target == arena->free_list->front[bucket], "free list corrupted"); |
| arena->free_list->front[bucket] = target->next; |
| } else { |
| target->head.u.prev->next = target->next; |
| } |
| if (target->next == NULL) { |
| if (bucket == UINT_MAX) |
| bucket = bucket_index(&target->head); |
| ASSERT(target == arena->free_list->last[bucket], "free list corrupted"); |
| arena->free_list->last[bucket] = target->head.u.prev; |
| } else { |
| target->next->head.u.prev = target->head.u.prev; |
| } |
| } |
| |
| static void |
| add_to_free_list(arena_header_t *arena, chunk_header_t *head) |
| { |
| free_header_t *cur = (free_header_t *) head; |
| uint bucket = bucket_index(head); |
| cur->next = NULL; |
| if (arena->free_list->last[bucket] == NULL) { |
| ASSERT(arena->free_list->front[bucket] == NULL, "inconsistent free list"); |
| arena->free_list->front[bucket] = cur; |
| cur->head.u.prev = NULL; |
| } else { |
| cur->head.u.prev = arena->free_list->last[bucket]; |
| arena->free_list->last[bucket]->next = cur; |
| } |
| arena->free_list->last[bucket] = cur; |
| LOG(3, "%s: arena "PFX" bucket %d free front="PFX" last="PFX"\n", __FUNCTION__, |
| arena, bucket, arena->free_list->front[bucket], |
| arena->free_list->last[bucket]); |
| } |
| |
| static free_header_t * |
| consider_giving_back_memory(arena_header_t *arena, chunk_header_t *tofree) |
| { |
| /* If we've accumulated enough, consider giving it back to the OS. |
| * We won't give back a new arena in which we haven't allocated at |
| * least half of it, even if it's now all free. |
| */ |
| if (tofree->alloc_size >= ARENA_INITIAL_SIZE/2) { |
| arena_header_t *sub, *prev = NULL; |
| byte *ptr = ptr_from_header(tofree); |
| #ifdef LINUX |
| if (arena->reserve_end == cur_brk) { |
| sub = NULL; /* don't search */ |
| if (ptr + tofree->alloc_size + inter_chunk_space() == arena->next_chunk) { |
| /* Shrink the brk */ |
| byte *new_brk = set_brk((byte *)ALIGN_FORWARD(ptr, PAGE_SIZE)); |
| if (new_brk <= cur_brk) { |
| LOG(2, "shrinking brk "PFX"-"PFX" to "PFX"-"PFX"\n", |
| pre_us_brk, cur_brk, pre_us_brk, new_brk); |
| STATS_ADD(heap_capacity, (int)(new_brk - cur_brk)); |
| STATS_INC(num_dealloc); |
| heap_region_remove(new_brk, cur_brk, NULL); |
| cur_brk = new_brk; |
| arena->commit_end = new_brk; |
| arena->reserve_end = new_brk; |
| arena->next_chunk = ptr; |
| arena->prev_free_sz = 0; /* can't end in free: would be coalesced */ |
| return NULL; |
| } else { |
| LOG(1, "brk @"PFX"-"PFX" failed to shrink to "PFX"\n", |
| pre_us_brk, cur_brk, ptr); |
| } |
| } |
| } |
| #endif |
| for (sub = arena; sub != NULL; prev = sub, sub = sub->next_arena) { |
| if (ptr == sub->start_chunk && |
| ptr + tofree->alloc_size + inter_chunk_space() == sub->next_chunk) { |
| if (prev == NULL) { |
| /* If there's a next_arena, we could try to |
| * de-allocate the main region and copy the free |
| * lists over, but for now we don't do anything. |
| */ |
| } else { |
| LOG(2, "de-allocating arena "PFX"-"PFX"\n", sub, sub->reserve_end); |
| prev->next_arena = sub->next_arena; |
| STATS_ADD(heap_capacity, -(int)(sub->commit_end - (byte *)sub)); |
| STATS_INC(num_dealloc); |
| STATS_DEC(num_arenas); |
| heap_region_remove((byte *)sub, sub->reserve_end, NULL); |
| arena_deallocate(sub); |
| return NULL; |
| } |
| } |
| } |
| } |
| return (free_header_t *) tofree; |
| } |
| |
| /* Returns the header of the newly coalesced entry, or cur unchanged |
| * if no coalescing was done. Does not add cur to the free lists. |
| */ |
| static free_header_t * |
| coalesce_adjacent_frees(arena_header_t *arena, free_header_t *cur) |
| { |
| chunk_header_t *tofree = &cur->head, *next; |
| if (TEST(CHUNK_PREV_FREE, cur->head.flags)) { |
| /* Coalesce with prior block */ |
| size_t prev_sz = get_prev_size_field(&cur->head); |
| byte *cur_ptr = ptr_from_header(tofree); |
| byte *prev_ptr = cur_ptr - inter_chunk_space() - prev_sz; |
| free_header_t *prev = (free_header_t *) header_from_ptr(prev_ptr); |
| ASSERT(TEST(CHUNK_FREED, prev->head.flags), "header flags inconsistent"); |
| ASSERT(prev->head.alloc_size == prev_sz, "prev size inconsistent"); |
| ASSERT(is_valid_chunk(prev_ptr, &prev->head), "prev chunk inconsistent"); |
| /* Synchronize with iterators (i#949) */ |
| iterator_lock(arena, true/*in alloc*/); |
| /* We can't merge with a delayed free b/c we'd lose the callstack, so we |
| * don't even set CHUNK_PREV_FREE (we don't have space anyway in a true-free |
| * header to store prev_size_shr: so we can't store for a delay, and we rely |
| * on always coalescing). |
| */ |
| ASSERT(!TEST(CHUNK_DELAY_FREE, prev->head.flags), "prev free must be true free"); |
| /* Remove prev from free list and merge w/ head. We'll add the |
| * newly combined chunk to the delay list below. Yes, this delays |
| * re-use of the no-longer-delayed prev, but the size delay |
| * threshold should prevent OOM. |
| */ |
| remove_from_free_list(arena, prev, UINT_MAX); |
| if (cur->head.user_data != NULL) |
| client_malloc_data_free(cur->head.user_data); |
| /* We don't want misleading data so we throw out prev as well */ |
| if (prev->head.user_data != NULL) { |
| client_malloc_data_free(prev->head.user_data); |
| prev->head.user_data = NULL; |
| } |
| tofree = &prev->head; |
| tofree->alloc_size += cur->head.alloc_size + inter_chunk_space(); |
| iterator_unlock(arena, true/*in alloc*/); |
| LOG(3, "coalescing with prev chunk "PFX" => "PFX"-"PFX"\n", |
| prev, prev_ptr, prev_ptr + tofree->alloc_size); |
| STATS_INC(num_coalesces); |
| /* We can't call set_prev_size_field() here b/c it will assert if |
| * next is free, so we wait until we've possibly merged w/ next |
| */ |
| /* Let client fill/mark midpoint header, if desired */ |
| if (!alloc_ops.shared_redzones) |
| client_new_redzone((byte *)cur, header_size); |
| } |
| next = next_chunk_forward(arena, tofree, NULL); |
| if (next != NULL && TEST(CHUNK_FREED, next->flags) && |
| !TEST(CHUNK_DELAY_FREE, next->flags) ) { |
| /* Synchronize with iterators (i#949) */ |
| iterator_lock(arena, true/*in alloc*/); |
| /* Coalesce with next block */ |
| remove_from_free_list(arena, (free_header_t *)next, UINT_MAX); |
| if (next->user_data != NULL) |
| client_malloc_data_free(next->user_data); |
| /* We don't want misleading data so we throw out cur as well */ |
| if (tofree->user_data != NULL) { |
| client_malloc_data_free(tofree->user_data); |
| tofree->user_data = NULL; |
| } |
| tofree->alloc_size += next->alloc_size + inter_chunk_space(); |
| LOG(3, "coalescing with next chunk "PFX" => "PFX"-"PFX"\n", |
| next, ptr_from_header(tofree), ptr_from_header(tofree) + |
| tofree->alloc_size); |
| STATS_INC(num_coalesces); |
| /* Let client fill/mark midpoint header, if desired */ |
| if (!alloc_ops.shared_redzones) |
| client_new_redzone((byte *)next, header_size); |
| set_prev_size_field(arena, tofree); /* update */ |
| iterator_unlock(arena, true/*in alloc*/); |
| } else if (tofree != &cur->head) { |
| /* Delayed from above: see comment in merge-prev */ |
| set_prev_size_field(arena, tofree); /* update */ |
| } |
| return consider_giving_back_memory(arena, tofree); |
| } |
| |
| static bool |
| shift_from_delay_list_to_free_list(arena_header_t *arena) |
| { |
| free_header_t *cur = arena->free_list->delay_front; |
| if (cur == NULL) |
| return false; |
| LOG(3, "%s: shifting "PFX" to regular free list\n", __FUNCTION__, cur); |
| cur->head.flags &= ~CHUNK_DELAY_FREE; |
| arena->free_list->delay_front = cur->next; |
| if (cur == arena->free_list->delay_last) |
| arena->free_list->delay_last = NULL; |
| ASSERT(arena->free_list->delayed_chunks > 0, "delay counter off"); |
| arena->free_list->delayed_chunks--; |
| ASSERT(arena->free_list->delayed_bytes >= cur->head.alloc_size, |
| "delay bytes counter off"); |
| arena->free_list->delayed_bytes -= cur->head.alloc_size; |
| LOG(3, "%s: updated delayed chunks=%d, bytes="PIFX"\n", __FUNCTION__, |
| arena->free_list->delayed_chunks, arena->free_list->delayed_bytes); |
| |
| /* We coalesce here, rather than on initial free, b/c only now |
| * can we throw away the user_data |
| */ |
| cur = coalesce_adjacent_frees(arena, cur); |
| if (cur != NULL) { |
| set_prev_size_field(arena, &cur->head); |
| add_to_free_list(arena, &cur->head); |
| ASSERT(!TEST(CHUNK_PREV_FREE, cur->head.flags), "no adjacent frees"); |
| DOLOG(2, { |
| chunk_header_t *next = next_chunk_forward(arena, &cur->head, NULL); |
| ASSERT(next == NULL || TEST(CHUNK_PREV_FREE, next->flags), |
| "missing prev free pointer"); |
| }); |
| } |
| return true; |
| } |
| |
| static void |
| add_to_delay_list(arena_header_t *arena, chunk_header_t *head) |
| { |
| free_header_t *cur = (free_header_t *) head; |
| /* add to the end for delayed free FIFO */ |
| cur->next = NULL; |
| head->flags |= CHUNK_DELAY_FREE; |
| if (arena->free_list->delay_last == NULL) { |
| ASSERT(arena->free_list->delay_front == NULL, "inconsistent free list"); |
| arena->free_list->delay_front = cur; |
| } else |
| arena->free_list->delay_last->next = cur; |
| arena->free_list->delay_last = cur; |
| |
| arena->free_list->delayed_chunks++; |
| arena->free_list->delayed_bytes += head->alloc_size; |
| LOG(3, "%s: updated delayed chunks=%d, bytes="PIFX"\n", __FUNCTION__, |
| arena->free_list->delayed_chunks, arena->free_list->delayed_bytes); |
| |
| while (arena_delayed_list_full(arena)) { |
| /* Keep shifting first delayed entry to the free lists, until we're |
| * below both thresholds. |
| */ |
| if (!shift_from_delay_list_to_free_list(arena)) |
| break; |
| } |
| } |
| |
| static chunk_header_t * |
| search_free_list_bucket(arena_header_t *arena, heapsz_t aligned_size, uint bucket) |
| { |
| /* search for large enough chunk */ |
| free_header_t *cur; |
| chunk_header_t *head = NULL; |
| #ifdef UNIX |
| /* On Windows we have HEAP_NO_SERIALIZE. Not worth passing the flags in. */ |
| ASSERT(dr_recurlock_self_owns(arena->lock), "caller must hold lock"); |
| #endif |
| ASSERT(bucket < NUM_FREE_LISTS, "invalid param"); |
| for (cur = arena->free_list->front[bucket]; |
| cur != NULL && cur->head.alloc_size < aligned_size; |
| cur = cur->next) |
| ; /* nothing */ |
| if (cur != NULL) { |
| remove_from_free_list(arena, cur, bucket); |
| head = (chunk_header_t *) cur; |
| } |
| LOG(3, "arena "PFX" taking cur="PFX" => bucket %d free front="PFX" last="PFX"\n", |
| arena, cur, bucket, arena->free_list->front[bucket], |
| arena->free_list->last[bucket]); |
| return head; |
| } |
| |
| /* Caller needs only to point free_hdr at the right point: this routine will fill it in. |
| */ |
| static void |
| split_piece_for_free_list(arena_header_t *arena, chunk_header_t *head, |
| free_header_t *free_hdr, size_t free_sz, |
| size_t head_new_sz) |
| { |
| free_header_t *coalesced; |
| byte *free_ptr; |
| /* Synchronize with iterators (i#949) */ |
| iterator_lock(arena, true/*in alloc*/); |
| |
| head->alloc_size = head_new_sz; |
| |
| free_hdr->head.user_data = client_malloc_data_free_split(head->user_data); |
| free_hdr->head.u.unfree.request_diff = 0; |
| free_hdr->head.alloc_size = free_sz; |
| free_hdr->head.magic = HEADER_MAGIC; |
| free_hdr->head.flags = head->flags | CHUNK_FREED; |
| |
| free_ptr = ptr_from_header(&free_hdr->head); |
| LOG(3, "splitting off "PFX"-"PFX" (hdr "PFX") from "PFX"-"PFX" (hdr "PFX")\n", |
| free_ptr, free_ptr+free_sz, free_hdr, ptr_from_header(head), |
| ptr_from_header(head) + head->alloc_size, head); |
| /* Let client fill/mark new redzones, if desired. |
| * We currently have our next free ptr in the redzone: |
| */ |
| client_new_redzone(free_ptr - alloc_ops.redzone_size, alloc_ops.redzone_size); |
| if (!alloc_ops.shared_redzones) { |
| client_new_redzone(free_ptr + free_sz, alloc_ops.redzone_size); |
| } |
| |
| coalesced = coalesce_adjacent_frees(arena, free_hdr); |
| if (coalesced != NULL) { |
| set_prev_size_field(arena, (chunk_header_t *)coalesced); |
| /* XXX: this adds it to the end, even though maybe it |
| * should stay at the front for FIFO for the case where we split |
| * it off a free list entry in the first place. |
| */ |
| add_to_free_list(arena, (chunk_header_t *)coalesced); |
| } |
| iterator_unlock(arena, true/*in alloc*/); |
| } |
| |
| static chunk_header_t * |
| find_free_list_entry(arena_header_t *arena, heapsz_t request_size, heapsz_t aligned_size) |
| { |
| chunk_header_t *head = NULL; |
| uint bucket; |
| #ifdef UNIX |
| /* On Windows we have HEAP_NO_SERIALIZE. Not worth passing the flags in. */ |
| ASSERT(dr_recurlock_self_owns(arena->lock), "caller must hold lock"); |
| #endif |
| |
| /* b/c we're delaying, we're not able to re-use a just-freed chunk. |
| * thus we go for time over space and use the guaranteed-size bucket |
| * before searching the maybe-big-enough bucket. |
| */ |
| for (bucket = 0; |
| bucket < NUM_FREE_LISTS - 1 && aligned_size > free_list_sizes[bucket]; |
| bucket++) |
| ; /* nothing */ |
| |
| /* I tried searching the maybe-big-enough bucket (bucket - 1) before |
| * going to bigger buckets but it's a huge time sink for some benchmarks |
| * and doesn't seem to help much on others so I removed it. |
| */ |
| |
| /* Use a larger bucket to avoid delaying a ton of allocs of a |
| * certain size and never re-using them for pathological app alloc |
| * sequences. I used to do this only when delayed frees were piling |
| * up (delayed_chunks or delayed_bytes at 2x the threshold) but |
| * it seems worth doing every time, even at the risk of fragmentation, |
| * since we have coalescing in place. |
| */ |
| if (head == NULL && arena->free_list->front[bucket] == NULL) { |
| while (bucket < NUM_FREE_LISTS - 1 && arena->free_list->front[bucket] == NULL) |
| bucket++; |
| } |
| |
| if (head == NULL && arena->free_list->front[bucket] != NULL) { |
| LOG(2, "\tallocating from larger bucket size to reduce delayed frees\n"); |
| if (bucket == NUM_FREE_LISTS - 1) { |
| /* var-size bucket: have to search */ |
| head = search_free_list_bucket(arena, aligned_size, bucket); |
| } else { |
| /* guaranteed to be big enough so take from front */ |
| ASSERT(aligned_size <= free_list_sizes[bucket], "logic error"); |
| head = (chunk_header_t *) arena->free_list->front[bucket]; |
| arena->free_list->front[bucket] = arena->free_list->front[bucket]->next; |
| if (head == (chunk_header_t *) arena->free_list->last[bucket]) |
| arena->free_list->last[bucket] = arena->free_list->front[bucket]; |
| else { |
| free_header_t *cur = (free_header_t *) head; |
| ASSERT(cur->next != NULL, "free list corrupted"); |
| cur->next->head.u.prev = NULL; |
| } |
| LOG(3, "arena "PFX" bucket %d taking "PFX" => free front="PFX" last="PFX"\n", |
| arena, bucket, head, arena->free_list->front[bucket], |
| arena->free_list->last[bucket]); |
| } |
| } |
| |
| if (head != NULL) { |
| chunk_header_t *next; |
| arena_header_t *container = NULL; |
| LOG(2, "\tusing free list size=%d for request=%d align=%d from bucket %d\n", |
| head->alloc_size, request_size, aligned_size, bucket); |
| |
| /* if there's a lot of extra room, split it off as a separate free entry */ |
| if (head->alloc_size > aligned_size + CHUNK_MIN_SIZE + inter_chunk_space()) { |
| byte *split = ptr_from_header(head) + aligned_size + |
| (alloc_ops.shared_redzones ? 0 : alloc_ops.redzone_size); |
| size_t rest_size = head->alloc_size - (aligned_size + inter_chunk_space()); |
| byte *chunk2_start = split + inter_chunk_space() - |
| (alloc_ops.shared_redzones ? 0 : alloc_ops.redzone_size); |
| free_header_t *rest = (free_header_t *) header_from_ptr(chunk2_start); |
| ASSERT(!TEST(CHUNK_MMAP, head->flags), "mmap not expected on free list"); |
| STATS_INC(num_splits); |
| split_piece_for_free_list(arena, head, rest, rest_size, aligned_size); |
| ASSERT(is_valid_chunk(chunk2_start, &rest->head), "rest chunk inconsistent"); |
| } |
| |
| if (head->user_data != NULL) { |
| client_malloc_data_free(head->user_data); |
| head->user_data = NULL; |
| } |
| head->flags &= ~(CHUNK_FREED | ALLOCATOR_TYPE_FLAGS); |
| |
| next = next_chunk_forward(arena, head, &container); |
| if (next != NULL) |
| next->flags &= ~CHUNK_PREV_FREE; |
| else if (container != NULL) |
| container->prev_free_sz = 0; |
| } |
| return head; |
| } |
| |
| /* i#1581: to avoid retaddr local vars from callstack walks messing up app |
| * callstacks, we invoke the 2nd layer on a clean dstack (this lets us keep |
| * just the outer layer as stdcall, and avoids complicating drwrap further). |
| */ |
| #define ONDSTACK_REPLACE_ALLOC_COMMON(arena, sz, align, flags, dc, mc, \ |
| caller, alloc_type) \ |
| dr_call_on_clean_stack(dc, (void* (*)(void)) replace_alloc_common, arena, \ |
| (void *)(ptr_uint_t)(sz), (void *)(ptr_uint_t)(align), \ |
| (void *)(ptr_uint_t)(flags), \ |
| dc, mc, caller, (void *)(ptr_uint_t)(alloc_type)) |
| |
| /* As noted in the flag definitions, ALLOC_INVOKE_CLIENT_* in flags |
| * only applies to successful allocation: client is still notified on failure |
| * and when client user data is freed or shifted. |
| * |
| * If invoked from an outer drwrap_replace_native() layer, this should be invoked |
| * via ONDSTACK_REPLACE_ALLOC_COMMON(). |
| * |
| * Pass 0 if no special alignment is needed. |
| */ |
| static byte * |
| replace_alloc_common(arena_header_t *arena, size_t request_size, size_t alignment, |
| alloc_flags_t flags, void *drcontext, dr_mcontext_t *mc, |
| app_pc caller, uint alloc_type) |
| { |
| heapsz_t aligned_size; |
| byte *res = NULL; |
| chunk_header_t *head = NULL; |
| ASSERT((alloc_type & ~(ALLOCATOR_TYPE_FLAGS)) == 0, "invalid type flags"); |
| |
| if (request_size > UINT_MAX || |
| /* catch overflow in chunk or mmap alignment: no need to support really |
| * large sizes within a page of UINT_MAX (i#944) |
| */ |
| ALIGN_FORWARD(request_size, PAGE_SIZE) < request_size) { |
| /* rather than have larger headers for 64-bit we just don't support |
| * enormous allocations |
| */ |
| client_handle_alloc_failure(request_size, caller, mc); |
| return NULL; |
| } |
| |
| /* alignment must be power of 2, or 0 (== default) */ |
| if (alignment != 0 && !IS_POWER_OF_2(alignment)) { |
| client_handle_alloc_failure(request_size, caller, mc); |
| return NULL; |
| } |
| if (alignment < CHUNK_ALIGNMENT) |
| alignment = CHUNK_ALIGNMENT; |
| |
| aligned_size = ALIGN_FORWARD(request_size, CHUNK_ALIGNMENT); |
| if (alignment > CHUNK_ALIGNMENT) { |
| /* We brute-force and alloc enough space to ensure we can back the |
| * pre-aligned-padding as a free slot, to avoid any complexity of |
| * having pre-header padding. |
| */ |
| aligned_size += alignment + CHUNK_MIN_SIZE + inter_chunk_space(); |
| } |
| ASSERT(aligned_size >= request_size, "overflow should have been caught"); |
| if (aligned_size < CHUNK_MIN_SIZE) |
| aligned_size = CHUNK_MIN_SIZE; |
| |
| arena_lock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| |
| /* for large requests we do direct mmap with own redzones. |
| * we use the large malloc table to track them for iteration. |
| * XXX: for simplicity, not delay-freeing these for now |
| */ |
| if (aligned_size + header_size >= CHUNK_MIN_MMAP) { |
| mmap_header_t *mhead; |
| size_t map_size = (size_t) |
| ALIGN_FORWARD(aligned_size + sizeof(mmap_header_t) + |
| alloc_ops.redzone_size*2 + header_beyond_redzone, PAGE_SIZE); |
| byte *map = os_large_alloc(map_size _IF_WINDOWS(map_size) |
| _IF_WINDOWS(arena_page_prot(arena->flags))); |
| size_t dist_to_map; |
| ASSERT(map_size >= aligned_size, "overflow should have been caught"); |
| LOG(2, "\tlarge alloc %zu => mmap %zu @"PFX"\n", request_size, map_size, map); |
| if (map == NULL) { |
| client_handle_alloc_failure(request_size, caller, mc); |
| goto replace_alloc_common_done; |
| } |
| ASSERT(!alloc_ops.external_headers, "NYI"); |
| mhead = (mmap_header_t *) map; |
| mhead->map_size = map_size; |
| head = (chunk_header_t *) |
| ((byte *)map + sizeof(mmap_header_t) + alloc_ops.redzone_size + |
| header_beyond_redzone - redzone_beyond_header - header_size); |
| res = ptr_from_header(head); |
| if (!ALIGNED(res, alignment)) { |
| res = (byte *) ALIGN_FORWARD(res, alignment); |
| head = header_from_ptr(res); |
| } |
| dist_to_map = (byte *)head - map; |
| if (dist_to_map > PREV_SIZE_MAX) { |
| LOG(1, "\tdist_to_map %zu=%zu+%d+%d-%d-%d is too large\n", dist_to_map, |
| sizeof(mmap_header_t), alloc_ops.redzone_size, |
| header_beyond_redzone, redzone_beyond_header, header_size); |
| os_large_free(map, map_size); |
| client_handle_alloc_failure(request_size, caller, mc); |
| goto replace_alloc_common_done; |
| } |
| head->u.unfree.prev_size_shr = dist_to_map; |
| mhead->head = head; |
| head->flags |= CHUNK_MMAP; |
| head->magic = HEADER_MAGIC; |
| head->alloc_size = (map + map_size - alloc_ops.redzone_size - res); |
| heap_region_add(map, map + map_size, HEAP_MMAP, mc); |
| } else { |
| /* look for free list entry */ |
| head = find_free_list_entry(arena, request_size, aligned_size); |
| if (head != NULL) { |
| malloc_info_t info; |
| header_to_info(head, &info, NULL, 0); |
| client_handle_free_reuse(drcontext, &info, mc); |
| } |
| } |
| |
| /* if no free list entry, get new memory */ |
| if (head == NULL) { |
| heapsz_t add_size = aligned_size + inter_chunk_space(); |
| /* We deliberately walk every arena each time. This helps use empty |
| * space at the bottom that was too small for larger allocs that triggered |
| * creating a new arena. However, it is extra overhead, especially if |
| * we ever end up with many arenas, where we should probably keep a pointer |
| * to the last one around to avoid this walk. But, even artificially |
| * forcing allocs among 28 arenas on cfrac, the overhead isn't egregious, |
| * so I'm sticking with this simple design for now. |
| */ |
| arena_header_t *last_arena = arena; |
| byte *orig_next_chunk; |
| while (arena != NULL) { |
| if (arena->next_chunk + add_size <= arena->commit_end) |
| break; |
| last_arena = arena; |
| arena = arena->next_arena; |
| } |
| if (arena == NULL) |
| arena = arena_extend(last_arena, add_size); |
| if (arena == NULL) { /* ignore ALLOC_INVOKE_CLIENT */ |
| /* i#1829: better to abandon the delayed frees (yes, all of them) to |
| * avoid OOM in the app. This is rare so we can afford the simple |
| * solution of re-checking the free list after each shift, so we'll |
| * be able to use a coalesced pair rather than searching the delay |
| * list for a singleton that's large enough. |
| */ |
| arena = last_arena; |
| while (arena->free_list->delayed_bytes >= aligned_size) { |
| if (!shift_from_delay_list_to_free_list(arena)) |
| break; |
| head = find_free_list_entry(arena, request_size, aligned_size); |
| if (head != NULL) |
| break; |
| } |
| if (head == NULL) { |
| client_handle_alloc_failure(request_size, caller, mc); |
| goto replace_alloc_common_done; |
| } |
| } |
| if (head == NULL) { |
| /* remember that arena->next_chunk always has a redzone preceding it */ |
| head = (chunk_header_t *) |
| (arena->next_chunk - redzone_beyond_header - header_size); |
| head->alloc_size = aligned_size; |
| head->magic = HEADER_MAGIC; |
| head->user_data = NULL; /* b/c we pass the old to client */ |
| head->flags = 0; |
| LOG(2, "\tcarving out new chunk @"PFX" => head="PFX", res="PFX"\n", |
| arena->next_chunk - alloc_ops.redzone_size, head, ptr_from_header(head)); |
| orig_next_chunk = arena->next_chunk; |
| arena->next_chunk += add_size; |
| if (arena->prev_free_sz != 0) { |
| /* There's a prior free, so we need to mark this new chunk with |
| * prev-free info. |
| */ |
| byte *prev_ptr = orig_next_chunk - inter_chunk_space() - |
| arena->prev_free_sz; |
| chunk_header_t *prev = header_from_ptr(prev_ptr); |
| ASSERT(is_valid_chunk(prev_ptr, prev), "arena prev free corrupted"); |
| ASSERT(TEST(CHUNK_FREED, prev->flags), "arena prev free inconsistent"); |
| set_prev_size_field(arena, prev); |
| arena->prev_free_sz = 0; |
| } |
| } |
| } |
| |
| /* head->alloc_size, head->magic, and head->flags (except type) are already set */ |
| ASSERT(head->magic == HEADER_MAGIC, "corrupted header"); |
| ASSERT(head->alloc_size - request_size <= REQUEST_DIFF_MAX, |
| "illegally large chunk padding"); |
| head->u.unfree.request_diff = head->alloc_size - request_size; |
| head->flags |= alloc_type; |
| res = ptr_from_header(head); |
| if (!ALIGNED(res, alignment)) { |
| /* Place the pre-aligned padding onto the free list */ |
| chunk_header_t *orig = head; |
| byte *orig_res = res; |
| free_header_t *pre = (free_header_t *) head; |
| size_t pre_sz; |
| res += CHUNK_MIN_SIZE + inter_chunk_space(); |
| res = (byte *) ALIGN_FORWARD(res, alignment); |
| head = header_from_ptr(res); |
| *head = *orig; |
| pre_sz = (byte *)head - (byte *)orig - inter_chunk_space(); |
| LOG(2, "\torig alloc %d bytes, shrinking by %d to align\n", |
| head->alloc_size, res - orig_res); |
| split_piece_for_free_list(arena, head, pre, pre_sz, |
| head->alloc_size - (res - orig_res)); |
| ASSERT(head->alloc_size > request_size, "pre-align miscalculation"); |
| head->u.unfree.request_diff = head->alloc_size - request_size; |
| } |
| LOG(2, "\treplace_alloc_common arena="PFX" flags=0x%x request=%d, align=%d alloc=%d " |
| "=> "PFX"\n", arena, head->flags, |
| chunk_request_size(head), alignment, head->alloc_size, res); |
| if (TEST(ALLOC_ZERO, flags)) |
| memset(res, 0, request_size); |
| |
| ASSERT(head->alloc_size >= request_size, "chunk too small"); |
| |
| notify_client_alloc(drcontext, (byte *)res, head, flags, mc, caller); |
| |
| if (chunk_request_size(head) >= LARGE_MALLOC_MIN_SIZE) |
| malloc_large_add(res, request_size); |
| else |
| STATS_INC(num_mallocs); |
| |
| replace_alloc_common_done: |
| arena_unlock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| |
| return res; |
| } |
| |
| static void |
| check_type_match(void *ptr, chunk_header_t *head, uint free_type, |
| alloc_flags_t flags, dr_mcontext_t *mc, app_pc caller) |
| { |
| uint alloc_main_type = (head->flags & MALLOC_ALLOCATOR_FLAGS); |
| uint free_main_type = (free_type & MALLOC_ALLOCATOR_FLAGS); |
| const char *action = (TEST(ALLOC_IS_REALLOC, flags) ? "realloc" : |
| (TEST(ALLOC_IS_QUERY, flags) ? "queried" : "freed")); |
| if (TEST(ALLOC_IGNORE_MISMATCH, flags)) |
| return; |
| LOG(3, "\tcheck_type_match: alloc flags=0x%x vs free=0x%x\n", |
| head->flags, free_type); |
| ASSERT((free_type & ~(ALLOCATOR_TYPE_FLAGS)) == 0, "invalid type flags"); |
| if ((alloc_main_type != MALLOC_ALLOCATOR_UNKNOWN && |
| free_main_type != MALLOC_ALLOCATOR_UNKNOWN) && |
| alloc_main_type != free_main_type) { |
| /* i#1533: ensure we're not in a private std::_DebugHeapDelete that we missed |
| * up front. We want the app caller, so the caller of our "caller" here |
| * (which is our replace_* routine). |
| */ |
| app_pc app_caller = callstack_next_retaddr(mc); |
| if (!check_for_private_debug_delete(app_caller)) { |
| client_mismatched_heap(caller, (byte *)ptr, mc, |
| malloc_alloc_type_name(alloc_main_type), |
| malloc_free_type_name(free_main_type), action, |
| head->user_data, true/*C vs C++*/); |
| } |
| } |
| #ifdef WINDOWS |
| /* For pre-us we don't know whether Rtl or libc layer */ |
| else if (!TEST(CHUNK_PRE_US, head->flags) && |
| (free_type & CHUNK_LAYER_RTL) != (head->flags & CHUNK_LAYER_RTL) && |
| !TEST(CHUNK_LAYER_NOCHECK, free_type | head->flags)) { |
| /* i#1197: report libc/Rtl mismatches */ |
| client_mismatched_heap(caller, (byte *)ptr, mc, |
| malloc_layer_name(head->flags), |
| malloc_layer_name(free_type), action, |
| head->user_data, false/*!C vs C++*/); |
| } |
| #endif |
| } |
| |
| /* See i#1581 notes above. |
| * Unfortunately we can't easily cast to the bool return type (from void*) here |
| * as gcc then complains about the calls that ignore the return value: so each |
| * caller who needs the return value must cast. |
| */ |
| #define ONDSTACK_REPLACE_FREE_COMMON(arena, ptr, flags, dc, mc, caller, free_type) \ |
| dr_call_on_clean_stack(dc, (void* (*)(void)) replace_free_common, arena, ptr, \ |
| (void *)(ptr_uint_t)(flags), dc, mc, caller, \ |
| (void *)(ptr_uint_t)(free_type), NULL) |
| |
| /* Up to caller to verify that ptr is inside arena. |
| * invoke_client controls whether client_handle_free() is called. |
| * |
| * If invoked from an outer drwrap_replace_native() layer, this should be invoked |
| * via ONDSTACK_REPLACE_FREE_COMMON(). |
| */ |
| static bool |
| replace_free_common(arena_header_t *arena, void *ptr, alloc_flags_t flags, |
| void *drcontext, dr_mcontext_t *mc, app_pc caller, uint free_type) |
| { |
| chunk_header_t *head = header_from_ptr(ptr); |
| malloc_info_t info; |
| |
| if (!is_live_alloc(ptr, arena, head)) { /* including NULL */ |
| /* w/o early inject, or w/ delayed instru, there are allocs in place |
| * before we took over |
| */ |
| head = hashtable_lookup(&pre_us_table, (void *)ptr); |
| if (head != NULL && !TEST(CHUNK_FREED, head->flags)) { |
| /* XXX i#1195: need to call the app's free routine. |
| * Xref DRi#497 for a mechanism to do this; or, we could call |
| * it natively (after swapping TLS back). |
| * For Windows we can assume Rtl since that's where we iterated. |
| * For now we're just leaking these, which we claim is a feature |
| * b/c we'll catch use-after-free :) |
| * FIXME: That's fine for the small # at late inject, but for |
| * attach at a random point that's not good enough: probably |
| * better to free immediately rather than have some extra code |
| * to delay pre-us frees. If we do that we may need an |
| * external table lock. |
| */ |
| /* We do not report mismatches on pre-us allocs: we never saw the alloc! */ |
| return true; |
| } else { |
| /* try to report mismatches on common invalid ptr cases */ |
| byte *p = (byte *) ptr; |
| bool identified = false; |
| bool valid = false; |
| if (p != NULL) { |
| const size_t slot_sz = sizeof(size_t); |
| /* try one slot back, in case this is an array w/ size passed to delete */ |
| head = header_from_ptr(p - slot_sz); |
| if (is_live_alloc(p - slot_sz, arena, head)) { |
| check_type_match(p - slot_sz, head, free_type, |
| flags, mc, caller); |
| identified = true; |
| } |
| if (!identified) { |
| /* try one slot in, in case this is a non-array passed to delete[] */ |
| head = header_from_ptr(p + slot_sz); |
| if (is_live_alloc(p + slot_sz, arena, head)) { |
| check_type_match(p + slot_sz, head, free_type, |
| flags, mc, caller); |
| identified = true; |
| } |
| } |
| } |
| #ifdef WINDOWS |
| if (!identified && (ptr_uint_t)p > DBGCRT_PRE_REDZONE_SIZE) { |
| /* i#607 part A: debug CRT code sometimes allocates via an internal |
| * routine like _calloc_dbg_impl() which adds a redzone and |
| * calls RtlAllocateHeap; the same object is later freed by |
| * passing the inside-redzone pointer to free(). |
| * With symbols, we simply intercept the internal routine; |
| * without, it's too complex to try and retroactively add our redzone |
| * instead of the CRT redzone and skip over some callers, so we |
| * live w/o our own redzone for this handful of allocs and simply |
| * try to avoid reporting invalid args on the free (the Rtl |
| * vs libc layer mismatch, which happens w/ release CRT too, |
| * is suppressed as part of i#960). |
| * But, this no longer happens with DR > r1728+ (it was an FLS |
| * transparency bug that caused _getptd_noexit() to call |
| * _calloc_dbg_impl(): and it's the only code I see that does so!). |
| */ |
| head = header_from_ptr(p - DBGCRT_PRE_REDZONE_SIZE); |
| if (is_live_alloc(p - DBGCRT_PRE_REDZONE_SIZE, arena, head) && |
| chunk_request_size(head) > DBGCRT_PRE_REDZONE_SIZE + |
| DBGCRT_POST_REDZONE_SIZE) { |
| identified = true; |
| valid = true; |
| ptr = (void *) (p - DBGCRT_PRE_REDZONE_SIZE); |
| LOG(2, "inner-redzone pointer "PFX" => real alloc "PFX"\n", p, ptr); |
| STATS_INC(dbgcrt_mismatch); |
| } |
| } |
| #endif |
| if (!valid) { /* call regardless of ALLOC_INVOKE_CLIENT */ |
| client_invalid_heap_arg(caller, (byte *)ptr, mc, |
| /* XXX: we might be replacing RtlHeapFree or |
| * _free_dbg but it's not worth trying to |
| * store the exact name |
| */ |
| "free", true/*free*/); |
| return false; |
| } |
| } |
| } |
| |
| arena_lock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| |
| check_type_match(ptr, head, free_type, flags, mc, caller); |
| |
| /* current model is to throw the data away when we put on free list. |
| * would we ever want to keep the alloc callstack for freed entries, |
| * or we always want to replace w/ free callstack? |
| */ |
| header_to_info(head, &info, NULL, 0); |
| if (TEST(ALLOC_INVOKE_CLIENT_DATA, flags)) |
| client_remove_malloc_pre(&info); |
| if (TESTANY(CHUNK_MMAP | CHUNK_PRE_US, head->flags)) { |
| if (head->user_data != NULL) |
| client_malloc_data_free(head->user_data); /* ignores ALLOC_INVOKE_CLIENT */ |
| head->user_data = NULL; |
| } else |
| head->user_data = client_malloc_data_to_free_list(head->user_data, mc, caller); |
| |
| /* Mark this after client_remove_malloc_pre so client can iterate |
| * and see the alloc as currently-live, matching wrapping behavior. |
| */ |
| head->flags |= CHUNK_FREED; /* even if CHUNK_MMAP, so a client iter will skip */ |
| |
| if (TEST(ALLOC_INVOKE_CLIENT_DATA, flags)) |
| client_remove_malloc_post(&info); |
| if (TEST(ALLOC_INVOKE_CLIENT_ACTION, flags)) { |
| /* we ignore the return value */ |
| client_handle_free(&info, (byte *)ptr, mc, caller, NULL, |
| false/*reuse delayed*/ _IF_WINDOWS(NULL)); |
| } |
| |
| if (chunk_request_size(head) >= LARGE_MALLOC_MIN_SIZE && |
| !TEST(CHUNK_PRE_US, head->flags)) |
| malloc_large_remove(ptr); |
| |
| if (!TESTANY(CHUNK_MMAP | CHUNK_PRE_US, head->flags)) { |
| LOG(2, "\treplace_free_common "PFX" == request=%d, alloc=%d, arena="PFX"\n", |
| ptr, chunk_request_size(head), head->alloc_size, arena); |
| add_to_delay_list(arena, head); |
| /* At this point head may be invalid to de-ref, if coalesced or freed (this |
| * will only happen if -delay_frees is 0) |
| */ |
| } else if (TEST(CHUNK_MMAP, head->flags)) { |
| /* see comments in alloc routine about not delaying the free */ |
| byte *map = (byte *)head - head->u.unfree.prev_size_shr; |
| mmap_header_t *mhead = (mmap_header_t *) map; |
| size_t map_size = mhead->map_size; |
| ASSERT(mhead->head == head, "mmap header corrupted"); |
| LOG(2, "\tlarge alloc %d freed => munmap @"PFX"\n", chunk_request_size(head), map); |
| heap_region_remove(map, map + map_size, mc); |
| if (!os_large_free(map, map_size)) |
| ASSERT(false, "munmap failed"); |
| } |
| |
| STATS_INC(num_frees); |
| |
| arena_unlock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| return true; |
| } |
| |
| /* See i#1581 notes above */ |
| #define ONDSTACK_REPLACE_REALLOC_COMMON(arena, ptr, size, flags, dc, mc, caller, type) \ |
| dr_call_on_clean_stack(dc, (void* (*)(void)) replace_realloc_common, arena, ptr, \ |
| (void *)(ptr_uint_t)(size), (void *)(ptr_uint_t)(flags), \ |
| dc, mc, caller, (void *)(ptr_uint_t)(type)) |
| |
| /* If invoked from an outer drwrap_replace_native() layer, this should be invoked |
| * via ONDSTACK_REPLACE_REALLOC_COMMON(). |
| */ |
| static byte * |
| replace_realloc_common(arena_header_t *arena, byte *ptr, size_t size, |
| alloc_flags_t flags, void *drcontext, dr_mcontext_t *mc, |
| app_pc caller, uint alloc_type) |
| { |
| byte *res = NULL; |
| chunk_header_t *head = header_from_ptr(ptr); |
| malloc_info_t old_info; |
| malloc_info_t new_info; |
| alloc_flags_t sub_flags = flags; |
| LOG(2, " %s: "PFX" %d bytes arena="PFX"\n", __FUNCTION__, ptr, size, arena); |
| if (ptr == NULL) { |
| if (TEST(ALLOC_ALLOW_NULL, flags)) { |
| client_handle_realloc_null(caller, mc); |
| res = (void *) replace_alloc_common(arena, size, 0, |
| flags | ALLOC_IS_REALLOC | |
| ALLOC_INVOKE_CLIENT, |
| drcontext, mc, caller, alloc_type); |
| } else { |
| client_handle_alloc_failure(size, caller, mc); |
| res = NULL; |
| } |
| return res; |
| } else if (size == 0 && !TEST(ALLOC_ALLOW_EMPTY, flags)) { |
| replace_free_common(arena, ptr, |
| flags | ALLOC_IS_REALLOC | ALLOC_INVOKE_CLIENT, |
| drcontext, mc, caller, alloc_type); |
| return NULL; |
| } else if (!is_live_alloc(ptr, arena, head)) { |
| /* w/o early inject, or w/ delayed instru, there are allocs in place |
| * before we took over |
| */ |
| head = hashtable_lookup(&pre_us_table, (void *)ptr); |
| if (head == NULL || TEST(CHUNK_FREED, head->flags)) { |
| client_invalid_heap_arg(caller, (byte *)ptr, mc, |
| /* XXX: we might be replacing RtlReallocateHeap or |
| * _realloc_dbg but it's not worth trying to |
| * store the exact name |
| */ |
| "realloc", false/*!free*/); |
| return NULL; |
| } |
| } |
| /* if we reach here, this is a regular realloc */ |
| arena_lock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| sub_flags &= ~ALLOC_SYNCHRONIZE; /* sub-calls don't need synch */ |
| ASSERT(head != NULL, "should return before here"); |
| #ifdef WINDOWS |
| check_type_match(ptr, head, alloc_type, flags, mc, caller); |
| #endif |
| header_to_info(head, &old_info, ptr, 0); |
| if (head->alloc_size >= size && |
| head->alloc_size - size <= REQUEST_DIFF_MAX && |
| !TEST(CHUNK_PRE_US, head->flags)) { |
| LOG(2, "\t%s: in-place realloc from %d to %d bytes\n", __FUNCTION__, |
| chunk_request_size(head), size); |
| /* XXX: if shrinking a lot, should free and re-malloc, or split, to save space */ |
| if (chunk_request_size(head) >= LARGE_MALLOC_MIN_SIZE) |
| malloc_large_remove(ptr); |
| if (chunk_request_size(head) < size && TEST(ALLOC_ZERO, flags)) |
| memset(ptr + chunk_request_size(head), 0, size - chunk_request_size(head)); |
| head->u.unfree.request_diff = head->alloc_size - size; |
| if (chunk_request_size(head) >= LARGE_MALLOC_MIN_SIZE) |
| malloc_large_add(ptr, chunk_request_size(head)); |
| res = ptr; |
| header_to_info(head, &new_info, NULL, flags | ALLOC_IS_REALLOC); |
| client_handle_realloc(drcontext, &old_info, &new_info, false, mc); |
| } else if (!TEST(ALLOC_IN_PLACE_ONLY, flags) || head->alloc_size >= size) { |
| size_t old_request_size = chunk_request_size(head); |
| bool was_mmap = TEST(CHUNK_MMAP, head->flags); |
| LOG(2, "\t%s: malloc-and-free realloc from %d to %d bytes\n", __FUNCTION__, |
| old_request_size, size); |
| /* XXX: use mremap for mmapped alloc! */ |
| /* XXX: if final chunk in arena, extend in-place */ |
| res = (void *) replace_alloc_common(arena, size, 0, |
| sub_flags | ALLOC_IS_REALLOC /*no client*/, |
| drcontext, mc, caller, alloc_type); |
| if (res != NULL) { |
| head = header_from_ptr(res); |
| memcpy(res, ptr, MIN(size, old_request_size)); |
| /* Prevent client iteration in client_remove_malloc_{pre,post} from |
| * seeing the new alloc and complaining that it has not yet had |
| * client_add_malloc_{pre,post} called on it yet. |
| */ |
| head->flags |= CHUNK_SKIP_ITER; |
| replace_free_common(arena, ptr, |
| sub_flags | ALLOC_IS_REALLOC | |
| /* we do want client_remove_malloc_{pre,post} as they |
| * must be called around the actual free -- but |
| * no client_handle_free() |
| */ |
| ALLOC_INVOKE_CLIENT_DATA /* not _ACTION */ | |
| ALLOC_IGNORE_MISMATCH, |
| drcontext, mc, caller, alloc_type); |
| head->flags &= ~CHUNK_SKIP_ITER; |
| header_to_info(head, &new_info, NULL, flags | ALLOC_IS_REALLOC); |
| /* We delay client_add_malloc_{pre,post} until here, to avoid a client |
| * iterating inside the event and seeing both the new and old allocs! |
| */ |
| notify_client_alloc(drcontext, (byte *)res, head, |
| flags | ALLOC_IS_REALLOC | ALLOC_INVOKE_CLIENT_DATA, |
| mc, caller); |
| client_handle_realloc(drcontext, &old_info, &new_info, was_mmap, mc); |
| } |
| } |
| arena_unlock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| return res; |
| } |
| |
| /* Returns -1 on failure. |
| * We don't bother to swap stacks here as we do not expect to walk the |
| * callstack. |
| */ |
| static size_t |
| replace_size_common(arena_header_t *arena, byte *ptr, alloc_flags_t flags, |
| void *drcontext, dr_mcontext_t *mc, app_pc caller, |
| uint alloc_type) |
| { |
| chunk_header_t *head = header_from_ptr(ptr); |
| size_t res; |
| LOG(2, "%s: "PFX", flags 0x%x, arena "PFX"\n", __FUNCTION__, ptr, flags, arena); |
| arena_lock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| if (!is_live_alloc(ptr, arena, head)) { |
| /* w/o early inject, or w/ delayed instru, there are allocs in place |
| * before we took over |
| */ |
| head = hashtable_lookup(&pre_us_table, (void *)ptr); |
| if (head == NULL || TEST(CHUNK_FREED, head->flags)) { |
| client_invalid_heap_arg(caller, (byte *)ptr, mc, |
| IF_WINDOWS_ELSE("_msize", "malloc_usable_size"), |
| false/*!free*/); |
| arena_unlock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| return (size_t)-1; |
| } |
| } |
| #ifdef WINDOWS |
| check_type_match(ptr, head, |
| /* i#1207: malloc_usable_size() on operator new memory |
| * should not be an error. We only want to check for Rtl |
| * vs libc mismatches. |
| */ |
| MALLOC_ALLOCATOR_UNKNOWN | |
| (TEST(CHUNK_LAYER_RTL, alloc_type) ? CHUNK_LAYER_RTL : 0), |
| flags | ALLOC_IS_QUERY, mc, caller); |
| #endif |
| res = chunk_request_size(head); /* we do not allow using padding */ |
| arena_unlock(drcontext, arena, TEST(ALLOC_SYNCHRONIZE, flags)); |
| return res; |
| } |
| |
| #if defined(WINDOWS) || defined(MACOS) |
| /* Caller should hold any required locks, though we are probably assuming |
| * no synch is needed here. |
| */ |
| static void |
| destroy_arena_family(arena_header_t *arena, dr_mcontext_t *mc, bool free_chunks, |
| app_pc caller) |
| { |
| arena_header_t *a, *next_a; |
| chunk_header_t *head; |
| malloc_info_t info; |
| for (a = arena; a != NULL; a = next_a) { |
| next_a = a->next_arena; |
| if (free_chunks) { |
| byte *cur = a->start_chunk; |
| while (cur < a->next_chunk) { |
| head = header_from_ptr(cur); |
| if (!TEST(CHUNK_FREED, head->flags)) { |
| /* XXX: like mmaps for large allocs, we assume the OS |
| * re-using the memory won't be immediate, so we go w/ |
| * a simple no-delay policy on the frees |
| */ |
| header_to_info(head, &info, NULL, 0); |
| client_remove_malloc_pre(&info); |
| client_remove_malloc_post(&info); |
| if (head->user_data != NULL) |
| client_malloc_data_free(head->user_data); |
| client_handle_free(&info, info.base, mc, caller, NULL, |
| true/*not delayed*/ _IF_WINDOWS((HANDLE)arena)); |
| } |
| cur += head->alloc_size + inter_chunk_space(); |
| } |
| } |
| heap_region_remove((byte *)a, a->reserve_end, mc); |
| arena_free(a); |
| } |
| } |
| #endif |
| |
| /*************************************************************************** |
| * iterator |
| */ |
| |
| typedef struct _alloc_iter_data_t { |
| bool only_live; |
| malloc_iter_cb_t cb; |
| void *data; |
| } alloc_iter_data_t; |
| |
| static inline bool |
| skip_chunk_in_iter(alloc_iter_data_t *data, chunk_header_t *head) |
| { |
| return (data->only_live && TEST(CHUNK_FREED, head->flags)) || |
| TEST(CHUNK_SKIP_ITER, head->flags); |
| } |
| |
| static bool |
| alloc_iter_own_arena(byte *iter_arena_start, byte *iter_arena_end, uint flags |
| _IF_WINDOWS(HANDLE heap), void *iter_data) |
| { |
| alloc_iter_data_t *data = (alloc_iter_data_t *) iter_data; |
| chunk_header_t *head; |
| byte *cur; |
| arena_header_t *arena = (arena_header_t *) iter_arena_start; |
| malloc_info_t info; |
| |
| /* We use the HEAP_MMAP flag to find our mmapped chunks. We can't easily |
| * use the large malloc tree b/c it has pre_us allocs too (i#1051). |
| */ |
| /* We rely on the heap region lock to avoid races accessing this */ |
| if (TEST(HEAP_MMAP, flags)) { |
| chunk_header_t *head = header_from_mmap_base(iter_arena_start); |
| if (!skip_chunk_in_iter(data, head)) { |
| header_to_info(head, &info, NULL, 0); |
| ASSERT(TEST(CHUNK_MMAP, head->flags), "mmap chunk inconsistent"); |
| LOG(2, "%s: "PFX"-"PFX"\n", __FUNCTION__, info.base, |
| info.base + chunk_request_size(head)); |
| if (!data->cb(&info, data->data)) |
| return false; |
| } |
| } |
| |
| if (TEST(HEAP_PRE_US, flags) || !TEST(HEAP_ARENA, flags)) |
| return true; |
| |
| LOG(2, "%s: "PFX"-"PFX"\n", __FUNCTION__, iter_arena_start, iter_arena_end); |
| /* Synchronize with splits or coalesces (i#949) */ |
| iterator_lock(arena, false/*!in alloc*/); |
| cur = arena->start_chunk; |
| while (cur < arena->next_chunk) { |
| head = header_from_ptr(cur); |
| LOG(3, "\tchunk %s "PFX"-"PFX"\n", TEST(CHUNK_FREED, head->flags) ? "freed" : "", |
| ptr_from_header(head), ptr_from_header(head) + head->alloc_size); |
| if (!skip_chunk_in_iter(data, head)) { |
| header_to_info(head, &info, NULL, 0); |
| if (!data->cb(&info, data->data)) { |
| iterator_unlock(arena, false/*!in alloc*/); |
| return false; |
| } |
| } |
| cur += head->alloc_size + inter_chunk_space(); |
| } |
| iterator_unlock(arena, false/*!in alloc*/); |
| return true; |
| } |
| |
| /* This will end up grabbing DR locks (iterator_lock()) but that's fine even |
| * in an app context, as it's not while we're marked safe-to-suspend and |
| * it's only in our own code. |
| */ |
| static void |
| alloc_iterate(malloc_iter_cb_t cb, void *iter_data, bool only_live) |
| { |
| /* Strategy: |
| * + can iterate arenas via heap rbtree |
| * - each arena of ours can be walked straight through |
| * - for mmap chunks, we can't use the large_malloc_tree b/c it has |
| * pre-us, so we store a new flag in heap regions: HEAP_MMAP (i#1051) |
| * + ignore pre-us arenas and instead iterate pre_us_table |
| */ |
| alloc_iter_data_t data = {only_live, cb, iter_data}; |
| uint i; |
| malloc_info_t info; |
| |
| LOG(2, "%s\n", __FUNCTION__); |
| |
| ASSERT(!alloc_ops.external_headers, "NYI: walk malloc table"); |
| |
| LOG(3, "%s: iterating heap regions\n", __FUNCTION__); |
| heap_region_iterate(alloc_iter_own_arena, &data); |
| |
| LOG(3, "%s: iterating pre-us allocs\n", __FUNCTION__); |
| /* XXX: should add hashtable_iterate() to drcontainers */ |
| /* See notes at top: this table is only modified at init or teardown |
| * and thus needs no external lock. |
| */ |
| for (i = 0; i < HASHTABLE_SIZE(pre_us_table.table_bits); i++) { |
| hash_entry_t *he; |
| for (he = pre_us_table.table[i]; he != NULL; he = he->next) { |
| chunk_header_t *head = (chunk_header_t *) he->payload; |
| byte *start = he->key; |
| if (!skip_chunk_in_iter(&data, head)) { |
| LOG(3, "\tpre-us "PFX"-"PFX"-"PFX"\n", |
| start, start + chunk_request_size(head), start + head->alloc_size); |
| header_to_info(head, &info, start, 0); |
| if (!cb(&info, iter_data)) |
| break; |
| } |
| } |
| } |
| } |
| |
| static bool |
| overlap_helper(chunk_header_t *head, |
| malloc_info_t *info DR_PARAM_INOUT, |
| uint positive_flags, |
| uint negative_flags) |
| { |
| /* XXX: this is the one DR_PARAM_INOUT case of this structure. Once we extend it, |
| * we need to handle back-compat struct size here. For now, header_to_info() |
| * is used here and by above internal code that doesn't set struct-size. |
| */ |
| if (info->struct_size != sizeof(*info)) |
| ASSERT(false, "size is wrong"); |
| LOG(4, "overlap_helper for "PFX": 0x%x vs pos=0x%x neg=0x%x\n", |
| ptr_from_header(head), head->flags, positive_flags, negative_flags); |
| if (TESTALL(positive_flags, head->flags) && |
| !TEST(negative_flags, head->flags)) { |
| LOG(4, "overlap_helper match for "PFX"\n", ptr_from_header(head)); |
| if (info != NULL) |
| header_to_info(head, info, NULL, 0); |
| return true; |
| } |
| return false; |
| } |
| |
| /* Considers alloc_size to overlap, but returns request size in *found_end */ |
| static bool |
| alloc_replace_overlaps_region(byte *start, byte *end, |
| malloc_info_t *info DR_PARAM_INOUT, |
| uint positive_flags, |
| uint negative_flags) |
| { |
| /* Maintaining an rbtree is expensive, particularly b/c in order to keep |
| * freed blocks in there until actual re-alloc we need to have rbtree |
| * operations on every free and every malloc. |
| * Since this query should only be when reporting an unaddr, we go ahead |
| * do an expensive lookup, avoiding any maintenance on malloc or free. |
| * |
| * XXX: pattern mode may need a more performant lookup |
| * |
| * XXX: Note that this is not a true overlap of [start,end) and instead only |
| * looks up start for now. But, it's pretty unlikely to have the start be before |
| * a heap arena and still overlap a free chunk. For the large malloc lookup, it |
| * will fall through to heap arena for non-mmap, and mmap has similar arg about |
| * being unlikely to overlap w/o overlapping start. But if we want to we could |
| * add a heap_region_overlaps() routine. |
| */ |
| bool found = false; |
| byte *found_arena_start, *found_arena_end; |
| uint flags; |
| size_t size; |
| LOG(4, "%s: looking for "PFX"-"PFX"\n", __FUNCTION__, start, end); |
| if (malloc_large_lookup(start, &found_arena_start, &size)) { |
| /* XXX: potentially racy! Would need to find the containing |
| * arena and grab its lock to safely access the header. |
| */ |
| chunk_header_t *head = header_from_ptr(found_arena_start); |
| found = overlap_helper(head, info, positive_flags, negative_flags); |
| ASSERT(size == chunk_request_size(head), "inconsistent"); |
| } else if (heap_region_bounds(start, &found_arena_start, &found_arena_end, &flags)) { |
| if (TEST(HEAP_PRE_US, flags)) { |
| /* walk pre-us table. |
| * See notes at top: this table is only modified at init or teardown |
| * and thus needs no external lock. |
| */ |
| uint i; |
| for (i = 0; i < HASHTABLE_SIZE(pre_us_table.table_bits); i++) { |
| hash_entry_t *he; |
| for (he = pre_us_table.table[i]; he != NULL; he = he->next) { |
| chunk_header_t *head = (chunk_header_t *) he->payload; |
| byte *chunk_start = he->key; |
| if (start < chunk_start + head->alloc_size && end >= chunk_start) { |
| found = overlap_helper(head, info, |
| positive_flags, negative_flags); |
| goto overlap_inner_loop_break; |
| } |
| } |
| } |
| overlap_inner_loop_break: |
| ; /* nothing */ |
| } else if (TEST(HEAP_ARENA, flags)) { |
| /* walk arena */ |
| /* XXX: make a shared internal iterator for this? */ |
| arena_header_t *arena = (arena_header_t *) found_arena_start; |
| byte *cur = arena->start_chunk; |
| ASSERT(!alloc_ops.external_headers, "NYI: walk malloc table"); |
| /* Synchronize with splits or coalesces (i#949) */ |
| iterator_lock(arena, false/*!in alloc*/); |
| while (cur < arena->next_chunk) { |
| byte *chunk_start; |
| chunk_header_t *head = header_from_ptr(cur); |
| chunk_start = ptr_from_header(head); |
| /* Check vs alloc_size + redzones. Even if we've coalesced, or |
| * if beyond requested size, still considered to overlap freed |
| * area. Don't check vs inter_chunk_space: callers don't want a |
| * match if beyond redzone. |
| */ |
| LOG(4, "\tchunk "PFX"-"PFX"\n", chunk_start, |
| chunk_start + head->alloc_size); |
| if (start < chunk_start + head->alloc_size + alloc_ops.redzone_size && |
| end >= chunk_start - alloc_ops.redzone_size) { |
| found = overlap_helper(head, info, positive_flags, negative_flags); |
| break; |
| } |
| cur += head->alloc_size + inter_chunk_space(); |
| } |
| iterator_unlock(arena, false/*!in alloc*/); |
| } else if (TEST(HEAP_MMAP, flags)) { |
| /* i#1210: the large malloc tree stores only the requested size, so |
| * a padding-size overlap will end up here. |
| */ |
| chunk_header_t *head = header_from_mmap_base(found_arena_start); |
| found = overlap_helper(head, info, positive_flags, negative_flags); |
| } else |
| ASSERT(false, "large lookup should have found it"); |
| } |
| return found; |
| } |
| |
| bool |
| alloc_replace_overlaps_delayed_free(byte *start, byte *end, |
| malloc_info_t *info DR_PARAM_OUT) |
| { |
| return alloc_replace_overlaps_region(start, end, info, CHUNK_DELAY_FREE, 0); |
| } |
| |
| bool |
| alloc_replace_overlaps_any_free(byte *start, byte *end, |
| malloc_info_t *info DR_PARAM_OUT) |
| { |
| return alloc_replace_overlaps_region(start, end, info, CHUNK_FREED, 0); |
| } |
| |
| bool |
| alloc_replace_overlaps_malloc(byte *start, byte *end, |
| malloc_info_t *info DR_PARAM_OUT) |
| { |
| return alloc_replace_overlaps_region(start, end, info, 0, CHUNK_FREED); |
| } |
| |
| /*************************************************************************** |
| * app-facing interface |
| */ |
| |
| static arena_header_t * |
| arena_for_libc_alloc(void *drcontext) |
| { |
| #ifdef WINDOWS |
| /* i#939: we need to wrap the libc alloc routines, but at that outer |
| * point we don't know what Heap they'll pass to the Rtl routines. |
| * Thus we ourselves create a single Heap per libc alloc routine set |
| * and we pass it in drwrap's data slot. |
| * We can't use our default heap (cur_arena) b/c we need a private |
| * Heap for each library that we can destroy when it unloads. |
| * |
| * XXX: this is not purely transparent and makes some assumptions |
| * about there only being one Heap per libc set, a libc set's |
| * lifetime never exceeding its library, and a libc set never |
| * destroying its own Heap (which remains empty in our impl unless |
| * a non-libc-set routine uses that Heap) before its library exits. |
| * But, it's not clear that we can do any better. |
| */ |
| arena_header_t *arena; |
| alloc_routine_entry_t *e = (alloc_routine_entry_t *) |
| dr_read_saved_reg(drcontext, DRWRAP_REPLACE_NATIVE_DATA_SLOT); |
| ASSERT(e != NULL, "invalid stored arg"); |
| arena = (arena_header_t *) alloc_routine_set_get_user_data(e); |
| ASSERT(arena != NULL && |
| (arena == cur_arena || TEST(ARENA_LIBC_DEFAULT, arena->flags)), |
| "invalid per-set arena"); |
| if (TEST(ARENA_LIBC_SPECULATIVE, arena->flags)) { |
| arena->flags &= ~ARENA_LIBC_SPECULATIVE; |
| if (arena != cur_arena) |
| arena = check_libc_vs_process_heap(e, arena); |
| } |
| return arena; |
| #else |
| /* we assume that pre-us (which doesn't use cur_arena) is checked by caller */ |
| return cur_arena; |
| #endif |
| } |
| |
| static void * |
| replace_malloc(size_t size) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_malloc %d\n", size); |
| res = ONDSTACK_REPLACE_ALLOC_COMMON(arena, size, 0, |
| ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_malloc, |
| MALLOC_ALLOCATOR_MALLOC); |
| LOG(2, "\treplace_malloc %d => "PFX"\n", size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| /* Unfortunately there's no easy way to share code here. We do not want an |
| * extra frame. We could use macros. |
| */ |
| static void * |
| replace_malloc_nomatch(size_t size) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_malloc (nomatch) %d\n", size); |
| res = ONDSTACK_REPLACE_ALLOC_COMMON(arena, size, 0, |
| ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, |
| (app_pc)replace_malloc/*avoid confusion*/, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_NOCHECK); |
| LOG(2, "\treplace_malloc %d => "PFX"\n", size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_calloc(size_t nmemb, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| byte *res; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_calloc %d %d\n", nmemb, size); |
| if (unsigned_multiply_will_overflow(nmemb, size)) { |
| LOG(2, "calloc size will overflow => returning NULL\n"); |
| client_handle_alloc_failure(UINT_MAX, (app_pc)replace_calloc, &mc); |
| res = NULL; |
| } else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, nmemb * size, 0, |
| ALLOC_SYNCHRONIZE | ALLOC_ZERO | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_calloc, |
| MALLOC_ALLOCATOR_MALLOC); |
| } |
| LOG(2, "\treplace_calloc %d %d => "PFX"\n", nmemb, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return (void *) res; |
| } |
| |
| /* See comment on replace_malloc_nomatch about sharing code */ |
| static void * |
| replace_calloc_nomatch(size_t nmemb, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| byte *res; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_calloc %d %d\n", nmemb, size); |
| if (unsigned_multiply_will_overflow(nmemb, size)) { |
| LOG(2, "calloc size will overflow => returning NULL\n"); |
| client_handle_alloc_failure(UINT_MAX, (app_pc)replace_calloc, &mc); |
| res = NULL; |
| } else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, nmemb * size, 0, |
| ALLOC_SYNCHRONIZE | ALLOC_ZERO | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, |
| (app_pc)replace_calloc/*avoid confusion*/, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_NOCHECK); |
| } |
| LOG(2, "\treplace_calloc %d %d => "PFX"\n", nmemb, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return (void *) res; |
| } |
| |
| static void * |
| replace_realloc(void *ptr, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_realloc "PFX" %d\n", ptr, size); |
| res = ONDSTACK_REPLACE_REALLOC_COMMON(arena, ptr, size, |
| ALLOC_SYNCHRONIZE | ALLOC_ALLOW_NULL, |
| drcontext, &mc, (app_pc)replace_realloc, |
| MALLOC_ALLOCATOR_MALLOC); |
| LOG(2, "\treplace_realloc %d => "PFX"\n", size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| /* See comment on replace_malloc_nomatch about sharing code */ |
| static void * |
| replace_realloc_nomatch(void *ptr, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_realloc "PFX" %d\n", ptr, size); |
| res = ONDSTACK_REPLACE_REALLOC_COMMON(arena, ptr, size, |
| ALLOC_SYNCHRONIZE | ALLOC_ALLOW_NULL, |
| drcontext, &mc, |
| (app_pc)replace_realloc/*avoid confusion*/, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_NOCHECK); |
| LOG(2, "\treplace_realloc %d => "PFX"\n", size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void |
| replace_free(void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_free "PFX"\n", ptr); |
| ONDSTACK_REPLACE_FREE_COMMON(arena, ptr, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_free, |
| MALLOC_ALLOCATOR_MALLOC); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| /* See comment on replace_malloc_nomatch about sharing code */ |
| static void |
| replace_free_nomatch(void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_free "PFX"\n", ptr); |
| ONDSTACK_REPLACE_FREE_COMMON(arena, ptr, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, |
| (app_pc)replace_free/*deliberate: avoid confusion*/, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_NOCHECK); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static size_t |
| replace_malloc_usable_size(void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| size_t res; |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_malloc_usable_size "PFX"\n", ptr); |
| res = replace_size_common(arena, ptr, ALLOC_SYNCHRONIZE, drcontext, &mc, |
| (app_pc)replace_malloc_usable_size, |
| MALLOC_ALLOCATOR_MALLOC); |
| if (res == (size_t)-1) |
| res = 0; /* 0 on failure */ |
| LOG(2, "\treplace_malloc_usable_size "PFX" => "PIFX"\n", ptr, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| /* See comment on replace_malloc_nomatch about sharing code */ |
| static size_t |
| replace_malloc_usable_size_nomatch(void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| size_t res; |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "replace_malloc_usable_size "PFX"\n", ptr); |
| res = replace_size_common(arena, ptr, ALLOC_SYNCHRONIZE, drcontext, &mc, |
| (app_pc)replace_malloc_usable_size/*avoid confusion*/, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_NOCHECK); |
| if (res == (size_t)-1) |
| res = 0; /* 0 on failure */ |
| LOG(2, "\treplace_malloc_usable_size "PFX" => "PIFX"\n", ptr, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| #ifdef UNIX |
| static int |
| replace_posix_memalign(void **out, size_t align, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| int res = 0; |
| byte *alloc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s align=%d size=%d\n", __FUNCTION__, align, size); |
| /* alignment must be power of 2 */ |
| if (!IS_POWER_OF_2(align) || out == NULL) { |
| client_handle_alloc_failure(size, (app_pc)replace_posix_memalign, &mc); |
| res = EINVAL; |
| } else { |
| alloc = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, size, align, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_posix_memalign, MALLOC_ALLOCATOR_MALLOC); |
| if (!dr_safe_write(out, sizeof(alloc), &alloc, NULL)) { |
| client_handle_alloc_failure(size, (app_pc)replace_posix_memalign, &mc); |
| res = EINVAL; |
| } |
| } |
| LOG(2, "\t%s %d %d => "PFX"\n", __FUNCTION__, align, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_memalign(size_t align, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| byte *res = NULL; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s align=%d size=%d\n", __FUNCTION__, align, size); |
| if (!IS_POWER_OF_2(align)) |
| client_handle_alloc_failure(size, (app_pc)replace_memalign, &mc); |
| else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, size, align, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_memalign, MALLOC_ALLOCATOR_MALLOC); |
| } |
| LOG(2, "\t%s %d %d => "PFX"\n", __FUNCTION__, align, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_valloc(size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| byte *res = NULL; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s size=%d\n", __FUNCTION__, size); |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, size, PAGE_SIZE, |
| ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_valloc, MALLOC_ALLOCATOR_MALLOC); |
| LOG(2, "\t%s %d => "PFX"\n", __FUNCTION__, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_pvalloc(size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| dr_mcontext_t mc; |
| byte *res = NULL; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s size=%d\n", __FUNCTION__, size); |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, ALIGN_FORWARD(size, PAGE_SIZE), PAGE_SIZE, |
| ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_pvalloc, MALLOC_ALLOCATOR_MALLOC); |
| LOG(2, "\t%s %d => "PFX"\n", __FUNCTION__, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| #endif |
| |
| /* XXX i#94: replace mallopt(), mallinfo(), etc. */ |
| |
| /*************************************************************************** |
| * Operators |
| */ |
| |
| /* i#882: replace operator new/delete known to be non-placement to |
| * avoid wrap cost and to support redzones on debug CRT. |
| * We will also be able to pass in the allocation type rather than |
| * reading it from CLS. |
| */ |
| static inline void * |
| replace_operator_new_common(void *drcontext, dr_mcontext_t *mc, size_t size, |
| bool abort_on_oom, uint alloc_type, app_pc caller) |
| { |
| void *res; |
| /* b/c we replace at the operator level and we don't analyze the |
| * replaced operator to see which libc it's using we have to assume |
| * our stored default is ok (xref i#964, i#939) |
| */ |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| LOG(2, "replace_operator_new size=%d abort_on_oom=%d type=%d\n", |
| size, abort_on_oom, alloc_type); |
| res = ONDSTACK_REPLACE_ALLOC_COMMON(arena, size, 0, |
| ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, mc, caller, alloc_type); |
| LOG(2, "\treplace_operator_new %d => "PFX"\n", size, res); |
| if (abort_on_oom && res == NULL) { |
| /* XXX i#957: we should throw a C++ exception but for now we just abort */ |
| ELOGF(0, f_global, "ABORTING ON OOM\n"); |
| IF_DEBUG(aborting = true;) |
| dr_exit_process(1); |
| ASSERT(false, "should not reach here"); |
| } |
| return res; |
| } |
| |
| static void * |
| replace_operator_new(size_t size) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PIFX"\n", __FUNCTION__, size); |
| res = replace_operator_new_common(drcontext, &mc, size, true, MALLOC_ALLOCATOR_NEW, |
| (app_pc)replace_operator_new); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_operator_new_nothrow(size_t size, int /*std::nothrow_t*/ ignore) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PIFX"\n", __FUNCTION__, size); |
| res = replace_operator_new_common(drcontext, &mc, size, false, MALLOC_ALLOCATOR_NEW, |
| (app_pc)replace_operator_new_nothrow); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| /* we need separate array versions for type mismatch detection (NYI) */ |
| static void * |
| replace_operator_new_array(size_t size) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PIFX"\n", __FUNCTION__, size); |
| res = replace_operator_new_common(drcontext, &mc, size, true, |
| MALLOC_ALLOCATOR_NEW_ARRAY, |
| (app_pc)replace_operator_new_array); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_operator_new_array_nothrow(size_t size, int /*std::nothrow_t*/ ignore) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PIFX"\n", __FUNCTION__, size); |
| res = replace_operator_new_common(drcontext, &mc, size, false, |
| MALLOC_ALLOCATOR_NEW_ARRAY, |
| (app_pc)replace_operator_new_array_nothrow); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| /* caller must call enter_client_code() + get mc to ensure a single cstack frame */ |
| static inline void |
| replace_operator_delete_common(void *drcontext, dr_mcontext_t *mc, void *ptr, |
| uint alloc_type, app_pc caller, bool ignore_mismatch) |
| { |
| arena_header_t *arena = arena_for_libc_alloc(drcontext); |
| LOG(2, "replace_operator_delete "PFX"%s\n", ptr, |
| ignore_mismatch ? " (ignore mismatches)" : ""); |
| ONDSTACK_REPLACE_FREE_COMMON(arena, ptr, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT | |
| (ignore_mismatch ? ALLOC_IGNORE_MISMATCH : 0), |
| drcontext, mc, caller, alloc_type); |
| } |
| |
| /* We do not bother to report mismatches on nothrow vs regular so we |
| * don't need to distinguish nothrow vs regular delete |
| */ |
| static void |
| replace_operator_delete(void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| replace_operator_delete_common(drcontext, &mc, ptr, MALLOC_ALLOCATOR_NEW, |
| (app_pc)replace_operator_delete, false); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static void |
| replace_operator_delete_nothrow(void *ptr, int /*std::nothrow_t*/ ignore) |
| { |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| replace_operator_delete_common(drcontext, &mc, ptr, MALLOC_ALLOCATOR_NEW, |
| (app_pc)replace_operator_delete, false); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static void |
| replace_operator_delete_array(void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| replace_operator_delete_common(drcontext, &mc, ptr, MALLOC_ALLOCATOR_NEW_ARRAY, |
| (app_pc)replace_operator_delete_array, false); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static void |
| replace_operator_delete_array_nothrow(void *ptr, int /*std::nothrow_t*/ ignore) |
| { |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| replace_operator_delete_common(drcontext, &mc, ptr, MALLOC_ALLOCATOR_NEW_ARRAY, |
| (app_pc)replace_operator_delete_array_nothrow, false); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static void * |
| replace_operator_new_nomatch(size_t size) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PIFX"\n", __FUNCTION__, size); |
| res = replace_operator_new_common(drcontext, &mc, size, true, |
| MALLOC_ALLOCATOR_UNKNOWN, |
| (app_pc)replace_operator_new_nomatch); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_operator_new_nothrow_nomatch(size_t size, int /*std::nothrow_t*/ ignore) |
| { |
| void *res; |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PIFX"\n", __FUNCTION__, size); |
| res = replace_operator_new_common(drcontext, &mc, size, false, |
| MALLOC_ALLOCATOR_UNKNOWN, |
| (app_pc)replace_operator_new_nothrow); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void |
| replace_operator_delete_nomatch(void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| replace_operator_delete_common(drcontext, &mc, ptr, MALLOC_ALLOCATOR_NEW, |
| (app_pc)replace_operator_delete_nomatch, true); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static void |
| replace_operator_delete_nothrow_nomatch(void *ptr, int /*std::nothrow_t*/ ignore) |
| { |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| replace_operator_delete_common(drcontext, &mc, ptr, MALLOC_ALLOCATOR_NEW, |
| (app_pc)replace_operator_delete_nothrow_nomatch, true); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| #ifdef WINDOWS |
| static void |
| replace_operator_combined_delete(void *ptr) |
| { |
| /* See i#722 for background, and i#965. |
| * This routine is called for both delete and delete[] so we must disable |
| * mismatch checking. |
| * XXX: it would be nice to check malloc vs delete* |
| */ |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| replace_operator_delete_common(drcontext, &mc, ptr, MALLOC_ALLOCATOR_UNKNOWN, |
| (app_pc)replace_operator_combined_delete, true); |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| #endif /* WINDOWS */ |
| |
| #ifdef WINDOWS |
| /*************************************************************************** |
| * Windows RTL Heap API |
| */ |
| |
| /* i#1572: Rtl*Heap return BOOLEAN up through win7, but BOOL on win8+. |
| * There's no downside to returning BOOL instead of BOOLEAN if our value |
| * is either 0 or 1 (i.e., no weird != 1 true values) so we always do that. |
| * Our code either uses A) TRUE or FALSE constants or B) !!bool. |
| */ |
| typedef BOOL RTL_HEAP_BOOL_TYPE; |
| |
| /* Table mapping a module base to arena_header_t, for post-us libc Heaps (i#960). |
| * This stores the default Heap for the module and thus we assume the |
| * lifetime of the arena matches the module lifetime. |
| */ |
| static hashtable_t crtheap_mod_table; |
| #define CRTHEAP_MOD_TABLE_HASH_BITS 8 |
| |
| /* Table mapping Heap HANDLE to arena_header_t, for pre-us Heaps (i#959). */ |
| static hashtable_t crtheap_handle_table; |
| #define CRTHEAP_HANDLE_TABLE_HASH_BITS 8 |
| |
| /* Forwards */ |
| static NTSTATUS WINAPI |
| replace_RtlDestroyHeap(HANDLE heap); |
| |
| |
| static arena_header_t * |
| create_Rtl_heap(size_t commit_sz, size_t reserve_sz, uint flags) |
| { |
| arena_header_t *new_arena = (arena_header_t *) |
| os_large_alloc(commit_sz, reserve_sz, arena_page_prot(flags)); |
| if (new_arena != NULL) { |
| LOG(2, "%s commit="PIFX" reserve="PIFX" flags=0x%x => "PFX"\n", |
| __FUNCTION__, commit_sz, reserve_sz, flags, new_arena); |
| new_arena->commit_end = (byte *)new_arena + commit_sz; |
| new_arena->reserve_end = (byte *)new_arena + reserve_sz; |
| heap_region_add((byte *)new_arena, new_arena->reserve_end, HEAP_ARENA, NULL); |
| /* Even if this is the post-us arena for a pre-us Heap, we store the new |
| * arena as the Heap for easier RtlWalkHeap implementation. We skip |
| * pre-us heaps during app iteration. |
| * Earlier injection would eliminate the complexity. |
| */ |
| heap_region_set_heap((byte *)new_arena, (HANDLE)new_arena); |
| /* this will create the lock even if TEST(HEAP_NO_SERIALIZE, flags) */ |
| arena_init(new_arena, NULL); |
| new_arena->flags |= (flags & HEAP_CREATE_POSSIBLE_FLAGS); |
| } |
| return new_arena; |
| } |
| |
| /* If !free_chunks, we assume called at process exit */ |
| static void |
| destroy_Rtl_heap(arena_header_t *arena, dr_mcontext_t *mc, bool free_chunks) |
| { |
| LOG(2, "%s heap="PFX"\n", __FUNCTION__, arena); |
| if (arena->modbase != NULL) { |
| IF_DEBUG(bool found =) |
| hashtable_remove(&crtheap_mod_table, (void *)arena->modbase); |
| ASSERT(found, "inconsistent default Heap"); |
| } |
| if (arena->handle != NULL) { |
| IF_DEBUG(bool found =) |
| hashtable_remove(&crtheap_handle_table, (void *)arena->handle); |
| ASSERT(found, "inconsistent default Heap"); |
| } |
| /* If not at process exit (else we'll deadlock on alloc_routine_table lock), |
| * clear this from the alloc set |
| */ |
| if (free_chunks && arena->alloc_set_member != NULL) { |
| IF_DEBUG(bool success =) |
| alloc_routine_set_update_user_data(arena->alloc_set_member, NULL); |
| ASSERT(success, "failed to invalidate default Heap on its destruction"); |
| } |
| destroy_arena_family(arena, mc, free_chunks, (app_pc)replace_RtlDestroyHeap); |
| } |
| |
| /* Returns NULL if not a valid Heap handle. Caller may want to call |
| * report_invalid_heap() once mc is available to report NULL. |
| */ |
| static arena_header_t * |
| heap_to_arena(HANDLE heap) |
| { |
| arena_header_t *arena = (arena_header_t *) heap; |
| uint magic; |
| if (heap == process_heap) |
| return cur_arena; |
| ASSERT(heap != get_private_heap_handle(), "app using private heap"); |
| if (arena != NULL && |
| safe_read(&arena->magic, sizeof(magic), &magic) && |
| magic == HEADER_MAGIC && |
| /* XXX: safe_read flags too? magic passed though */ |
| TEST(ARENA_MAIN, arena->flags)) |
| return arena; |
| else { |
| arena = hashtable_lookup(&crtheap_handle_table, (void *)heap); |
| if (arena != NULL) |
| return arena; |
| LOG(2, "%s: "PFX" => NULL!\n", __FUNCTION__, heap); |
| return NULL; |
| } |
| } |
| |
| /* Called at process init, prior to any module events */ |
| static void |
| pre_existing_heap_init(HANDLE heap) |
| { |
| /* Create an arena for this pre-existing Heap (i#959) */ |
| arena_header_t *arena; |
| MEMORY_BASIC_INFORMATION mbi; |
| uint prot; |
| bool mapped = false; |
| IF_DEBUG(bool unique;) |
| if (heap == process_heap) |
| return; |
| if (dr_virtual_query((byte *)heap, &mbi, sizeof(mbi)) == sizeof(mbi) && |
| (mbi.Type == MEM_MAPPED || mbi.Type == MEM_IMAGE)) { |
| /* i#1221: ntdll!CsrPortHeap passed in shared memory here. |
| * We can't use our own memory. |
| */ |
| byte *alloc_base = (byte *) mbi.AllocationBase; |
| byte *alloc_end = heap_allocated_end(heap); |
| /* Go to next page to be safe */ |
| /* FIXME i#1882: on x64 we have the endpoint computed incorrectly, or |
| * something is open-ended, so we skip one more page. |
| */ |
| alloc_end = (byte *) ALIGN_FORWARD(alloc_end + IF_X64(2*)PAGE_SIZE, PAGE_SIZE); |
| /* FIXME: for this case we'd have to fall back to native calls */ |
| ASSERT(alloc_end < (byte *)heap + mbi.RegionSize, |
| "pre-us mapped heap has no room left"); |
| arena = (arena_header_t *) alloc_end; |
| /* Be sure to initialize everything as there could be stale data here (i#1823) */ |
| memset(arena, 0, sizeof(*arena) + sizeof(*arena->free_list)); |
| arena->commit_end = (byte *)heap + mbi.RegionSize; |
| |
| /* i#1282: we may need to extend the committed part of the heap */ |
| if (dr_virtual_query(arena->commit_end, &mbi, sizeof(mbi)) == sizeof(mbi) && |
| (byte *)mbi.AllocationBase == alloc_base && |
| mbi.State == MEM_RESERVE) |
| arena->reserve_end = (byte *)mbi.BaseAddress + mbi.RegionSize; |
| else |
| arena->reserve_end = arena->commit_end; |
| |
| arena_init(arena, NULL); |
| arena->flags |= ARENA_PRE_US_MAPPED; |
| LOG(2, "new arena inside mmapped pre-us Heap "PFX" is "PFX"-"PFX"-"PFX"\n", |
| heap, arena, arena->commit_end, arena->reserve_end); |
| } else { |
| arena = (arena_header_t *) |
| create_Rtl_heap(PAGE_SIZE, ARENA_INITIAL_SIZE, HEAP_GROWABLE); |
| LOG(2, "new arena for pre-us Heap "PFX" is "PFX"\n", heap, arena); |
| } |
| IF_DEBUG(unique =) |
| hashtable_add(&crtheap_handle_table, (void *)heap, (void *)arena); |
| ASSERT(unique, "duplicate pre-us Heap"); |
| arena->handle = heap; |
| if (dr_query_memory((byte *)heap, NULL, NULL, &prot) && TEST(DR_MEMPROT_EXEC, prot)) { |
| arena->flags |= HEAP_CREATE_ENABLE_EXECUTE; |
| } |
| /* XXX: we don't know about HEAP_GROWABLE or HEAP_GENERATE_EXCEPTIONS |
| * or HEAP_NO_SERIALIZE! Best to be conservative on HEAP_GROWABLE. |
| */ |
| if (!TEST(ARENA_PRE_US_MAPPED, arena->flags)) |
| arena->flags |= HEAP_GROWABLE; |
| } |
| |
| static HANDLE |
| libc_heap_handle(const module_data_t *mod) |
| { |
| HANDLE pre_us_heap = NULL; |
| ptr_uint_t (*get_heap)(void) = (ptr_uint_t (*)(void)) |
| dr_get_proc_address(mod->handle, "_get_heap_handle"); |
| LOG(3, "%s: for "PFX" func is "PFX"\n", __FUNCTION__, mod->start, get_heap); |
| if (get_heap != NULL) { |
| void *drcontext = dr_get_current_drcontext(); |
| DR_TRY_EXCEPT(drcontext, { |
| pre_us_heap = (HANDLE) (*get_heap)(); |
| }, { /* EXCEPT */ |
| }); |
| } else { |
| /* For static libc, we don't want to call _get_heap_handle(), as it |
| * asserts if the heap is not initialized yet. Since we need syms to find |
| * it anyway, we just go straight for _crtheap. |
| */ |
| byte *addr = lookup_internal_symbol(mod, "_crtheap"); |
| /* i#1864: VS2015 changed the name to "__acrt_heap" */ |
| if (addr == NULL) |
| addr = lookup_internal_symbol(mod, "__acrt_heap"); |
| if (addr != NULL) { |
| if (!safe_read(addr, sizeof(pre_us_heap), &pre_us_heap)) |
| pre_us_heap = NULL; |
| LOG(3, "%s: _crtheap @"PFX" => "PFX"\n", __FUNCTION__, addr, pre_us_heap); |
| /* i#1766: Chromium sets their _crtheap to 1! */ |
| if (pre_us_heap < (HANDLE)PAGE_SIZE) { |
| LOG(3, "%s: clamping _crtheap from "PFX" to NULL\n", __FUNCTION__, |
| pre_us_heap); |
| pre_us_heap = NULL; |
| } |
| if (alloc_ops.use_symcache) |
| drsymcache_add(mod, "_crtheap", addr - mod->start); |
| } |
| } |
| return pre_us_heap; |
| } |
| |
| static arena_header_t * |
| check_libc_vs_process_heap(alloc_routine_entry_t *e, arena_header_t *arena) |
| { |
| /* On first use, we must check whether the arena we created prior |
| * to the module initializing its _crtheap should in fact exist, |
| * or whether the module is using ProcessHeap as its libc heap |
| * (happens on VS2012: i#1223). |
| */ |
| HANDLE pre_us_heap; |
| app_pc modbase = alloc_routine_get_module_base(e); |
| module_data_t *mod = dr_lookup_module(modbase); |
| ASSERT(mod != NULL, "libc set must have module"); |
| pre_us_heap = libc_heap_handle(mod); |
| dr_free_module_data(mod); |
| LOG(2, "%s: modbase "PFX" arena "PFX" heap "PFX"\n", __FUNCTION__, |
| modbase, arena, pre_us_heap); |
| if (pre_us_heap == process_heap) { |
| /* win8 libc uses process heap (i#1223) */ |
| bool success = alloc_routine_set_update_user_data |
| (arena->alloc_set_member, cur_arena); |
| LOG(2, "replacing arena for modbase "PFX" w/ default arena for set "PFX"\n", |
| modbase, arena->alloc_set_member); |
| ASSERT(arena->next_chunk == arena->start_chunk && arena->next_arena == NULL, |
| "arena should be unused"); |
| ASSERT(success, "failed to update set arena"); |
| IF_DEBUG(success =) |
| heap_region_remove((byte *)arena, arena->reserve_end, NULL); |
| ASSERT(success, "missing heap region for default Heap"); |
| IF_DEBUG(success =) |
| hashtable_remove(&crtheap_mod_table, (void *)arena->modbase); |
| ASSERT(success, "inconsistent default Heap"); |
| arena_free(arena); |
| return cur_arena; |
| } else { |
| ASSERT(pre_us_heap == NULL /* lib w/ just cpp stubs, using msvcr*.dll */ || |
| hashtable_lookup(&crtheap_handle_table, (void *)pre_us_heap) != NULL, |
| "failed to find pre-us heap"); |
| return arena; |
| } |
| } |
| |
| static inline void |
| report_invalid_heap(HANDLE heap, dr_mcontext_t *mc, app_pc caller) |
| { |
| client_invalid_heap_arg(caller, (byte *)heap, mc, |
| "Windows API routine: invalid heap HANDLE", false/*!free*/); |
| } |
| |
| /* i#960/i#607.A: identify a new Heap for CRT */ |
| static void |
| check_for_CRT_heap(void *drcontext, arena_header_t *new_arena) |
| { |
| dr_mcontext_t mc; |
| packed_callstack_t *pcs; |
| symbolized_callstack_t scs; |
| uint i; |
| app_pc modbase; |
| # define CRT_HEAP_INIT_ROUTINE "_heap_init" |
| # define CRT_HEAP_INIT_FRAMES 12 |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| DOLOG(2, { report_callstack(drcontext, &mc); }); |
| packed_callstack_record(&pcs, &mc, NULL/*skip replace_ frame*/, CRT_HEAP_INIT_FRAMES); |
| packed_callstack_to_symbolized(pcs, &scs); |
| /* Look for 2 frames of ntdll (trying to rule out qsort or |
| * other callback) calling entry point of dll, some other |
| * frames of that dll, and then kernel*!HeapCreate. |
| */ |
| LOG(2, "symbolized callstack:\n"); |
| for (i = 0; i < scs.num_frames; i++) |
| LOG(2, " #%d = %s!%s\n", i, symbolized_callstack_frame_modname(&scs, i), |
| symbolized_callstack_frame_func(&scs, i)); |
| i = 0; |
| /* Sometimes the replace_RtlCreateHeap still ends up on the stack */ |
| if (text_matches_pattern(symbolized_callstack_frame_modname(&scs, i), |
| DRMEMORY_LIBNAME, FILESYS_CASELESS)) |
| i++; |
| if (scs.num_frames >= 3 && |
| text_matches_pattern(symbolized_callstack_frame_modname(&scs, i++), |
| "kernel*.dll", FILESYS_CASELESS)) { |
| bool crt_init = false; |
| IF_DEBUG(const char *modname = symbolized_callstack_frame_modname(&scs, i);) |
| modbase = symbolized_callstack_frame_modbase(&scs, i++); |
| LOG(2, "checking for CRT heap created by %s base="PFX"\n", modname, modbase); |
| if (modbase == executable_base && |
| strcmp(symbolized_callstack_frame_func(&scs, i-1), CRT_HEAP_INIT_ROUTINE) |
| == 0) { |
| /* CRT in executable */ |
| crt_init = true; |
| } else { |
| /* Check for CRT in a DLL */ |
| while (i < scs.num_frames && |
| symbolized_callstack_frame_modbase(&scs, i) == modbase) |
| i++; |
| if (i < scs.num_frames - 1 && |
| text_matches_pattern(symbolized_callstack_frame_modname(&scs, i++), |
| "ntdll.dll", FILESYS_CASELESS) && |
| text_matches_pattern(symbolized_callstack_frame_modname(&scs, i++), |
| "ntdll.dll", FILESYS_CASELESS)) { |
| crt_init = true; |
| } |
| } |
| if (crt_init) { |
| /* Match => destroy the arena we made at lib load event time and |
| * replace with the one here, as this one has specific params. |
| */ |
| arena_header_t *set_arena = (arena_header_t *) |
| hashtable_lookup(&crtheap_mod_table, (void *)modbase); |
| LOG(2, "arena for CRT in %s is "PFX"\n", modname, set_arena); |
| if (set_arena != NULL) { |
| bool success = alloc_routine_set_update_user_data |
| (set_arena->alloc_set_member, new_arena); |
| LOG(2, "replacing arena for %s w/ app arena "PFX" for set "PFX"\n", |
| modname, new_arena, set_arena->alloc_set_member); |
| ASSERT(set_arena->alloc_set_member != NULL, "mis-initialized arena"); |
| ASSERT(set_arena->next_chunk == set_arena->start_chunk && |
| set_arena->next_arena == NULL, |
| "arena should be unused"); |
| ASSERT(success, "failed to update set arena"); |
| if (success) { |
| new_arena->flags |= ARENA_LIBC_DEFAULT; |
| new_arena->modbase = set_arena->modbase; |
| set_arena->modbase = NULL; /* xfer, no free */ |
| new_arena->alloc_set_member = set_arena->alloc_set_member; |
| heap_region_remove((byte *)set_arena, set_arena->reserve_end, |
| NULL); |
| hashtable_add_replace(&crtheap_mod_table, (void *)modbase, |
| (void *)new_arena); |
| arena_free(set_arena); |
| } |
| } |
| } |
| } |
| symbolized_callstack_free(&scs); |
| packed_callstack_free(pcs); |
| } |
| |
| static HANDLE WINAPI |
| replace_RtlCreateHeap(ULONG flags, void *base, size_t reserve_sz, |
| size_t commit_sz, void *lock, void *params) |
| { |
| arena_header_t *new_arena = NULL; |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s\n", __FUNCTION__); |
| if (lock != NULL || params != NULL || base != NULL) { |
| /* As of win7, CreateHeap always passes NULL for these 3. |
| * XXX: once we have early injection, we'll see ntdll!CsrPortHeap created, |
| * and it passes in a base (xref i#1221) to RtlCreateHeap. |
| */ |
| ASSERT(false, "NYI params to RtlCreateHeap"); |
| /* we continue on and ignore params for release build */ |
| } |
| flags &= ~(HEAP_CREATE_POSSIBLE_FLAGS); |
| if (reserve_sz == 0) { |
| flags |= HEAP_GROWABLE; |
| reserve_sz = ARENA_INITIAL_SIZE; |
| } else /* XXX: is max really non-page-aligned? we align it */ |
| reserve_sz = ALIGN_FORWARD(reserve_sz, PAGE_SIZE); |
| commit_sz = ALIGN_FORWARD(commit_sz, PAGE_SIZE); |
| if (commit_sz == 0) |
| commit_sz = PAGE_SIZE; |
| new_arena = (arena_header_t *) create_Rtl_heap(commit_sz, reserve_sz, flags); |
| LOG(2, " => "PFX"\n", new_arena); |
| |
| if (new_arena != NULL) |
| check_for_CRT_heap(drcontext, new_arena); |
| |
| dr_switch_to_app_state(drcontext); |
| if (new_arena == NULL) { |
| /* XXX: most of our errors are invalid params so that's all we set. |
| * We deliberately wait until in app mode to make this more efficient. |
| */ |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| } |
| exit_client_code(drcontext, true/*already swapped*/); |
| return (HANDLE) new_arena; |
| } |
| |
| static NTSTATUS WINAPI |
| replace_RtlDestroyHeap(HANDLE heap) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| NTSTATUS res = STATUS_INVALID_PARAMETER; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s heap="PFX"\n", __FUNCTION__, heap); |
| if (arena == NULL) |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlDestroyHeap); |
| else if (heap != process_heap) { |
| destroy_Rtl_heap(arena, &mc, true/*free indiv chunks*/); |
| res = STATUS_SUCCESS; |
| } |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| # ifdef X64 |
| /*************************************************************************** |
| * i#1565: 64-bit win7/win8 RtlGetThreadPreferredUILanguages and several private |
| * helper routines it calls (LdrpMergeLangFallbackLists and |
| * RtlpMuiRegAddMultiSzToLangFallbackList, at the least) perform |
| * abstraction-violating tests on precise heap block header fields which of course |
| * our replacement headers do not match. I'm calling this being "nosy". Worse, they |
| * allocate using RtlAllocateHeap yet free using RtlpFreeHeap. In some cases they |
| * seem to not even use the heap block they allocate for anything but these checks. |
| * The checks are very similar for each routine, with particular patterns of checks |
| * involving prefetch fields and other things we could pattern-match (limiting the |
| * checks to only when inside RtlGetThreadPreferredUILanguages for accuracy and |
| * perf), but we would still have to identify RtlpFreeHeap if we want to replace |
| * these allocs. Thus for now we go with the simplest solution we have that works: |
| * we let all allocs inside RtlGetThreadPreferredUILanguages that pass |
| * HEAP_ZERO_MEMORY go native. |
| * |
| * XXX: the current solution works for pattern mode but we might hit false positives |
| * in shadow mode on accesses to these native allocs. We'll have to revisit at that |
| * point and perhaps try to do what's mentioned above: pattern-match the heap header |
| * accesses (in shadow mode we can wait for the unaddr reports), and locate the call |
| * to RtlpFreeHeap. |
| * |
| * XXX i#1720: a win7 ntdll patch changed some of this code to free using RtlFreeHeap |
| * instead of RtlpFreeHeap as we saw before. That causes invalid heap arg errors. |
| * If all such code now calls RtlFreeHeap, we could consider going back to a |
| * pattern-match approach, which was only abandoned b/c of RtlpFreeHeap: but it would |
| * have to be contingent on this recent ntdll.dll. |
| */ |
| |
| /* If we add more fields, we should move this up top-level */ |
| static int cls_idx_replace = -1; |
| |
| typedef struct _cls_replace_t { |
| uint in_nosy_heap_region; /* are we inside RtlGetThreadPreferredUILanguages */ |
| } cls_replace_t; |
| |
| typedef NTSYSAPI PVOID (NTAPI *RtlAllocateHeap_t)(HANDLE, ULONG, SIZE_T); |
| static RtlAllocateHeap_t native_RtlAllocateHeap; |
| |
| typedef NTSYSAPI RTL_HEAP_BOOL_TYPE (NTAPI *RtlFreeHeap_t)(HANDLE, ULONG, PVOID); |
| static RtlFreeHeap_t native_RtlFreeHeap; |
| |
| static app_pc addr_RtlGetThreadPreferredUILanguages; |
| /* i#1822: we also need to make allocs in the Set routine native */ |
| static app_pc addr_RtlSetThreadPreferredUILanguages; |
| |
| static app_pc ntdll_base; |
| static app_pc ntdll_end; |
| |
| /* We avoid invalid heap arg complaints on free by remembering the native allocs */ |
| #define NOSY_TABLE_HASH_BITS 8 |
| static hashtable_t nosy_table; |
| |
| static void |
| replace_context_init(void *drcontext, bool new_depth) |
| { |
| cls_replace_t *data; |
| if (new_depth) { |
| data = (cls_replace_t *) thread_alloc(drcontext, sizeof(*data), HEAPSTAT_WRAP); |
| drmgr_set_cls_field(drcontext, cls_idx_replace, data); |
| } else |
| data = (cls_replace_t *) drmgr_get_cls_field(drcontext, cls_idx_replace); |
| memset(data, 0, sizeof(*data)); |
| } |
| |
| static void |
| replace_context_exit(void *drcontext, bool thread_exit) |
| { |
| if (thread_exit) { |
| cls_replace_t *data = (cls_replace_t *) |
| drmgr_get_cls_field(drcontext, cls_idx_replace); |
| thread_free(drcontext, data, sizeof(*data), HEAPSTAT_WRAP); |
| } |
| /* else, nothing to do: we leave the struct for re-use on next callback */ |
| } |
| |
| static void |
| replace_start_nosy_sequence(void *wrapcxt, DR_PARAM_OUT void **user_data) |
| { |
| cls_replace_t *data = (cls_replace_t *) |
| drmgr_get_cls_field(dr_get_current_drcontext(), cls_idx_replace); |
| data->in_nosy_heap_region++; |
| LOG(4, "%s: counter=%d\n", __FUNCTION__, data->in_nosy_heap_region); |
| DOLOG(4, { |
| dr_mcontext_t *mc = drwrap_get_mcontext_ex(wrapcxt, DR_MC_INTEGER); |
| client_print_callstack(drwrap_get_drcontext(wrapcxt), mc, |
| (app_pc)addr_RtlGetThreadPreferredUILanguages); |
| }); |
| } |
| |
| static void |
| replace_stop_nosy_sequence(void *wrapcxt, DR_PARAM_OUT void **user_data) |
| { |
| cls_replace_t *data = (cls_replace_t *) |
| drmgr_get_cls_field(dr_get_current_drcontext(), cls_idx_replace); |
| ASSERT(data->in_nosy_heap_region > 0, "missed in_native stop"); |
| if (data->in_nosy_heap_region > 0) /* try to recover */ |
| data->in_nosy_heap_region--; |
| LOG(4, "%s: counter=%d\n", __FUNCTION__, data->in_nosy_heap_region); |
| DOLOG(4, { |
| dr_mcontext_t *mc = drwrap_get_mcontext_ex(wrapcxt, DR_MC_INTEGER); |
| client_print_callstack(drwrap_get_drcontext(wrapcxt), mc, |
| (app_pc)addr_RtlGetThreadPreferredUILanguages); |
| }); |
| } |
| |
| static void |
| replace_nosy_init(void) |
| { |
| module_data_t *ntdll = dr_lookup_module_by_name("ntdll.dll"); |
| ASSERT(ntdll != NULL, "cannot find ntdll.dll"); |
| ntdll_base = ntdll->start; |
| ASSERT(ntdll_base != NULL, "internal error finding ntdll.dll base"); |
| ntdll_end = ntdll->end; |
| |
| native_RtlAllocateHeap = (RtlAllocateHeap_t) |
| dr_get_proc_address(ntdll->handle, "RtlAllocateHeap"); |
| ASSERT(native_RtlAllocateHeap != NULL, "internal error finding RtlAllocateHeap"); |
| |
| addr_RtlGetThreadPreferredUILanguages = (app_pc) |
| dr_get_proc_address(ntdll->handle, "RtlGetThreadPreferredUILanguages"); |
| addr_RtlSetThreadPreferredUILanguages = (app_pc) |
| dr_get_proc_address(ntdll->handle, "RtlSetThreadPreferredUILanguages"); |
| ASSERT((addr_RtlGetThreadPreferredUILanguages != NULL && |
| addr_RtlSetThreadPreferredUILanguages != NULL) || |
| get_windows_version() < DR_WINDOWS_VERSION_VISTA, |
| "failed to find RtlGetThreadPreferredUILanguages"); |
| if (addr_RtlGetThreadPreferredUILanguages != NULL) { |
| if (!drwrap_wrap(addr_RtlGetThreadPreferredUILanguages, |
| replace_start_nosy_sequence, replace_stop_nosy_sequence)) |
| ASSERT(false, "failed to wrap"); |
| } |
| if (addr_RtlSetThreadPreferredUILanguages != NULL) { |
| if (!drwrap_wrap(addr_RtlSetThreadPreferredUILanguages, |
| replace_start_nosy_sequence, replace_stop_nosy_sequence)) |
| ASSERT(false, "failed to wrap"); |
| } |
| native_RtlFreeHeap = (RtlFreeHeap_t) |
| dr_get_proc_address(ntdll->handle, "RtlFreeHeap"); |
| ASSERT(native_RtlFreeHeap != NULL, "failed to find RtlFreeHeap"); |
| dr_free_module_data(ntdll); |
| |
| cls_idx_replace = |
| drmgr_register_cls_field(replace_context_init, replace_context_exit); |
| ASSERT(cls_idx_replace > -1, "unable to reserve CLS field"); |
| |
| hashtable_init(&nosy_table, NOSY_TABLE_HASH_BITS, HASH_INTPTR, false/*!strdup*/); |
| } |
| |
| static void |
| replace_nosy_exit(void) |
| { |
| if (addr_RtlGetThreadPreferredUILanguages != NULL) { |
| if (!drwrap_unwrap(addr_RtlGetThreadPreferredUILanguages, |
| replace_start_nosy_sequence, replace_stop_nosy_sequence)) |
| ASSERT(false, "failed to unwrap"); |
| } |
| if (addr_RtlSetThreadPreferredUILanguages != NULL) { |
| if (!drwrap_unwrap(addr_RtlSetThreadPreferredUILanguages, |
| replace_start_nosy_sequence, replace_stop_nosy_sequence)) |
| ASSERT(false, "failed to unwrap"); |
| } |
| drmgr_unregister_cls_field(replace_context_init, replace_context_exit, |
| cls_idx_replace); |
| hashtable_delete_with_stats(&nosy_table, "nosy"); |
| } |
| |
| /* Returns whether an RtlAllocateHeap call should go native */ |
| static bool |
| replace_leave_native(void *drcontext, dr_mcontext_t *mc, HANDLE heap, |
| ULONG flags, SIZE_T size) |
| { |
| /* i#1565: ntdll!RtlpMuiRegAddMultiSzToLangFallbackList on 64-bit win7 and |
| * win8 allocates a heap object and then performs quite a few sanity checks |
| * on it, directly reading the object's header as well as the header of |
| * PEB->ProcessHeap. It xors in some cookies and de-references the result, |
| * ending up in a crash, so we have to do more than just ignore/suppress the |
| * unaddrs. Plus, it frees it via RtlpFreeHeap. |
| */ |
| cls_replace_t *data; |
| if (alloc_ops.replace_nosy_allocs) |
| return false; |
| if (get_windows_version() != DR_WINDOWS_VERSION_7 && |
| get_windows_version() != DR_WINDOWS_VERSION_8) |
| return false; |
| if (heap != process_heap || |
| /* every instance so far has this and only this flag set */ |
| flags != HEAP_ZERO_MEMORY) |
| return false; |
| data = (cls_replace_t *) drmgr_get_cls_field(drcontext, cls_idx_replace); |
| if (data->in_nosy_heap_region > 0) { |
| /* We perform one more check: to rule out a regular alloc in |
| * RtlpMuiRegTryToAppendLanguageName (for which we then raise an invalid |
| * heap arg potential error) we decode forward and look for a call to |
| * RtlFreeHeap. On Win7x64 that call is 195 bytes away. |
| * XXX: if we end up having even more regular allocs that we made native, we |
| * may want to put in a hashtable of native allocs so we can ignore them in |
| * replace_RtlFreeHeap. |
| */ |
| bool found_normal_free = false; |
| instr_t inst; |
| app_pc pc; |
| # define NOSY_MAX_DECODE 512 |
| instr_init(drcontext, &inst); |
| DR_TRY_EXCEPT(dr_get_current_drcontext(), { |
| /* i#1833: we used to call callstack_next_retaddr(mc) but it can produce |
| * bogus frames w/o unwind data, so we go with the more robust drwrap |
| * retaddr slot: |
| */ |
| app_pc app_caller = get_replace_native_caller(drcontext); |
| if (app_caller >= ntdll_base && app_caller < ntdll_end) { /* sanity check */ |
| for (pc = app_caller; pc < app_caller + NOSY_MAX_DECODE; ) { |
| pc = decode(drcontext, pc, &inst); |
| if (instr_valid(&inst) && instr_is_call_direct(&inst)) { |
| if (opnd_get_pc(instr_get_target(&inst)) == |
| (app_pc)native_RtlFreeHeap) { |
| LOG(3, "%s: found RtlFreeHeap call => not a native alloc\n", |
| __FUNCTION__); |
| DOLOG(3, { |
| client_print_callstack(dr_get_current_drcontext(), mc, |
| (app_pc)native_RtlAllocateHeap); |
| }); |
| found_normal_free = true; |
| break; |
| } |
| } |
| instr_reset(drcontext, &inst); |
| } |
| } |
| }, { /* EXCEPT */ |
| found_normal_free = false; |
| }); |
| instr_free(drcontext, &inst); |
| if (!found_normal_free) { |
| LOG(3, "%s: inside RtlGetThreadPreferredUILanguages => native alloc\n", |
| __FUNCTION__); |
| DOLOG(3, { |
| client_print_callstack(dr_get_current_drcontext(), mc, |
| (app_pc)native_RtlAllocateHeap); |
| }); |
| STATS_INC(allocs_left_native); |
| return true; |
| } |
| } |
| return false; |
| } |
| # endif /* X64 */ |
| |
| /*************************************************************************** |
| * Continue RtlHeap API replacement routines: |
| */ |
| |
| static void |
| handle_Rtl_alloc_failure(void *drcontext, arena_header_t *arena, ULONG flags) |
| { |
| /* N.B.: neither HeapAlloc nor HeapReAlloc set the last error */ |
| |
| if ((arena != NULL && TEST(HEAP_GENERATE_EXCEPTIONS, arena->flags)) || |
| TEST(HEAP_GENERATE_EXCEPTIONS, flags)) { |
| ASSERT(false, "HEAP_GENERATE_EXCEPTIONS NYI"); |
| /* FIXME: need to call RtlRaiseException or sthg |
| * But, have to be careful: will it work calling it natively or will |
| * we need to dr_redirect_execution() to get the call interpreted? |
| */ |
| /* FIXME: for invalid params or heap corruption, raise STATUS_ACCESS_VIOLATION; |
| * for OOM, raise STATUS_NO_MEMORY. need caller to tell us which it is! |
| */ |
| } |
| } |
| |
| static void * WINAPI |
| replace_RtlAllocateHeap(HANDLE heap, ULONG flags, SIZE_T size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s heap="PFX" (=> "PFX") flags=0x%x size="PIFX"\n", |
| __FUNCTION__, heap, arena, flags, size); |
| if (arena == NULL) |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlAllocateHeap); |
| # ifdef X64 |
| else if (replace_leave_native(drcontext, &mc, heap, flags, size)) { |
| /* We can't directly invoke RtlAllocateHeap as DR's private loader |
| * will redirect it. |
| */ |
| IF_DEBUG(void *existing;) |
| res = (*native_RtlAllocateHeap)(heap, flags, size); |
| IF_DEBUG(existing =) |
| hashtable_add_replace(&nosy_table, (void *)res, (void *)res); |
| /* This better not touch an mmapped heap as that could corrupt our data */ |
| ASSERT(!TEST(ARENA_PRE_US_MAPPED, arena->flags), |
| "native alloc in mmapped heap is not supported"); |
| LOG(2, "\tnative alloc => "PFX" (%s)\n", res, |
| existing == NULL ? "new" : "replacing -- likely missed RtlpFreeHeap"); |
| } |
| # endif |
| else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, size, 0, |
| ((!TEST(HEAP_NO_SERIALIZE, arena->flags) && |
| !TEST(HEAP_NO_SERIALIZE, flags)) ? |
| ALLOC_SYNCHRONIZE : 0) | |
| (WINDOWS_ZERO_MEMORY(arena, flags) ? ALLOC_ZERO : 0) | |
| ALLOC_INVOKE_CLIENT, drcontext, |
| &mc, (app_pc)replace_RtlAllocateHeap, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_RTL); |
| } |
| dr_switch_to_app_state(drcontext); |
| if (res == NULL) |
| handle_Rtl_alloc_failure(drcontext, arena, flags); |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| static void * WINAPI |
| replace_RtlReAllocateHeap(HANDLE heap, ULONG flags, PVOID ptr, SIZE_T size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s heap="PFX" (=> "PFX") flags=0x%x ptr="PFX" size="PIFX"\n", |
| __FUNCTION__, heap, arena, flags, ptr, size); |
| if (arena == NULL) |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlReAllocateHeap); |
| else { |
| /* unlike libc realloc(), HeapReAlloc fails when ptr==NULL */ |
| res = ONDSTACK_REPLACE_REALLOC_COMMON |
| (arena, ptr, size, |
| ((!TEST(HEAP_NO_SERIALIZE, arena->flags) && |
| !TEST(HEAP_NO_SERIALIZE, flags)) ? |
| ALLOC_SYNCHRONIZE : 0) | |
| (WINDOWS_ZERO_MEMORY(arena, flags) ? ALLOC_ZERO : 0) | |
| (TEST(HEAP_REALLOC_IN_PLACE_ONLY, flags) ? |
| ALLOC_IN_PLACE_ONLY : 0) | |
| ALLOC_ALLOW_EMPTY |
| /* fails on NULL */, |
| drcontext, &mc, (app_pc)replace_RtlReAllocateHeap, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_RTL); |
| } |
| dr_switch_to_app_state(drcontext); |
| if (res == NULL) |
| handle_Rtl_alloc_failure(drcontext, arena, flags); |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_RtlFreeHeap(HANDLE heap, ULONG flags, PVOID ptr) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| RTL_HEAP_BOOL_TYPE res = FALSE; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s heap="PFX" flags=0x%x ptr="PFX"\n", __FUNCTION__, heap, flags, ptr); |
| if (ptr == NULL) { |
| /* for -warn_null_ptr */ |
| client_invalid_heap_arg((app_pc)replace_RtlFreeHeap, ptr, &mc, |
| "RtlFreeHeap", true /* is_free */); |
| /* i#1644: ntdll!RtlFreeHeap returns TRUE if ptr is NULL */ |
| res = TRUE; |
| } else if (arena == NULL) |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlFreeHeap); |
| #ifdef X64 |
| else if (hashtable_lookup(&nosy_table, (void *)ptr) != NULL) { |
| IF_DEBUG(bool found;) |
| res = (*native_RtlFreeHeap)(heap, flags, ptr); |
| IF_DEBUG(found =) |
| hashtable_remove(&nosy_table, (void *)ptr); |
| /* This better not touch an mmapped heap as that could corrupt our data */ |
| ASSERT(!TEST(ARENA_PRE_US_MAPPED, arena->flags), |
| "native free in mmapped heap is not supported"); |
| LOG(2, "\tnative free "PFX" => %d\n", ptr, res); |
| ASSERT(found, "could this be an app race?"); |
| } |
| #endif |
| else { |
| bool ok = (bool)(ptr_uint_t) ONDSTACK_REPLACE_FREE_COMMON |
| (arena, ptr, |
| ((!TEST(HEAP_NO_SERIALIZE, arena->flags) && |
| !TEST(HEAP_NO_SERIALIZE, flags)) ? |
| ALLOC_SYNCHRONIZE : 0) | |
| ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_RtlFreeHeap, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_RTL); |
| res = !!ok; /* convert from bool to BOOL */ |
| } |
| dr_switch_to_app_state(drcontext); |
| if (!res) { |
| /* XXX: all our errors are invalid params so that's all we set. |
| * We deliberately wait until in app mode to make this more efficient. |
| */ |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| } |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| static SIZE_T WINAPI |
| replace_RtlSizeHeap(HANDLE heap, ULONG flags, PVOID ptr) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| SIZE_T res = (SIZE_T) -1; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s\n", __FUNCTION__); |
| if (arena == NULL) |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlSizeHeap); |
| else { |
| res = replace_size_common(arena, ptr, |
| ((!TEST(HEAP_NO_SERIALIZE, arena->flags) && |
| !TEST(HEAP_NO_SERIALIZE, flags)) ? |
| ALLOC_SYNCHRONIZE : 0), |
| drcontext, &mc, (app_pc)replace_RtlSizeHeap, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_RTL); |
| } |
| dr_switch_to_app_state(drcontext); |
| if (!res) { |
| /* XXX: all our errors are invalid params so that's all we set. |
| * We deliberately wait until in app mode to make this more efficient. |
| */ |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| } |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| /* i#900: allowing the app to hold a lock we'll wait for in our |
| * code that needs to return to a cache fragment is unsafe b/c a flusher |
| * could hold the lock as the app. Thus, we mark the lock acquisition |
| * as a safe spot, and we redirect our return to the code cache |
| * via DRi#849. |
| */ |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_RtlLockHeap(HANDLE heap) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| RTL_HEAP_BOOL_TYPE res = FALSE; |
| LOG(2, "%s heap="PFX" (arena="PFX")\n", __FUNCTION__, heap, arena); |
| if (arena == NULL) { |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlLockHeap); |
| } else { |
| /* We only grab this DR lock as the app and we mark it with |
| * dr_recurlock_mark_as_app(), as well as using dr_mark_safe_to_suspend(), |
| * to ensure proper DR behavior |
| */ |
| app_heap_lock(drcontext, arena->lock); |
| res = TRUE; |
| } |
| dr_switch_to_app_state(drcontext); |
| if (!res) /* see above about setting errno in app mode */ |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_RtlUnlockHeap(HANDLE heap) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| RTL_HEAP_BOOL_TYPE res = FALSE, invalid = FALSE;; |
| LOG(2, "%s heap="PFX" (arena="PFX")\n", __FUNCTION__, heap, arena); |
| if (arena == NULL) { |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlUnlockHeap); |
| invalid = TRUE; |
| } else if (dr_recurlock_self_owns(arena->lock)) { |
| app_heap_unlock(drcontext, arena->lock); |
| res = TRUE; |
| } |
| dr_switch_to_app_state(drcontext); |
| if (invalid) /* see above about setting errno in app mode */ |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_RtlValidateHeap(HANDLE heap, DWORD flags, void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| RTL_HEAP_BOOL_TYPE res = FALSE, invalid = FALSE; |
| if (arena == NULL) { |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlValidateHeap); |
| invalid = TRUE; |
| } else { |
| chunk_header_t *head = header_from_ptr(ptr); |
| if (is_live_alloc(ptr, arena, head)) /* checks for NULL */ |
| res = TRUE; |
| } |
| LOG(2, "%s: heap "PFX"=>"PFX" arena, ptr "PFX" => %d\n", |
| __FUNCTION__, heap, arena, ptr, res); |
| dr_switch_to_app_state(drcontext); |
| if (invalid) /* see above about setting errno in app mode */ |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| static NTSTATUS WINAPI |
| replace_RtlQueryHeapInformation(HANDLE heap, |
| HEAP_INFORMATION_CLASS info_class, |
| PVOID buf OPTIONAL, |
| SIZE_T buflen OPTIONAL, |
| PSIZE_T outlen OPTIONAL) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| NTSTATUS res = STATUS_SUCCESS; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| /* In MSDN only HeapCompatibilityInformation is supported. It returns a ULONG |
| * that we want to set to 0 to indicate neither look-aside lists nor |
| * low-fragmentation heap support. |
| */ |
| if (arena == NULL) { |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlQueryHeapInformation); |
| res = STATUS_INVALID_PARAMETER; |
| } else if (info_class != HeapCompatibilityInformation) { |
| res = STATUS_INVALID_PARAMETER; |
| } else if (buflen < sizeof(ULONG)) { |
| res = STATUS_BUFFER_TOO_SMALL; |
| } else { |
| mc.pc = (app_pc) replace_RtlQueryHeapInformation; |
| if (client_write_memory(buf, buflen, &mc)) |
| *(ULONG *)buf = 0; |
| if (outlen != NULL) { |
| if (client_write_memory((byte *)outlen, sizeof(ULONG), &mc)) |
| *outlen = sizeof(ULONG); |
| } |
| res = STATUS_SUCCESS; |
| } |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static NTSTATUS WINAPI |
| replace_RtlSetHeapInformation(HANDLE heap, HEAP_INFORMATION_CLASS info_class, |
| PVOID buf, SIZE_T buflen) |
| { |
| void *drcontext = enter_client_code(); |
| /* MSDN examples, and crt0.c, allow NULL to presumably mean the process heap */ |
| arena_header_t *arena = heap_to_arena(heap == NULL ? process_heap : heap); |
| NTSTATUS res = STATUS_SUCCESS; |
| if (arena == NULL) { |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlSetHeapInformation); |
| res = STATUS_INVALID_PARAMETER; |
| } else if (info_class == HeapCompatibilityInformation) { |
| if (buflen < sizeof(ULONG)) { |
| res = STATUS_BUFFER_TOO_SMALL; |
| } else { |
| /* Just turn into a nop (xref i#280) as we don't care if they request LFH */ |
| res = STATUS_SUCCESS; |
| } |
| } else if (info_class == HeapEnableTerminationOnCorruption) { |
| /* XXX: should we turn into -crash_at_error or sthg, i.e., |
| * treat as an annotation? For now making a nop. |
| */ |
| res = STATUS_SUCCESS; |
| } else { |
| res = STATUS_INVALID_PARAMETER; |
| } |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static SIZE_T WINAPI |
| replace_RtlCompactHeap(HANDLE heap, ULONG flags) |
| { |
| void *drcontext = enter_client_code(); |
| SIZE_T res = 0; |
| BOOL success = FALSE; |
| arena_header_t *arena = heap_to_arena(heap); |
| if (arena == NULL) { |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlCompactHeap); |
| } else { |
| arena_lock(drcontext, arena, !TEST(HEAP_NO_SERIALIZE, arena->flags) && |
| !TEST(HEAP_NO_SERIALIZE, flags)); |
| success = TRUE; |
| if (arena->next_chunk < arena->commit_end) |
| res = arena->commit_end - arena->next_chunk; |
| arena_unlock(drcontext, arena, !TEST(HEAP_NO_SERIALIZE, arena->flags) && |
| !TEST(HEAP_NO_SERIALIZE, flags)); |
| } |
| dr_switch_to_app_state(drcontext); |
| if (!success) /* see above about setting errno in app mode */ |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| else if (res == 0) /* actually out of space */ |
| set_app_error_code(drcontext, NO_ERROR); |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| |
| |
| #ifdef X64 |
| /* See i#907, i#995, i#1032. For x64, strings are allocated via exported |
| * heap routines, but freed via internal. |
| */ |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_NtdllpFreeStringRoutine(PVOID ptr) |
| { |
| void *drcontext = enter_client_code(); |
| /* This routine calls RtlpFreeHeap(PEB->ProcessHeap, 0x2, ptr - 0x10, ptr). |
| * I have no idea what the 0x2 is: is it really HEAP_GROWABLE?!?. |
| * We ignore it here. |
| */ |
| arena_header_t *arena = heap_to_arena(process_heap); |
| RTL_HEAP_BOOL_TYPE res = FALSE; |
| bool ok; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s ptr="PFX"\n", __FUNCTION__, ptr); |
| ASSERT(arena != NULL, "process_heap should always have an arena"); |
| if (arena != NULL) { |
| ok = (bool)(ptr_uint_t) ONDSTACK_REPLACE_FREE_COMMON |
| (arena, ptr, (!TEST(HEAP_NO_SERIALIZE, arena->flags) ? |
| ALLOC_SYNCHRONIZE : 0) | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_NtdllpFreeStringRoutine, |
| MALLOC_ALLOCATOR_MALLOC | CHUNK_LAYER_RTL); |
| res = !!ok; /* convert from bool to BOOL */ |
| } |
| dr_switch_to_app_state(drcontext); |
| if (!res) |
| set_app_error_code(drcontext, ERROR_INVALID_PARAMETER); |
| exit_client_code(drcontext, true/*already swapped*/); |
| return res; |
| } |
| #endif |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_ignore_arg0(void) |
| { |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s: ignoring\n", __FUNCTION__); |
| exit_client_code(drcontext, false/*need swap*/); |
| return TRUE; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_ignore_arg1(void *arg1) |
| { |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s: ignoring\n", __FUNCTION__); |
| exit_client_code(drcontext, false/*need swap*/); |
| return TRUE; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_ignore_arg2(void *arg1, void *arg2) |
| { |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s: ignoring\n", __FUNCTION__); |
| exit_client_code(drcontext, false/*need swap*/); |
| return TRUE; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_ignore_arg3(void *arg1, void *arg2, void *arg3) |
| { |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s: ignoring\n", __FUNCTION__); |
| exit_client_code(drcontext, false/*need swap*/); |
| return TRUE; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_ignore_arg4(void *arg1, void *arg2, void *arg3, void *arg4) |
| { |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s: ignoring\n", __FUNCTION__); |
| exit_client_code(drcontext, false/*need swap*/); |
| return TRUE; |
| } |
| |
| static RTL_HEAP_BOOL_TYPE WINAPI |
| replace_ignore_arg5(void *arg1, void *arg2, void *arg3, void *arg4, void *arg5) |
| { |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s: ignoring\n", __FUNCTION__); |
| exit_client_code(drcontext, false/*need swap*/); |
| return TRUE; |
| } |
| |
| /*************************************************************************** |
| * RtlHeap iteration replacement routines |
| */ |
| |
| typedef NTSTATUS (*PHEAP_ENUMERATION_ROUTINE)(DR_PARAM_IN PVOID HeapHandle, |
| DR_PARAM_IN PVOID UserParam); |
| |
| typedef struct _getheaps_data_t { |
| ULONG actual_len; |
| ULONG user_len; |
| HANDLE *user_heaps; |
| dr_mcontext_t *mc; |
| } getheaps_data_t; |
| |
| #define STATUS_NO_MORE_ENTRIES ((NTSTATUS)0x8000001a) |
| |
| static bool |
| heap_iter_getheaps(byte *start, byte *end, uint flags |
| _IF_WINDOWS(HANDLE heap), void *iter_data) |
| { |
| getheaps_data_t *data = (getheaps_data_t *) iter_data; |
| /* We do not attempt to walk pre-us heaps. We'd have to mix wrap and |
| * replace in a strange way, and pre-us should be system lib allocs unrelated |
| * to the app (XXX: except for delayed init or attach: though those are |
| * non-default modes). |
| */ |
| if (TEST(HEAP_ARENA, flags) && !TEST(HEAP_PRE_US, flags)) { |
| arena_header_t *arena = (arena_header_t *) start; |
| if (TEST(ARENA_MAIN, arena->flags)) { |
| LOG(2, "%s: "PFX"-"PFX" heap="PFX"\n", __FUNCTION__, start, end, heap); |
| if (data->user_len > data->actual_len) { |
| /* We avoid crashing (reported as internal error) if a problem w/ this |
| * write. |
| */ |
| if (client_write_memory((byte *)&data->user_heaps[data->actual_len], |
| sizeof(data->user_heaps[0]), data->mc)) |
| data->user_heaps[data->actual_len] = heap; |
| } |
| data->actual_len++; |
| } |
| } |
| return true; |
| } |
| |
| static ULONG WINAPI |
| replace_RtlGetProcessHeaps(ULONG count, HANDLE *heaps) |
| { |
| void *drcontext = enter_client_code(); |
| dr_mcontext_t mc; |
| getheaps_data_t data = {0, count, heaps, &mc}; |
| LOG(2, "%s\n", __FUNCTION__); |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| mc.pc = (app_pc) replace_RtlGetProcessHeaps; |
| /* No input validation needed: the real API crashes if passed NULL */ |
| heap_region_iterate(heap_iter_getheaps, &data); |
| exit_client_code(drcontext, false/*need swap*/); |
| return data.actual_len; |
| } |
| |
| static NTSTATUS WINAPI |
| replace_RtlEnumProcessHeaps(PHEAP_ENUMERATION_ROUTINE HeapEnumerationRoutine, |
| PVOID UserParam) |
| { |
| void *drcontext = enter_client_code(); |
| /* FIXME i#1719: NYI. This one is difficult, as we need to run app code. |
| * We probably need an outer drwrap_replace() layer that calls an inner |
| * drwrap_replace_native() layer. The inner layer does what GetProcessHeaps does |
| * and passes the array (allocated where?) to the outer layer, which is |
| * interpreted and can safely run the callback routine. |
| */ |
| ASSERT(false, "NYI"); |
| exit_client_code(drcontext, false/*need swap*/); |
| return STATUS_SUCCESS; |
| } |
| |
| static NTSTATUS WINAPI |
| replace_RtlWalkHeap(HANDLE heap, PVOID entry) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = heap_to_arena(heap); |
| NTSTATUS res = STATUS_SUCCESS; |
| dr_mcontext_t mc; |
| rtl_process_heap_entry_t *e = (rtl_process_heap_entry_t *) entry; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| mc.pc = (app_pc) replace_RtlWalkHeap; |
| LOG(2, "%s heap="PFX" entry="PFX"\n", __FUNCTION__, heap, entry); |
| /* XXX i#1719: we do not bother to try and iterate pre-us heaps */ |
| if (arena == NULL) { |
| report_invalid_heap(heap, &mc, (app_pc)replace_RtlWalkHeap); |
| res = STATUS_INVALID_PARAMETER; |
| } else if (!client_read_memory((byte *)&e->lpData, sizeof(e->lpData), &mc)) { |
| res = STATUS_INVALID_PARAMETER; |
| } else { |
| arena_header_t *a; |
| byte *cur; |
| chunk_header_t *head = NULL; |
| bool region = false; |
| /* client_read_memory will complain that the arena is unaddr so we safe_read */ |
| arena_header_t safe_a; |
| /* We're supposed to have a PROCESS_HEAP_REGION entry with e->Region filled |
| * out prior to the first chunk in each region. |
| */ |
| if (e->lpData == NULL) { |
| a = arena; |
| region = true; |
| } else if (!client_read_memory((byte *)e, sizeof(*e), &mc) || |
| !safe_read((byte *)e->Block.hMem, sizeof(safe_a), &safe_a)) { |
| res = STATUS_INVALID_PARAMETER; |
| } else { |
| if (TEST(RTL_PROCESS_HEAP_REGION, e->wFlags)) { |
| a = (arena_header_t *) e->lpData; |
| cur = a->start_chunk; |
| } else { |
| cur = (byte *) e->lpData; |
| for (a = arena; a != NULL; a = a->next_arena) { |
| if (cur >= a->start_chunk && cur < a->next_chunk) |
| break; |
| } |
| if (a == NULL) |
| res = STATUS_INVALID_PARAMETER; |
| else { |
| /* advance to next chunk */ |
| head = header_from_ptr(cur); |
| if (head == NULL) { |
| cur = a->next_chunk; |
| res = STATUS_INVALID_PARAMETER; |
| } else |
| cur += head->alloc_size + inter_chunk_space(); |
| } |
| } |
| if (cur >= a->next_chunk) { |
| a = a->next_arena; |
| region = true; |
| } |
| } |
| if (res == STATUS_SUCCESS && region && |
| !client_write_memory((byte *)e, sizeof(*e), &mc)) |
| res = STATUS_INVALID_PARAMETER; |
| if (res != STATUS_SUCCESS) { |
| /* error already set */ |
| } else if (a == NULL) { |
| res = STATUS_NO_MORE_ENTRIES; |
| } else { |
| e->iRegionIndex = 0; |
| e->cbOverhead = sizeof(chunk_header_t); |
| if (region) { |
| e->wFlags = RTL_PROCESS_HEAP_REGION; |
| e->Region.dwCommittedSize = (DWORD) (a->commit_end - (byte *)a); |
| e->Region.dwUnCommittedSize = (DWORD) (a->reserve_end - a->commit_end); |
| e->Region.lpFirstBlock = (LPVOID) a->start_chunk; |
| e->Region.lpLastBlock = (LPVOID) a->next_chunk; |
| /* Store for use on the next query */ |
| e->lpData = (PVOID) a; |
| } else { |
| head = header_from_ptr(cur); |
| if (TEST(CHUNK_FREED, head->flags)) |
| e->wFlags = RTL_PROCESS_HEAP_UNCOMMITTED_RANGE; |
| else |
| e->wFlags = RTL_PROCESS_HEAP_ENTRY_BUSY; |
| e->lpData = cur; |
| e->cbData = head->alloc_size; |
| /* We can't use unused fields like e->Block.hMem to store the arena |
| * for use on the next query, as the HeapWalk layer has its own |
| * copy of this data struct and it doesn't copy all fields out. |
| */ |
| } |
| } |
| } |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| #endif /* WINDOWS */ |
| |
| #ifdef MACOS |
| /*************************************************************************** |
| * Malloc zone API (i#1699) |
| * |
| * We ignore the indirection through the function pointers in the |
| * malloc_zone_t struct. Natively, applications can replace individual |
| * routines with their own versions, but for DrMem we want everything here. |
| */ |
| |
| typedef struct _zone_iter_data_t { |
| const void *ptr; |
| malloc_zone_t *zone; |
| } zone_iter_data_t; |
| |
| static arena_header_t * |
| zone_to_arena(malloc_zone_t *zone) |
| { |
| arena_header_t *arena = (arena_header_t *) zone; |
| uint magic; |
| if (arena != NULL && |
| safe_read(&arena->magic, sizeof(magic), &magic) && |
| magic == HEADER_MAGIC && |
| TEST(ARENA_MAIN, arena->flags)) |
| return arena; |
| return NULL; |
| } |
| |
| static inline void |
| report_invalid_zone(malloc_zone_t *zone, dr_mcontext_t *mc, app_pc caller) |
| { |
| client_invalid_heap_arg(caller, (byte *)zone, mc, |
| "malloc zone API: invalid zone", false/*!free*/); |
| } |
| |
| static malloc_zone_t * |
| replace_malloc_create_zone(vm_size_t start_size, unsigned flags) |
| { |
| arena_header_t *arena = NULL; |
| void *drcontext = enter_client_code(); |
| LOG(2, "%s %d %d\n", __FUNCTION__, start_size, flags); |
| /* Only 0 is supported for flags but we ignore it to match native behavior */ |
| arena = arena_create(NULL, ALIGN_FORWARD(start_size, PAGE_SIZE)); |
| LOG(2, "\t%s %d %d => "PFX"\n", __FUNCTION__, start_size, flags, arena); |
| exit_client_code(drcontext, false/*need swap*/); |
| return (malloc_zone_t *) arena; |
| } |
| |
| static void |
| replace_malloc_destroy_zone(malloc_zone_t *zone) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = zone_to_arena(zone); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, arena); |
| if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_destroy_zone); |
| else { |
| destroy_arena_family(arena, &mc, true/*free chunks*/, |
| (app_pc)replace_malloc_destroy_zone); |
| } |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static malloc_zone_t * |
| replace_malloc_default_zone(void) |
| { |
| void *drcontext = enter_client_code(); |
| malloc_zone_t *res = cur_arena->zone; |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static bool |
| zone_from_ptr_iter(byte *start, byte *end, uint flags |
| _IF_WINDOWS(HANDLE heap), void *iter_data) |
| { |
| zone_iter_data_t *data = (zone_iter_data_t *) iter_data; |
| LOG(3, "%s: "PFX"-"PFX" 0x%x\n", __FUNCTION__, start, end, flags); |
| if (TEST(HEAP_ARENA, flags) && |
| (byte *)data->ptr >= start && (byte *)data->ptr < end) { |
| data->zone = (malloc_zone_t *) start; |
| return false; /* stop iterating */ |
| } |
| return true; |
| } |
| |
| static malloc_zone_t * |
| replace_malloc_zone_from_ptr(const void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| zone_iter_data_t data = {ptr, NULL}; |
| chunk_header_t *head = header_from_ptr(ptr); |
| if (is_valid_chunk(ptr, head)) { |
| /* XXX: do we have any better way to go from a chunk to containing arena? */ |
| heap_region_iterate(zone_from_ptr_iter, &data); |
| } |
| LOG(2, "\t%s "PFX" => "PIFX"\n", __FUNCTION__, ptr, data.zone); |
| exit_client_code(drcontext, false/*need swap*/); |
| return data.zone; |
| } |
| |
| static size_t |
| replace_malloc_zone_size(malloc_zone_t *zone, const void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| size_t res = 0; |
| arena_header_t *arena = zone_to_arena(zone); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s: "PFX"\n", __FUNCTION__, ptr); |
| if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_zone_size); |
| else { |
| /* The API promises to return 0 if ptr is not in zone */ |
| arena_header_t *a; |
| arena_lock(drcontext, arena, true); |
| for (a = arena; a != NULL; a = a->next_arena) { |
| if ((byte *)ptr >= a->start_chunk && (byte *)ptr < a->reserve_end) |
| break; |
| } |
| arena_unlock(drcontext, arena, true); |
| if (a == NULL) |
| res = 0; |
| else { |
| res = replace_size_common(arena, (byte *)ptr, ALLOC_SYNCHRONIZE, drcontext, |
| &mc, (app_pc)replace_malloc_zone_size, |
| MALLOC_ALLOCATOR_MALLOC); |
| if (res == (size_t)-1) |
| res = 0; /* 0 on failure */ |
| } |
| } |
| LOG(2, "\t%s "PFX" => "PIFX"\n", __FUNCTION__, ptr, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_malloc_zone_malloc(malloc_zone_t *zone, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = zone_to_arena(zone); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s zone="PFX" (=> "PFX") size="PIFX"\n", |
| __FUNCTION__, zone, arena, size); |
| if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_zone_malloc); |
| else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, size, 0, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_malloc_zone_malloc, MALLOC_ALLOCATOR_MALLOC); |
| } |
| LOG(2, "\t%s "PFX" %d => "PIFX"\n", __FUNCTION__, zone, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = zone_to_arena(zone); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s zone="PFX" (=> "PFX") %d X %d\n", |
| __FUNCTION__, zone, arena, num_items, size); |
| if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_zone_calloc); |
| else if (unsigned_multiply_will_overflow(num_items, size)) { |
| LOG(2, "calloc size will overflow => returning NULL\n"); |
| client_handle_alloc_failure(UINT_MAX, (app_pc)replace_malloc_zone_calloc, &mc); |
| res = NULL; |
| } else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, num_items * size, 0, |
| ALLOC_SYNCHRONIZE | ALLOC_ZERO | ALLOC_INVOKE_CLIENT, drcontext, |
| &mc, (app_pc)replace_malloc_zone_calloc, MALLOC_ALLOCATOR_MALLOC); |
| } |
| LOG(2, "\t%s "PFX" %d X %d => "PIFX"\n", __FUNCTION__, zone, num_items, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_malloc_zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = zone_to_arena(zone); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX" %d\n", __FUNCTION__, ptr, size); |
| if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_zone_realloc); |
| else { |
| res = ONDSTACK_REPLACE_REALLOC_COMMON(arena, ptr, size, |
| ALLOC_SYNCHRONIZE | ALLOC_ALLOW_NULL, |
| drcontext, &mc, |
| (app_pc)replace_malloc_zone_realloc, |
| MALLOC_ALLOCATOR_MALLOC); |
| } |
| LOG(2, "\t%s %d => "PFX"\n", __FUNCTION__, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void |
| replace_malloc_zone_free(malloc_zone_t *zone, void *ptr) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = zone_to_arena(zone); |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s "PFX"\n", __FUNCTION__, ptr); |
| if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_zone_realloc); |
| else { |
| ONDSTACK_REPLACE_FREE_COMMON(arena, ptr, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_malloc_zone_free, |
| MALLOC_ALLOCATOR_MALLOC); |
| } |
| exit_client_code(drcontext, false/*need swap*/); |
| } |
| |
| static void * |
| replace_malloc_zone_valloc(malloc_zone_t *zone, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = zone_to_arena(zone); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s zone="PFX" (=> "PFX") size="PIFX"\n", |
| __FUNCTION__, zone, arena, size); |
| if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_zone_valloc); |
| else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, size, PAGE_SIZE, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_malloc_zone_valloc, MALLOC_ALLOCATOR_MALLOC); |
| } |
| LOG(2, "\t%s "PFX" %d => "PIFX"\n", __FUNCTION__, zone, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void * |
| replace_malloc_zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) |
| { |
| void *drcontext = enter_client_code(); |
| arena_header_t *arena = zone_to_arena(zone); |
| void *res = NULL; |
| dr_mcontext_t mc; |
| INITIALIZE_MCONTEXT_FOR_REPORT(&mc); |
| LOG(2, "%s zone="PFX" (=> "PFX") size="PIFX"\n", |
| __FUNCTION__, zone, arena, size); |
| if (!IS_POWER_OF_2(alignment)) |
| client_handle_alloc_failure(size, (app_pc)replace_malloc_zone_memalign, &mc); |
| else if (arena == NULL) |
| report_invalid_zone(zone, &mc, (app_pc)replace_malloc_zone_memalign); |
| else { |
| res = ONDSTACK_REPLACE_ALLOC_COMMON |
| (arena, size, alignment, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, (app_pc)replace_malloc_zone_memalign, |
| MALLOC_ALLOCATOR_MALLOC); |
| } |
| LOG(2, "\t%s "PFX" %d => "PIFX"\n", __FUNCTION__, zone, size, res); |
| exit_client_code(drcontext, false/*need swap*/); |
| return res; |
| } |
| |
| static void |
| malloc_zone_init(arena_header_t *arena) |
| { |
| /* i#1699: we do not support apps replacing the func ptrs but we do |
| * fill in the fields with initial values. |
| */ |
| arena->zone = &arena->zone_inlined; |
| arena->zone_inlined.size = replace_malloc_zone_size; |
| arena->zone_inlined.malloc = replace_malloc_zone_malloc; |
| arena->zone_inlined.calloc = replace_malloc_zone_calloc; |
| arena->zone_inlined.valloc = replace_malloc_zone_valloc; |
| arena->zone_inlined.free = replace_malloc_zone_free; |
| arena->zone_inlined.realloc = replace_malloc_zone_realloc; |
| arena->zone_inlined.destroy = replace_malloc_destroy_zone; |
| arena->zone_inlined.batch_malloc = NULL; |
| arena->zone_inlined.batch_free = NULL; |
| arena->zone_inlined.introspect = NULL; |
| /* I'm making the version 5 to avoid having to fill in free_definite_size |
| * or pressure_relief. |
| */ |
| arena->zone_inlined.version = 5; |
| arena->zone_inlined.memalign = replace_malloc_zone_memalign; |
| arena->zone_inlined.free_definite_size = NULL; |
| arena->zone_inlined.pressure_relief = NULL; |
| } |
| |
| #endif /* MACOS */ |
| |
| /*************************************************************************** |
| * drmem-facing interface |
| */ |
| |
| #ifdef LINUX |
| byte * |
| alloc_replace_orig_brk(void) |
| { |
| ASSERT(alloc_ops.replace_malloc, "shouldn't call"); |
| return pre_us_brk; |
| } |
| #endif |
| |
| bool |
| alloc_replace_in_cur_arena(byte *addr) |
| { |
| ASSERT(alloc_ops.replace_malloc, "shouldn't call"); |
| return ptr_is_in_arena(addr, cur_arena); |
| } |
| |
| bool |
| alloc_entering_replace_routine(app_pc pc) |
| { |
| return drwrap_is_replaced_native(pc); |
| } |
| |
| static bool |
| func_interceptor(routine_type_t type, bool check_mismatch, bool check_winapi_match, |
| void **routine DR_PARAM_OUT, bool *at_entry DR_PARAM_OUT, |
| uint *stack DR_PARAM_OUT) |
| { |
| /* almost everything is at the callee entry */ |
| *at_entry = true; |
| #ifdef WINDOWS |
| if (is_rtl_routine(type)) { |
| switch (type) { |
| case RTL_ROUTINE_MALLOC: |
| *routine = (void *) replace_RtlAllocateHeap; |
| *stack = sizeof(void*) * 3; |
| return true; |
| case RTL_ROUTINE_REALLOC: |
| *routine = (void *) replace_RtlReAllocateHeap; |
| *stack = sizeof(void*) * 4; |
| return true; |
| case RTL_ROUTINE_FREE: |
| *routine = (void *) replace_RtlFreeHeap; |
| *stack = sizeof(void*) * 3; |
| return true; |
| case RTL_ROUTINE_SIZE: |
| *routine = (void *) replace_RtlSizeHeap; |
| *stack = sizeof(void*) * 3; |
| return true; |
| case RTL_ROUTINE_CREATE: |
| *routine = (void *) replace_RtlCreateHeap; |
| *stack = sizeof(void*) * 6; |
| return true; |
| case RTL_ROUTINE_DESTROY: |
| *routine = (void *) replace_RtlDestroyHeap; |
| *stack = sizeof(void*) * 1; |
| return true; |
| case RTL_ROUTINE_LOCK: |
| *routine = (void *) replace_RtlLockHeap; |
| *stack = sizeof(void*) * 1; |
| return true; |
| case RTL_ROUTINE_UNLOCK: |
| *routine = (void *) replace_RtlUnlockHeap; |
| *stack = sizeof(void*) * 1; |
| return true; |
| case RTL_ROUTINE_HEAPINFO_GET: |
| *routine = (void *) replace_RtlQueryHeapInformation; |
| *stack = sizeof(void*) * 5; |
| return true; |
| case RTL_ROUTINE_HEAPINFO_SET: |
| *routine = (void *) replace_RtlSetHeapInformation; |
| *stack = sizeof(void*) * 4; |
| return true; |
| case RTL_ROUTINE_VALIDATE: |
| *routine = (void *) replace_RtlValidateHeap; |
| *stack = sizeof(void*) * 3; |
| return true; |
| # ifdef X64 |
| /* i#995-c#3: we need to replace NtdllpFreeStringRoutine in win-x64, |
| * which takes the first arg as the ptr to be freed. |
| */ |
| case RTL_ROUTINE_FREE_STRING: |
| *routine = (void *) replace_NtdllpFreeStringRoutine; |
| *stack = sizeof(void*); |
| return true; |
| # endif |
| case RTL_ROUTINE_COMPACT: |
| *routine = (void *) replace_RtlCompactHeap; |
| *stack = sizeof(void*) * 2; |
| return true; |
| /* XXX i#1202: NYI. Warn or assert if we hit them? */ |
| case RTL_ROUTINE_USERINFO_GET: |
| *routine = (void *) replace_ignore_arg5; |
| *stack = sizeof(void*) * 5; |
| return true; |
| case RTL_ROUTINE_USERINFO_SET: |
| *routine = (void *) replace_ignore_arg4; |
| *stack = sizeof(void*) * 4; |
| return true; |
| case RTL_ROUTINE_SETFLAGS: |
| *routine = (void *) replace_ignore_arg5; |
| *stack = sizeof(void*) * 5; |
| return true; |
| case RTL_ROUTINE_GET_HEAPS: |
| *routine = (void *) replace_RtlGetProcessHeaps; |
| *stack = sizeof(void*) * 2; |
| return true; |
| case RTL_ROUTINE_WALK: |
| *routine = (void *) replace_RtlWalkHeap; |
| *stack = sizeof(void*) * 2; |
| return true; |
| #if 0 /* FIXME i#1719: NYI */ |
| case RTL_ROUTINE_ENUM: |
| *routine = (void *) replace_RtlEnumProcessHeaps; |
| *stack = sizeof(void*) * 2; |
| return true; |
| #endif |
| /* note that replacing malloc does NOT eliminate the need to |
| * wrap LdrShutdownProcess b/c it calls RtlpHeapIsLocked, |
| * unless we wanted to treat pre-us Heap header as addressable |
| */ |
| default: |
| *routine = NULL; /* wrap it */ |
| return true; |
| } |
| } |
| #endif |
| /* nothing below here is stdcall */ |
| *stack = 0; |
| #ifdef MACOS |
| switch (type) { |
| case ZONE_ROUTINE_CREATE: |
| *routine = (void *) replace_malloc_create_zone; |
| return true; |
| case ZONE_ROUTINE_DESTROY: |
| *routine = (void *) replace_malloc_destroy_zone; |
| return true; |
| case ZONE_ROUTINE_DEFAULT: |
| *routine = (void *) replace_malloc_default_zone; |
| return true; |
| case ZONE_ROUTINE_QUERY: |
| *routine = (void *) replace_malloc_zone_from_ptr; |
| return true; |
| case ZONE_ROUTINE_MALLOC: |
| *routine = (void *) replace_malloc_zone_malloc; |
| return true; |
| case ZONE_ROUTINE_CALLOC: |
| *routine = (void *) replace_malloc_zone_calloc; |
| return true; |
| case ZONE_ROUTINE_VALLOC: |
| *routine = (void *) replace_malloc_zone_valloc; |
| return true; |
| case ZONE_ROUTINE_REALLOC: |
| *routine = (void *) replace_malloc_zone_realloc; |
| return true; |
| case ZONE_ROUTINE_MEMALIGN: |
| *routine = (void *) replace_malloc_zone_memalign; |
| return true; |
| case ZONE_ROUTINE_FREE: |
| *routine = (void *) replace_malloc_zone_free; |
| return true; |
| default: break; /* continue below */ |
| } |
| #endif |
| switch (type) { |
| #ifdef UNIX |
| case HEAP_ROUTINE_POSIX_MEMALIGN: |
| *routine = (void *) replace_posix_memalign; |
| return true; |
| case HEAP_ROUTINE_MEMALIGN: |
| *routine = (void *) replace_memalign; |
| return true; |
| case HEAP_ROUTINE_VALLOC: |
| *routine = (void *) replace_valloc; |
| return true; |
| case HEAP_ROUTINE_PVALLOC: |
| *routine = (void *) replace_pvalloc; |
| return true; |
| #endif |
| default: break; /* continue below */ |
| } |
| if (is_malloc_routine(type)) { |
| *routine = (void *) |
| (check_winapi_match ? replace_malloc : replace_malloc_nomatch); |
| } |
| else if (is_calloc_routine(type)) { |
| *routine = (void *) |
| (check_winapi_match ? replace_calloc : replace_calloc_nomatch); |
| } |
| else if (is_realloc_routine(type)) { |
| *routine = (void *) |
| (check_winapi_match ? replace_realloc : replace_realloc_nomatch); |
| } |
| else if (is_free_routine(type)) |
| *routine = (void *) (check_winapi_match ? replace_free : replace_free_nomatch); |
| else if (is_size_routine(type)) { |
| *routine = (void *) |
| (check_winapi_match ? replace_malloc_usable_size : |
| replace_malloc_usable_size_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_NEW) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_new : replace_operator_new_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_NEW_ARRAY) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_new_array : replace_operator_new_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_NEW_NOTHROW) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_new_nothrow : |
| replace_operator_new_nothrow_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_NEW_ARRAY_NOTHROW) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_new_array_nothrow : |
| replace_operator_new_nothrow_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_DELETE) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_delete : replace_operator_delete_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_DELETE_ARRAY) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_delete_array : |
| replace_operator_delete_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_DELETE_NOTHROW) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_delete_nothrow : |
| replace_operator_delete_nothrow_nomatch); |
| } |
| else if (type == HEAP_ROUTINE_DELETE_ARRAY_NOTHROW) { |
| *routine = (void *) |
| (check_mismatch ? replace_operator_delete_array_nothrow : |
| replace_operator_delete_nothrow_nomatch); |
| } |
| #ifdef WINDOWS |
| else if (type == HEAP_ROUTINE_DebugHeapDelete) { |
| *routine = (void *) replace_operator_combined_delete; |
| /* i#965: we must replace at the call site, but drwrap now handles that |
| * and saves us a lot of work |
| */ |
| *at_entry = false; |
| } |
| #endif |
| else |
| *routine = NULL; /* but go ahead and wrap */ |
| return true; |
| } |
| |
| static void |
| malloc_replace__intercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e, |
| bool check_mismatch, bool check_winapi_match) |
| { |
| void *interceptor = NULL; |
| bool at_entry = true; |
| uint stack_adjust = 0; |
| #ifndef WINDOWS |
| check_winapi_match = true; /* always use the match versions */ |
| #endif |
| if (!func_interceptor(type, check_mismatch, check_winapi_match, |
| &interceptor, &at_entry, &stack_adjust)) { |
| /* we'll replace it ourselves elsewhere: alloc.c should ignore it */ |
| return; |
| } |
| if (interceptor != NULL) { |
| /* optimization: only pass where needed, for Windows libc */ |
| void *user_data = IF_WINDOWS_ELSE(is_rtl_routine(type) ? NULL : (void *) e, NULL); |
| if (!drwrap_replace_native(pc, interceptor, at_entry, |
| IF_X64_ELSE(0, stack_adjust), user_data, false)) |
| ASSERT(false, "failed to replace alloc routine"); |
| } else { |
| LOG(2, "wrapping, not replacing, "PFX"\n", pc); |
| /* else wrap */ |
| /* XXX i#1202: Windows NYI: want to replace |
| * _Crt* / RtlMultipleAllocateHeap / etc., along with all other |
| * heap-related routines currenly not intercepted, w/ nops |
| */ |
| malloc_wrap__intercept(pc, type, e, check_mismatch, check_winapi_match); |
| } |
| } |
| |
| static void |
| malloc_replace__unintercept(app_pc pc, routine_type_t type, alloc_routine_entry_t *e, |
| bool check_mismatch, bool check_winapi_match) |
| { |
| void *interceptor = NULL; |
| bool at_entry; |
| uint stack_adjust = 0; |
| #ifndef WINDOWS |
| check_winapi_match = true; /* always use the match versions */ |
| #endif |
| if (!func_interceptor(type, check_mismatch, check_winapi_match, |
| &interceptor, &at_entry, &stack_adjust)) { |
| /* we'll un-replace it ourselves elsewhere: alloc.c should ignore it */ |
| return; |
| } |
| if (interceptor != NULL) { |
| if (!drwrap_replace_native(pc, NULL, at_entry, IF_X64_ELSE(0, stack_adjust), |
| NULL, true)) |
| ASSERT(false, "failed to un-replace alloc routine"); |
| } else { |
| malloc_wrap__unintercept(pc, type, e, check_mismatch, check_winapi_match); |
| } |
| } |
| |
| static void * |
| malloc_replace__set_init(heapset_type_t type, app_pc pc, const module_data_t *mod, |
| void *libc_data) |
| { |
| #ifdef WINDOWS |
| if (type == HEAPSET_RTL) { |
| return NULL; |
| } else if (libc_data != NULL) { |
| /* dbg crt and regular crt and cpp routines share a Heap (i#964) */ |
| LOG(2, "shared default Heap for libc set type=%d @"PFX" is "PFX"\n", |
| type, pc, libc_data); |
| return libc_data; |
| } else { |
| arena_header_t *arena = NULL; |
| HANDLE pre_us_heap = NULL; |
| bool in_table; |
| IF_DEBUG(bool unique;) |
| |
| /* Determine the pre-us Heap for this pre-existing module, if |
| * any (i#959). |
| */ |
| if (!process_initialized) { |
| pre_us_heap = libc_heap_handle(mod); |
| LOG(2, "pre-existing Heap for libc set type=%d module=%s is "PFX"\n", |
| type, (dr_module_preferred_name(mod) == NULL) ? "<null>" : |
| dr_module_preferred_name(mod), pre_us_heap); |
| if (pre_us_heap != NULL) { |
| if (pre_us_heap == process_heap) { |
| /* win8 msvcr*.dll uses process heap (i#1223) */ |
| LOG(2, "pre-existing libc Heap for module=%s == process heap!\n", |
| (dr_module_preferred_name(mod) == NULL) ? "<null>" : |
| dr_module_preferred_name(mod)); |
| return cur_arena; |
| } |
| /* We should have already added in pre_existing_heap_init() */ |
| arena = (arena_header_t *) |
| hashtable_lookup(&crtheap_handle_table, (void *)pre_us_heap); |
| in_table = (arena != NULL); |
| ASSERT(in_table, "pre-us libc missed in heap walk"); |
| } |
| } |
| |
| /* Create the Heap for this libc alloc routine set (i#939) */ |
| if (arena == NULL) { |
| arena = (arena_header_t *) |
| create_Rtl_heap(PAGE_SIZE, ARENA_INITIAL_SIZE, HEAP_GROWABLE); |
| } |
| LOG(2, "new default Heap for libc set type=%d @"PFX" modbase="PFX" is "PFX"\n", |
| type, pc, mod->start, arena); |
| arena->flags |= ARENA_LIBC_DEFAULT; |
| /* Mark as speculative: for VS2012+, libc uses ProcessHeap, so we never |
| * see RtlCreateHeap and we must instead wait for the 1st malloc set use |
| * to see whether we want this separate arena. |
| */ |
| arena->flags |= ARENA_LIBC_SPECULATIVE; |
| arena->alloc_set_member = pc; |
| IF_DEBUG(unique =) |
| hashtable_add(&crtheap_mod_table, (void *)mod->start, (void *)arena); |
| ASSERT(unique, "duplicate default Heap"); |
| arena->modbase = mod->start; |
| |
| /* Just in case: should be present from pre_existing_heap_init() */ |
| if (pre_us_heap != NULL && !in_table) { |
| IF_DEBUG(unique =) |
| hashtable_add(&crtheap_handle_table, (void *)pre_us_heap, (void *)arena); |
| ASSERT(unique, "duplicate default Heap"); |
| arena->handle = pre_us_heap; |
| } |
| |
| return arena; |
| } |
| /* cpp set does not need its own Heap (i#964) */ |
| #endif |
| return NULL; |
| } |
| |
| static void |
| malloc_replace__set_exit(heapset_type_t type, app_pc pc, void *user_data) |
| { |
| #ifdef WINDOWS |
| if (type != HEAPSET_RTL && user_data != NULL) { |
| /* Destroy the Heap for this libc alloc routine set (i#939) */ |
| arena_header_t *arena = (arena_header_t *) user_data; |
| /* For non-pre-us /MT module, we see the HeapDestroy, so arena can be NULL */ |
| if (arena != NULL && arena != cur_arena) { |
| LOG(2, "destroying default Heap "PFX" for libc set @"PFX"\n", arena, pc); |
| /* i#939: we assume the Heap used by a libc routine set is not destroyed |
| * mid-run (pool-style) and is simply torn down at the end without any |
| * desire to free the individual chunks. |
| * XXX if we do free indiv chunks, we have no mcxt: should be rare, but |
| * can imagine an app bug involving memory freed when a |
| * library w/ libc routine unloads |
| */ |
| destroy_Rtl_heap(arena, NULL, false/*do not free indiv chunks*/); |
| } |
| } |
| #endif |
| } |
| |
| static void |
| malloc_replace__add(app_pc start, app_pc end, app_pc real_end, |
| bool pre_us, uint client_flags, dr_mcontext_t *mc, app_pc post_call) |
| { |
| IF_DEBUG(bool new_entry;) |
| chunk_header_t *head = global_alloc(sizeof(*head), HEAPSTAT_WRAP); |
| head->alloc_size = (real_end - start); |
| ASSERT(real_end - end <= REQUEST_DIFF_MAX, "too-large padding on pre-us malloc"); |
| head->u.unfree.request_diff = (real_end - end); |
| if (chunk_request_size(head) >= LARGE_MALLOC_MIN_SIZE) |
| malloc_large_add(start, chunk_request_size(head)); |
| head->flags = CHUNK_PRE_US; |
| head->magic = HEADER_MAGIC; |
| head->user_data = NULL; |
| /* we assume only called for pre_us and only during init when no lock is needed */ |
| ASSERT(pre_us, "malloc add from outside must be pre_us"); |
| IF_DEBUG(new_entry =) |
| hashtable_add(&pre_us_table, (void *)start, (void *)head); |
| LOG(3, "new pre-us alloc "PFX"-"PFX"-"PFX"\n", start, end, real_end); |
| ASSERT(new_entry, "should be no pre-us dups"); |
| notify_client_alloc(NULL, start, head, |
| /* no client action: caller can do that on its own */ |
| ALLOC_INVOKE_CLIENT_DATA, mc, post_call); |
| } |
| |
| static bool |
| malloc_replace__is_pre_us_ex(app_pc start, bool ok_if_invalid) |
| { |
| /* see notes up top about not needing an external lock */ |
| chunk_header_t *head = hashtable_lookup(&pre_us_table, (void *)start); |
| return (head != NULL && (ok_if_invalid || !TEST(CHUNK_FREED, head->flags))); |
| } |
| |
| static bool |
| malloc_replace__is_pre_us(app_pc start) |
| { |
| return malloc_replace__is_pre_us_ex(start, false); |
| } |
| |
| static app_pc |
| malloc_replace__end(app_pc start) |
| { |
| chunk_header_t *head = header_from_ptr_include_pre_us(start); |
| if (head == NULL || TEST(CHUNK_FREED, head->flags)) |
| return NULL; |
| else |
| return start + chunk_request_size(head); |
| } |
| |
| /* Returns -1 on failure */ |
| static ssize_t |
| malloc_replace__size(app_pc start) |
| { |
| chunk_header_t *head; |
| ssize_t res = -1; |
| head = header_from_ptr_include_pre_us(start); |
| if (head != NULL && !TEST(CHUNK_FREED, head->flags)) |
| res = chunk_request_size(head); |
| return res; |
| } |
| |
| static ssize_t |
| malloc_replace__size_invalid_only(app_pc start) |
| { |
| chunk_header_t *head = header_from_ptr_include_pre_us(start); |
| if (head == NULL || !TEST(CHUNK_FREED, head->flags)) |
| return -1; |
| else |
| return chunk_request_size(head); |
| } |
| |
| static void * |
| malloc_replace__get_client_data(app_pc start) |
| { |
| chunk_header_t *head = header_from_ptr_include_pre_us(start); |
| /* following alloc.c's lead and not failing on a freed chunk. |
| * ditto on routines below. not sure if anyone relies on that though. |
| */ |
| if (head == NULL) |
| return NULL; |
| return head->user_data; |
| } |
| |
| static uint |
| malloc_replace__get_client_flags(app_pc start) |
| { |
| chunk_header_t *head = header_from_ptr_include_pre_us(start); |
| if (head == NULL) |
| return 0; |
| return (head->flags & MALLOC_POSSIBLE_CLIENT_FLAGS); |
| } |
| |
| static bool |
| malloc_replace__set_client_flag(app_pc start, uint client_flag) |
| { |
| chunk_header_t *head = header_from_ptr_include_pre_us(start); |
| if (head == NULL) |
| return false; |
| head->flags |= (client_flag & MALLOC_POSSIBLE_CLIENT_FLAGS); |
| return true; |
| } |
| |
| static bool |
| malloc_replace__clear_client_flag(app_pc start, uint client_flag) |
| { |
| chunk_header_t *head = header_from_ptr_include_pre_us(start); |
| if (head == NULL) |
| return false; |
| head->flags &= ~(client_flag & MALLOC_POSSIBLE_CLIENT_FLAGS); |
| return true; |
| } |
| |
| static void |
| malloc_replace__iterate(bool (*cb)(malloc_info_t *info, void *iter_data), void *iter_data) |
| { |
| alloc_iterate(cb, iter_data, true/*live only*/); |
| } |
| |
| static void |
| malloc_replace__lock(void) |
| { |
| #ifdef WINDOWS |
| /* i#949: we can't mark safe to suspend here (in app_heap_lock()) |
| * b/c it's called from clean calls, etc, and thus grabbing the app |
| * lock here is unsafe. Thus we require the global_lock option in order |
| * to call this routine. |
| * We don't need to grab the app lock as we don't need to synchronize |
| * with app actions: only with our own allocator. |
| */ |
| ASSERT(alloc_ops.global_lock, "must set global_lock to use malloc_lock()"); |
| dr_recurlock_lock(cur_arena->dr_lock); |
| #else |
| dr_recurlock_lock(cur_arena->lock); |
| #endif |
| } |
| |
| static void |
| malloc_replace__unlock(void) |
| { |
| #ifdef WINDOWS |
| /* i#949: see comments above */ |
| ASSERT(alloc_ops.global_lock, "must set global_lock to use malloc_lock()"); |
| dr_recurlock_unlock(cur_arena->dr_lock); |
| #else |
| dr_recurlock_unlock(cur_arena->lock); |
| #endif |
| } |
| |
| static dr_emit_flags_t |
| bb_event(void *drcontext, void *tag, instrlist_t *bb, |
| bool for_trace, bool translating) |
| { |
| /* process and pre-existing modules are all initialized */ |
| process_initialized = true; |
| |
| /* reduce overhead by removing this event now */ |
| if (!drmgr_unregister_bb_app2app_event(bb_event)) |
| ASSERT(false, "drmgr unregistration failed"); |
| |
| return DR_EMIT_DEFAULT; |
| } |
| |
| void |
| alloc_replace_init(void) |
| { |
| #ifdef WINDOWS |
| module_data_t *exe; |
| #endif |
| |
| if (!drmgr_register_bb_app2app_event(bb_event, NULL)) |
| ASSERT(false, "drmgr registration failed"); |
| |
| if (alloc_ops.shared_redzones) { |
| /* For x64 we have to add 8 extra bytes to align this */ |
| header_size = ALIGN_FORWARD(sizeof(chunk_header_t), CHUNK_ALIGNMENT); |
| } else { |
| /* See comment up top: we pay in extra space for simplicity of keeping |
| * the free list next pointer out of the redzone. |
| */ |
| header_size = ALIGN_FORWARD(sizeof(free_header_t), CHUNK_ALIGNMENT); |
| } |
| |
| ASSERT(sizeof(free_header_t) <= |
| (alloc_ops.external_headers ? 0 : sizeof(chunk_header_t)) + CHUNK_MIN_SIZE, |
| "min size too small"); |
| /* we could pad but it's simpler to have struct already have right size */ |
| ASSERT(ALIGNED(header_size, CHUNK_ALIGNMENT), "alignment off"); |
| ASSERT(ALIGNED(inter_chunk_space(), CHUNK_ALIGNMENT), "alignment off"); |
| |
| ASSERT(CHUNK_MIN_MMAP >= LARGE_MALLOC_MIN_SIZE, |
| "we rely on mmapped chunks being in large malloc table"); |
| |
| ASSERT(ARENA_INITIAL_SIZE >= CHUNK_MIN_MMAP, "arena must hold at least 1 chunk"); |
| |
| ASSERT(ALIGNED(alloc_ops.redzone_size, CHUNK_ALIGNMENT), "redzone alignment off"); |
| |
| if (!alloc_ops.shared_redzones) { |
| header_beyond_redzone = header_size; |
| redzone_beyond_header = alloc_ops.redzone_size; |
| } else if (alloc_ops.redzone_size < header_size) { |
| header_beyond_redzone = header_size - alloc_ops.redzone_size; |
| redzone_beyond_header = 0; |
| } else { |
| redzone_beyond_header = (alloc_ops.redzone_size - header_size)/2; |
| ASSERT(redzone_beyond_header*2 + header_size <= alloc_ops.redzone_size, |
| "redzone or header size not aligned properly"); |
| } |
| |
| hashtable_init(&pre_us_table, PRE_US_TABLE_HASH_BITS, HASH_INTPTR, false/*!strdup*/); |
| |
| #ifdef WINDOWS |
| if (alloc_ops.global_lock) |
| global_lock = dr_recurlock_create(); |
| |
| # ifdef X64 |
| replace_nosy_init(); |
| # endif |
| #endif |
| |
| #ifdef LINUX |
| /* we waste pre-brk space of pre-us allocator, and we assume we're |
| * now completely replacing the pre-us allocator. |
| * XXX: better to not use brk and solely use mmap instead? |
| */ |
| cur_brk = get_brk(false); |
| pre_us_brk = cur_brk; |
| cur_arena = (arena_header_t *) pre_us_brk; |
| cur_brk = set_brk(cur_brk + PAGE_SIZE); |
| /* XXX: for delayed instru we will need to handle this; for now we assert */ |
| ASSERT(cur_brk > (byte *)cur_arena, "failed to increase brk at init"); |
| cur_arena->commit_end = cur_brk; |
| cur_arena->reserve_end = cur_arena->commit_end; |
| LOG(2, "heap orig brk="PFX"\n", pre_us_brk); |
| heap_region_add((byte *)cur_arena, cur_arena->reserve_end, HEAP_ARENA, NULL); |
| arena_init(cur_arena, NULL); |
| #elif defined(MACOS) |
| cur_arena = arena_create(NULL, 0/*default*/); |
| ASSERT(cur_arena != NULL, "can't allocate initial heap: fatal"); |
| LOG(2, "initial arena="PFX"\n", cur_arena); |
| #else /* WINDOWS */ |
| process_heap = get_app_PEB()->ProcessHeap; |
| LOG(2, "process heap="PFX"\n", process_heap); |
| cur_arena = create_Rtl_heap(ARENA_INITIAL_COMMIT, ARENA_INITIAL_SIZE, HEAP_GROWABLE); |
| ASSERT(cur_arena != NULL, "can't allocate initial heap: fatal"); |
| |
| hashtable_init(&crtheap_mod_table, CRTHEAP_MOD_TABLE_HASH_BITS, HASH_INTPTR, |
| false/*!strdup*/); |
| hashtable_init(&crtheap_handle_table, CRTHEAP_HANDLE_TABLE_HASH_BITS, HASH_INTPTR, |
| false/*!strdup*/); |
| |
| exe = dr_get_main_module(); |
| ASSERT(exe != NULL, "should find exe base"); |
| if (exe != NULL) { |
| executable_base = exe->start; |
| dr_free_module_data(exe); |
| } |
| |
| heap_iterator(NULL, NULL _IF_WINDOWS(pre_existing_heap_init)); |
| #endif |
| |
| /* set up pointers for per-malloc API */ |
| malloc_interface.malloc_lock = malloc_replace__lock; |
| malloc_interface.malloc_unlock = malloc_replace__unlock; |
| malloc_interface.malloc_end = malloc_replace__end; |
| malloc_interface.malloc_add = malloc_replace__add; |
| malloc_interface.malloc_is_pre_us = malloc_replace__is_pre_us; |
| malloc_interface.malloc_is_pre_us_ex = malloc_replace__is_pre_us_ex; |
| malloc_interface.malloc_chunk_size = malloc_replace__size; |
| malloc_interface.malloc_chunk_size_invalid_only = malloc_replace__size_invalid_only; |
| malloc_interface.malloc_get_client_data = malloc_replace__get_client_data; |
| malloc_interface.malloc_get_client_flags = malloc_replace__get_client_flags; |
| malloc_interface.malloc_set_client_flag = malloc_replace__set_client_flag; |
| malloc_interface.malloc_clear_client_flag = malloc_replace__clear_client_flag; |
| malloc_interface.malloc_iterate = malloc_replace__iterate; |
| malloc_interface.malloc_intercept = malloc_replace__intercept; |
| malloc_interface.malloc_unintercept = malloc_replace__unintercept; |
| malloc_interface.malloc_set_init = malloc_replace__set_init; |
| malloc_interface.malloc_set_exit = malloc_replace__set_exit; |
| } |
| |
| static bool |
| free_arena_at_exit(byte *start, byte *end, uint flags |
| _IF_WINDOWS(HANDLE heap), void *iter_data) |
| { |
| LOG(2, "%s: "PFX"-"PFX" 0x%x\n", __FUNCTION__, start, end, flags); |
| if (TEST(HEAP_ARENA, flags) && !TEST(HEAP_PRE_US, flags)) { |
| arena_header_t *arena = (arena_header_t *) start; |
| #ifdef WINDOWS |
| /* freed when libc routine set exits */ |
| if (!TEST(ARENA_LIBC_DEFAULT, arena->flags)) |
| #endif |
| arena_free(arena); |
| } |
| return true; |
| } |
| |
| static bool |
| free_user_data_at_exit(malloc_info_t *info, void *iter_data) |
| { |
| if (!info->pre_us) { |
| chunk_header_t *head = header_from_ptr(info->base); |
| if (head->user_data != NULL) |
| client_malloc_data_free(head->user_data); |
| } |
| return true; /* keep iterating */ |
| } |
| |
| void |
| alloc_replace_exit(void) |
| { |
| uint i; |
| #ifdef STATISTICS |
| LOG(1, "alloc_replace statistics:\n"); |
| LOG(1, " arenas: %9d\n", num_arenas); |
| LOG(1, " peak arenas: %9d\n", peak_num_arenas); |
| LOG(1, " heap capacity: %9d\n", heap_capacity); |
| LOG(1, " peak heap capacity: %9d\n", peak_heap_capacity); |
| LOG(1, " splits: %9d\n", num_splits); |
| LOG(1, " coalesces: %9d\n", num_coalesces); |
| LOG(1, " deallocs: %9d\n", num_dealloc); |
| LOG(1, " dbgcrt mismatches: %9d\n", dbgcrt_mismatch); |
| LOG(1, " allocs left native: %9d\n", allocs_left_native); |
| #endif |
| |
| /* On Win10 at process exit, RtlLockHeap is called but the private |
| * RtlUnlockProcessHeapOnProcessTerminate does the unlock and so |
| * we don't see it. This exiting thread should be the one who owns the lock. |
| */ |
| if (dr_recurlock_self_owns(cur_arena->lock)) { |
| LOG(2, "Process heap (arena="PFX") is locked at exit: unlocking\n", cur_arena); |
| app_heap_unlock(dr_get_current_drcontext(), cur_arena->lock); |
| } |
| |
| alloc_iterate(free_user_data_at_exit, NULL, false/*free too*/); |
| /* XXX: should add hashtable_iterate() to drcontainers */ |
| for (i = 0; i < HASHTABLE_SIZE(pre_us_table.table_bits); i++) { |
| hash_entry_t *he, *next; |
| for (he = pre_us_table.table[i]; he != NULL; he = next) { |
| chunk_header_t *head = (chunk_header_t *) he->payload; |
| next = he->next; |
| if (head->user_data != NULL) |
| client_malloc_data_free(head->user_data); |
| global_free(head, sizeof(*head), HEAPSTAT_WRAP); |
| } |
| } |
| hashtable_delete_with_stats(&pre_us_table, "pre_us"); |
| |
| #ifdef WINDOWS |
| # ifdef X64 |
| replace_nosy_exit(); |
| # endif |
| |
| /* Free any pre-us heaps that are still around */ |
| for (i = 0; i < HASHTABLE_SIZE(crtheap_handle_table.table_bits); i++) { |
| hash_entry_t *he, *next; |
| for (he = crtheap_handle_table.table[i]; he != NULL; he = next) { |
| arena_header_t *arena = (arena_header_t *) he->payload; |
| next = he->next; |
| destroy_Rtl_heap(arena, NULL, false/*do not free indiv chunks*/); |
| } |
| } |
| #endif |
| |
| heap_region_iterate(free_arena_at_exit, NULL); |
| |
| #ifdef WINDOWS |
| if (alloc_ops.global_lock) |
| dr_recurlock_destroy(global_lock); |
| |
| hashtable_delete_with_stats(&crtheap_mod_table, "crtheap"); |
| hashtable_delete_with_stats(&crtheap_handle_table, "crtheap handles"); |
| #endif |
| } |
| |
| /* Allocate application memory for clients. |
| * This function can only be used with -replace_malloc and |
| * does not work with malloc wrapping mode. |
| */ |
| byte * |
| client_app_malloc(void *drcontext, size_t size, app_pc caller) |
| { |
| void *res; |
| arena_header_t *arena = cur_arena; |
| dr_mcontext_t mc; |
| ASSERT(alloc_ops.replace_malloc, "-replace_malloc is not enabled"); |
| /* FIXME i#1837: provide better callstack */ |
| mc.size = sizeof(mc); |
| mc.flags = DR_MC_CONTROL | DR_MC_INTEGER; /* xsp and xbp */ |
| dr_get_mcontext(drcontext, &mc); |
| LOG(2, "client_app_malloc %d\n", size); |
| /* we are on clean call stack already */ |
| res = replace_alloc_common(arena, size, 0, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, caller, |
| MALLOC_ALLOCATOR_MALLOC); |
| LOG(2, "client_app_malloc %d => "PFX"\n", size, res); |
| return res; |
| } |
| |
| /* Free application memory allocated from client_app_malloc. |
| * This function can only be used with -replace_malloc and |
| * does not work with malloc wrapping mode. |
| */ |
| void |
| client_app_free(void *drcontext, void *ptr, app_pc caller) |
| { |
| arena_header_t *arena = cur_arena; |
| dr_mcontext_t mc; |
| ASSERT(alloc_ops.replace_malloc, "-replace_malloc is not enabled"); |
| /* FIXME i#1837: provide better callstack */ |
| mc.size = sizeof(mc); |
| mc.flags = DR_MC_CONTROL | DR_MC_INTEGER; /* xsp and xbp */ |
| dr_get_mcontext(drcontext, &mc); |
| LOG(2, "client_app_free "PFX"\n", ptr); |
| /* we are on clean call stack already */ |
| replace_free_common(arena, ptr, ALLOC_SYNCHRONIZE | ALLOC_INVOKE_CLIENT, |
| drcontext, &mc, caller, |
| MALLOC_ALLOCATOR_MALLOC); |
| } |