#if USE_TCACHE
/* We want 64 entries.  This is an arbitrary limit, which tunables can
   reduce.  */
# define TCACHE_MAX_BINS		64
# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)

/* Only used to pre-fill the tunables.
   tidx2usize converts a tcache bin index to the corresponding
   user-request size in bytes.  */
# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)

/* When "x" is from chunksize().
   csize2tidx maps a given chunk size "x" to its tcache bin index.  */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size.  */
# define usize2tidx(x) csize2tidx (request2size (x))

/* With rounding and alignment, the bins are...
   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
   idx 1   bytes 25..40 or 13..20
   idx 2   bytes 41..56 or 21..28
   etc.  */

/* This is another arbitrary limit, which tunables can change.  Each
   tcache bin will hold at most this number of chunks.  */
# define TCACHE_FILL_COUNT 7	/* at most 7 chunks per tcache bin */

/* Maximum chunks in tcache bins for tunables.  This value must fit the
   range of tcache->counts[] entries, else they may overflow.  */
# define MAX_TCACHE_COUNT UINT16_MAX
#endif
这里见名知意，例：tidx2usize —> tcache_idx to user size（将 tcache bin 的索引转换为对应的用户请求大小，与下文 usize2tidx 中 "user-provided size" 的 usize 含义一致）。
一些设置
目的:提高项目的可移植性和开启一些优化措施
（以下为源代码清单）
/* REALLOC_ZERO_BYTES_FREES should be set if a call to realloc with
   zero bytes should be the same as a call to free.  This is required
   by the C standard.  Otherwise, since this malloc returns a unique
   pointer for malloc(0), so does realloc(p, 0).  */
/* A realloc() with size 0 is equivalent to free().  */
#ifndef REALLOC_ZERO_BYTES_FREES
#define REALLOC_ZERO_BYTES_FREES 1
#endif
INTERNAL_SIZE_T mchunk_prev_size; /* Size of previous chunk (if free). */ INTERNAL_SIZE_T mchunk_size; /* Size in bytes, including overhead. */
structmalloc_chunk* fd;/* double links -- used only if free. */ structmalloc_chunk* bk;
/* Only used for large blocks: pointer to next larger size. */ structmalloc_chunk* fd_nextsize;/* double links -- used only if free. */ structmalloc_chunk* bk_nextsize; };
/* The smallest possible chunk */
/* The minimum chunk size is the byte offset of fd_nextsize within
   struct malloc_chunk: a free chunk only needs the header fields plus
   the fd/bk list pointers to be linked into a bin.  */
#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))

/* The smallest size we can malloc is an aligned minimal chunk */
/* Round MIN_CHUNK_SIZE up to the next alignment boundary with
   MALLOC_ALIGN_MASK so every allocated chunk size stays aligned.  */
#define MINSIZE  \
  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained
   from a non-main arena.  This is only set immediately before handing
   the chunk to the user, if necessary.  */
#define NON_MAIN_ARENA 0x4

/* Check for chunk from main arena.  */
#define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)

/* Mark a chunk as not being on the main arena.  */
#define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
bin_at() 是一个宏,用于通过给定的索引从内存管理器(m)的 bins 数组中获取特定的 bin(内存块管理单元)。它的实现依赖于 offsetof,计算出特定 bin 的地址,并进行适当的类型转换。
参数说明:
m: 这是一个指向内存管理结构的指针,通常是一个包含多个 bins 的结构体。
i: 这是一个 bin 的索引,表示你想要获取的 bin 的位置(注意,索引是从 1 开始的,bin_at(0) 不存在)。
宏 bin_at(m, i)通过 bin index 获得 bin 的链表头。chunk 中的 fd 和 bk 用于将空闲 chunk 链入链表中，而对于每个 bin 的链表头，只需要这两个域就可以了，prev_size 和 size 对链表头来说都没有意义，只会浪费空间。ptmalloc 为了节约这点内存空间、增大 cpu 高速缓存的命中率，在 bins 数组中只为每个 bin 预留了两个指针的内存空间，用于存放 bin 的链表头的 fd 和 bk 指针。
宏 next_bin(b)用于获得下一个 bin 的地址，根据前面的分析，我们知道只需要将当前 bin 的地址向后移动两个指针的长度就得到下一个 bin 的链表头地址。每个 bin 使用双向循环链表管理空闲 chunk，bin 的链表头的指针 fd 指向第一个可用的 chunk，指针 bk 指向最后一个可用的 chunk。宏 first(b)用于获得 bin 的第一个可用 chunk，宏 last(b)用于获得 bin 的最后一个可用的 chunk，这两个宏便于遍历 bin，同时跳过 bin 的链表头。
/* Indexing Bins for sizes < 512 bytes contain chunks of all the same size, spaced 8 bytes apart. Larger bins are approximately logarithmically spaced: 64 bins of size 8 32 bins of size 64 16 bins of size 512 8 bins of size 4096 4 bins of size 32768 2 bins of size 262144 1 bin of size what's left There is actually a little bit of slop in the numbers in bin_index for the sake of speed. This makes no difference elsewhere. The bins top out around 1MB because we expect to service large requests via mmap. Bin 0 does not exist. Bin 1 is the unordered list; if that would be a valid chunk size the small bins are bumped up one. */
/* Take a chunk off a bin list. */ staticvoid unlink_chunk(mstate av, mchunkptr p) { if (chunksize (p) != prev_size (next_chunk (p))) malloc_printerr ("corrupted size vs. prev_size");
mchunkptr fd = p->fd; mchunkptr bk = p->bk;
if (__builtin_expect (fd->bk != p || bk->fd != p, 0)) malloc_printerr ("corrupted double-linked list");
fd->bk = bk; //unlink操作的利用点 bk->fd = fd; if (!in_smallbin_range (chunksize_nomask (p)) && p->fd_nextsize != NULL) { if (p->fd_nextsize->bk_nextsize != p || p->bk_nextsize->fd_nextsize != p) malloc_printerr ("corrupted double-linked list (not small)");
/* There are several instances of this struct ("arenas") in this malloc. If you are adapting this malloc in a way that does NOT use a static or mmapped malloc_state, you MUST explicitly zero-fill it before using. This malloc relies on the property that malloc_state is initialized to all zeroes (as is true of C statics). */
/* These variables are used for undumping support.  Chunked are marked
   as using mmap, but we leave them alone if they fall into this range.
   NB: The chunk size for these chunks only includes the initial size
   field (of SIZE_SZ bytes), there is no trailing size field (unlike
   with regular mmapped chunks).  */
static mchunkptr dumped_main_arena_start;	/* Inclusive.  */
static mchunkptr dumped_main_arena_end;	/* Exclusive.  */

/* True if the pointer falls into the dumped arena.  Use this after
   chunk_is_mmapped indicates a chunk is mmapped.  */
#define DUMPED_MAIN_ARENA_CHUNK(p) \
  ((p) >= dumped_main_arena_start && (p) < dumped_main_arena_end)
/* There is only one instance of the malloc parameters. */
/* -------------- Early definitions for debugging hooks ---------------- */
/* Define and initialize the hook variables.  These weak definitions must
   appear before any use of the variables in a function (arena.c uses one).  */
#ifndef weak_variable
/* In GNU libc we want the hook variables to be weak definitions to
   avoid a problem with Emacs.  */
# define weak_variable weak_function
#endif
/* ----------- Routines dealing with system allocation -------------- */ /* sysmalloc 处理从系统请求更多内存的 malloc 情况。 进入时,假设 av->top 没有足够的空间来满足 nb 字节的请求, 因此需要扩展或替换 av->top。 */ staticvoid * sysmalloc(INTERNAL_SIZE_T nb, mstate av) { mchunkptr old_top; /* incoming value of av->top */ INTERNAL_SIZE_T old_size; /* its size */ char *old_end; /* its end address */
long size; /* arg to first MORECORE or mmap call */ char *brk; /* return value from MORECORE */
long correction; /* arg to 2nd MORECORE call */ char *snd_brk; /* 2nd return val */
INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */ char *aligned_brk; /* aligned offset into brk */
mchunkptr p; /* the allocated/returned chunk */ mchunkptr remainder; /* remainder from allocation */ unsignedlong remainder_size; /* its size */
/*------------------------ Public wrappers. --------------------------------*/
#if USE_TCACHE
/* We overlay this structure on the user-data portion of a chunk when
   the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;
  /* This field exists to detect double frees.  */
  struct tcache_perthread_struct *key;
} tcache_entry;
staticvoid tcache_thread_shutdown(void) { int i; tcache_perthread_struct *tcache_tmp = tcache;
if (!tcache) return;
/* Disable the tcache and prevent it from being reinitialized. */ tcache = NULL; tcache_shutting_down = true;
/* Free all of the entries and the tcache itself back to the arena heap for coalescing. */ for (i = 0; i < TCACHE_MAX_BINS; ++i) { while (tcache_tmp->entries[i]) { tcache_entry *e = tcache_tmp->entries[i]; tcache_tmp->entries[i] = e->next; __libc_free (e); } }
if (ar_ptr != NULL) __libc_lock_unlock (ar_ptr->mutex);
/* In a low memory situation, we may not be able to allocate memory - in which case, we just keep trying later. However, we typically do this very early, so either there is sufficient memory, or there isn't enough memory to do non-trivial allocations anyway. */ if (victim) { tcache = (tcache_perthread_struct *) victim; memset (tcache, 0, sizeof (tcache_perthread_struct)); } }
victim = _int_malloc (ar_ptr, bytes); /* Retry with another arena only if we were able to find a usable arena before. */ if (!victim && ar_ptr != NULL) { LIBC_PROBE (memory_malloc_retry, 1, bytes); ar_ptr = arena_get_retry (ar_ptr, bytes); victim = _int_malloc (ar_ptr, bytes); }
if (ar_ptr != NULL) __libc_lock_unlock (ar_ptr->mutex);
下面的源代码实现从 last remainder chunk、large bins 和 top chunk 中分配所需的 chunk，这里包含了多个多层循环，在这些循环中，主要工作是分配前两步都未分配成功的 small bin chunk、large bin chunk 和 large chunk。最外层的循环用于重新尝试分配 small bin chunk，因为如果在前一步分配 small bin chunk 不成功，并没有调用 malloc_consolidate()函数合并 fast bins 中的 chunk、将空闲 chunk 加入 unsorted bin 中；如果第一次尝试从 last remainder chunk、top chunk 中分配 small bin chunk 都失败以后，如果 fast bins 中存在空闲 chunk，会调用 malloc_consolidate()函数，那么在 unsorted bin 中就可能存在合适的 small bin chunk 供分配，所以需要再次尝试。
如果当前 chunk 属于 large bins,获得当前 chunk 所属 large bin 的 index,并将该 large bin 的链表表头赋值给 bck,第一个 chunk 赋值给 fwd,也就是当前的 chunk 会插入到 bck 和 fwd 之间,作为 large bin 链表的第一个 chunk。
如果fwd 不等于 bck,意味着large bin 中有空闲chunk 存在,由于large bin 中的空闲chunk 是按照大小顺序排序的,需要将当前从 unsorted bin 中取出的 chunk 插入到 large bin 中合适 的位置。将当前 chunk 的 size 的 inuse 标志 bit 置位,相当于加 1,便于加快 chunk 大小的比 较,找到合适的地方插入当前 chunk。这里还做了一次检查,断言在 large bin 双向循环链表 中的最后一个 chunk的 size 字段中的非主分配区的标志 bit没有置位,因为所有在 large bin 中的 chunk 都处于空闲状态,该标志位一定是清零的。
/* maintain large bins in sorted order */ if (fwd != bck) {//large bin 中有空闲chunk 存在 /* Or with inuse bit to speed comparisons */ size |= PREV_INUSE; /* if smaller than smallest, bypass loop below */ assert (chunk_main_arena (bck->bk));
如果当前chunk比large bin的最后一个chunk的大小还小,那么当前chunk就插入到large bin 的链表的最后,作为最后一个 chunk。可以看出 large bin 中的 chunk 是按照从大到小的 顺序排序的,同时一个 chunk 存在于两个双向循环链表中,一个链表包含了 large bin 中所有 的chunk,另一个链表为chunk size 链表,该链表从每个相同大小的chunk 的取出第一个chunk 按照大小顺序链接在一起,便于一次跨域多个相同大小的 chunk 遍历下一个不同大小的 chunk,这样可以加快在 large bin 链表中的遍历速度。
上面的代码将当前 chunk 插入到 large bin 的空闲 chunk 链表中,并将 large bin 所对应 binmap 的相应 bit 置位。
（以下为源代码清单）
#if USE_TCACHE /* If we've processed as many chunks as we're allowed while filling the cache, return one of the cached ones. */ ++tcache_unsorted_count; if (return_cached && mp_.tcache_unsorted_limit > 0 && tcache_unsorted_count > mp_.tcache_unsorted_limit) { return tcache_get (tc_idx); } #endif
（以下为源代码清单）
#define MAX_ITERS 10000 if (++iters >= MAX_ITERS) break; }
#if USE_TCACHE /* If all the small chunks we found ended up cached, return one now. */ if (return_cached) { return tcache_get (tc_idx); } #endif
当将 unsorted bin 中的空闲 chunk 加入到相应的 small bins 和 large bins 后,将使用最佳匹配法分配 large bin chunk。源代码如下:
（以下为源代码清单）
/* If a large request, scan through the chunks of current bin in sorted order to find smallest that fits. Use the skip list for this. */
if (!in_smallbin_range (nb)) { bin = bin_at (av, idx);
/* skip scan if empty or largest chunk is too small */ if ((victim = first (bin)) != bin && (unsignedlong) chunksize_nomask (victim) >= (unsignedlong) (nb))
如果所需分配的 chunk 为 large bin chunk，查询对应的 large bin 链表，如果 large bin 链表为空，或者链表中最大的 chunk 也不能满足要求，则不能从 large bin 中分配。否则，遍历 large bin 链表，找到合适的 chunk。
/* Avoid removing the first entry for a size so that the skip list does not have to be rerouted. */ if (victim != last (bin) && chunksize_nomask (victim) == chunksize_nomask (victim->fd)) victim = victim->fd;
获取下一个相邻 bin 的空闲 chunk 链表，并获取该 bin 对应 binmap 中的 bit 位的值。binmap 中的 bit 位标识了相应的 bin 中是否有空闲 chunk 存在。binmap 按 block 管理，每个 block 为一个 int，共 32 个 bit，可以表示 32 个 bin 中是否有空闲 chunk 存在。使用 binmap 可以加快查找 bin 是否包含空闲 chunk。这里只查询比所需 chunk 大的 bin 中是否有空闲 chunk 可用。
（以下为源代码清单）
for (;; ) { /* Skip rest of block if there are no more set bits in this block. */ if (bit > map || bit == 0) { do { if (++block >= BINMAPSIZE) /* out of bins */ goto use_top; } while ((map = av->binmap[block]) == 0);
bin = bin_at (av, (block << BINMAPSHIFT)); bit = 1; }
Idx2bit()宏将 idx 指定的位设置为 1,其它位清零,map 表示一个 block(unsigned int) 值,如果 bit 大于 map,意味着 map 为 0,该 block 所对应的所有 bins 中都没有空闲 chunk, 于是遍历 binmap 的下一个 block,直到找到一个不为 0 的 block 或者遍历完所有的 block。 退出循环遍历后,设置 bin 指向 block 的第一个 bit 对应的 bin,并将 bit 置为 1,表示该 block 中 bit 1 对应的 bin,这个 bin 中如果有空闲 chunk,该 chunk 的大小一定满足要求。
use_top: /* If large enough, split off the chunk bordering the end of memory (held in av->top). Note that this is in accord with the best-fit search rule. In effect, av->top is treated as larger (and thus less well fitting) than any other available chunk since it can be extended to be as large as necessary (up to system limitations). We require that av->top always exists (i.e., has size >= MINSIZE) after initialization, so if it would otherwise be exhausted by current request, it is replenished. (The main reason for ensuring it exists is that we may need MINSIZE space to put in fenceposts in sysmalloc.) */
victim = av->top; size = chunksize (victim);
将当前分配区的top chunk赋值给 victim,并获得 victim 的大小。
（以下为源代码清单）
if (__glibc_unlikely (size > av->system_mem)) malloc_printerr ("malloc(): corrupted top size");
由于 top chunk 切分出所需 chunk 后，还需要 MINSIZE 的空间来作为 fencepost，所以必须满足 top chunk 的大小大于所需 chunk 的大小加上 MINSIZE 这个条件，才能从 top chunk 中分配所需 chunk。从 top chunk 切分出所需 chunk 的处理过程跟前面的 chunk 切分类似，不同的是，原 top chunk 切分后的剩余部分将作为新的 top chunk，原 top chunk 的 fencepost 仍然作为新的 top chunk 的 fencepost，所以切分之后剩余的 chunk 不用 set_foot。
（以下为源代码清单）
/* When we are using atomic ops to free fast chunks we can get here for all block sizes. */ elseif (atomic_load_relaxed (&av->have_fastchunks)) { malloc_consolidate (av); /* restore original bin index */ if (in_smallbin_range (nb)) idx = smallbin_index (nb); else idx = largebin_index (nb); }
如果 top chunk 也不能满足要求，查看 fast bins 中是否有空闲 chunk 存在。由于开启了 ATOMIC_FASTBINS 优化的情况下，free 属于 fast bins 的 chunk 时不需要获得分配区的锁，所以在调用 _int_malloc()函数时，有可能有其它线程已经向 fast bins 中加入了新的空闲 chunk；也有可能是所需的 chunk 属于 small bins，但通过前面的步骤都没有分配到所需的 chunk，由于分配 small bin chunk 时在前面的步骤都不会调用 malloc_consolidate()函数将 fast bins 中的 chunk 合并加入到 unsorted bin 中。所以这里如果 fast bins 中有 chunk 存在，调用 malloc_consolidate()函数，并重新设置当前 bin 的 index，然后转到最外层的循环，尝试重新分配 small bin chunk 或是 large bin chunk。如果开启了 ATOMIC_FASTBINS 优化，有可能由其它线程加入到 fast bins 中的 chunk 被合并后加入 unsorted bin 中，从 unsorted bin 中就可以分配出所需的 large bin chunk 了，所以对没有成功分配的 large bin chunk 也需要重试。