_int_malloc()源码解读

_int_malloc()函数是内存分配的核心,根据分配的内存块的大小,该函数中实现了四种分配内存的路径,下面将分别分析这四种分配路径。

glibc 2.23

_int_malloc()定义

先给出_int_malloc()函数的函数定义及临时变量的定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
static void *
_int_malloc (mstate av, size_t bytes)
{
INTERNAL_SIZE_T nb; /* normalized request size */
unsigned int idx; /* associated bin index */
mbinptr bin; /* associated bin */

mchunkptr victim; /* inspected/selected chunk */
INTERNAL_SIZE_T size; /* its size */
int victim_index; /* its bin index */

mchunkptr remainder; /* remainder from a split */
unsigned long remainder_size; /* its size */

unsigned int block; /* bit map traverser */
unsigned int bit; /* bit map traverser */
unsigned int map; /* current word of binmap */

mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */

const char *errstr = NULL;
/*
Convert request size to internal form by adding SIZE_SZ bytes
overhead plus possibly more to obtain necessary alignment and/or
to obtain a size of at least MINSIZE, the smallest allocatable
size. Also, checked_request2size traps (returning 0) request sizes
that are so large that they wrap around zero when padded and
aligned.
*/

checked_request2size (bytes, nb);

checked_request2size()函数将需要分配的内存大小 bytes 转换为需要分配的 chunk 大小nb。Ptmalloc 内部分配都是以 chunk 为单位,根据 chunk 的大小,决定如何获得满足条件的chunk。

分配fast bin chunk

如果所需的 chunk大小小于等于 fast bins 中的最大 chunk 大小,首先尝试从 fast bins 中分配 chunk。源代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
If the size qualifies as a fastbin, first check corresponding bin.
This code is safe to execute even if av is not yet initialized, so we
can try it without checking, which saves some time on this fast path.
*/

//这个 (nb) 是我们申请的大小
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
{//get_max_fast() 这里判断了需要的大小是否属于fastbin
idx = fastbin_index (nb);
//根据所需大小计算出对应在fastbin中的下标(索引)
mfastbinptr *fb = &fastbin (av, idx);
//在fastbin中寻找并返回nb的chunk
mchunkptr pp = *fb;
do
{
victim = pp;
if (victim == NULL)
break;
}
//原子操作确保线程之间不会互相影响
while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))!= victim);
//在分配成功后再次进行了检查
if (victim != 0)
{//计算分配出来的chunk实际size并获取对应在fastbin中的下标,判断是否与前面的idx是否相同
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
{//不相同则报错,打印错误信息
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim), av);
return NULL;
}
check_remalloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);//返回给用户memptr
alloc_perturb (p, bytes);
return p;
}
}

  • 这段代码首先检查请求的大小是否在快速分配的范围内(小于等于 get_max_fast() 的返回值,这个值是预定义的快速分配阈值)。

  • fastbin_index(nb) 会计算出对应的快速分配bins的索引。

  • 然后,代码尝试从对应的快速分配桶中获取一个已分配的块(chunk),并使用原子操作 catomic_compare_and_exchange_val_acq 来确保线程安全。

  • 使用原子操作的目的是避免在多线程环境中出现竞争条件,确保 victim 指向的内存块在被另一个线程修改时不会被错误地使用。

  • 如果成功获取到 victim,代码会进行进一步的验证,检查 victim 的大小是否匹配请求的 nb。这一步是为了防止内存的破坏(memory corruption)。如果 victim 的大小不匹配,代码会记录错误并返回 NULL。

  • check_remalloced_chunk(av, victim, nb) 是一个宏,仅在开启 MALLOC_DEBUG 编译选项时才会实际调用 do_check_remalloced_chunk (A, P, N),否则展开为空操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* Debug check for a chunk that is being handed out again (e.g. taken
   from a fast bin or small bin): verify arena ownership, alignment,
   and that its size is a tight fit for the request s.  */
static void
do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
{
INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE | NON_MAIN_ARENA);
/* sz = chunk size with the PREV_INUSE and NON_MAIN_ARENA flag bits masked off */
if (!chunk_is_mmapped (p))
{/* only non-mmap'd chunks belong to an arena, so check ownership */
assert (av == arena_for_chunk (p));
if (chunk_non_main_arena (p))
assert (av != &main_arena);
else
assert (av == &main_arena);
}

do_check_inuse_chunk (av, p);
/* generic in-use checks (PREV_INUSE bookkeeping in the next chunk, etc.) */
/* Legal size ... */
assert ((sz & MALLOC_ALIGN_MASK) == 0);/* size is properly aligned */
assert ((unsigned long) (sz) >= MINSIZE);/* at least the minimum chunk size */
/* ... and alignment */
assert (aligned_OK (chunk2mem (p)));/* user pointer is aligned too */
/* chunk is less than MINSIZE more than request */
assert ((long) (sz) - (long) (s) >= 0);/* big enough for the request */
assert ((long) (sz) - (long) (s + MINSIZE) < 0);/* but not MINSIZE-or-more too big */
}
  • 如果所有检查都通过,代码将返回对应的内存指针。

分配small bin chunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
 /*
If a small request, check regular bin. Since these "smallbins"
hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are
processed to find best fit. But for small ones, fits are exact
anyway, so we can check now, which is faster.)
*/

if (in_smallbin_range (nb))
{//判断所需的size是否属于smallbin
idx = smallbin_index (nb);
bin = bin_at (av, idx);
//计算出对应的bin和idx
if ((victim = last (bin)) != bin)
{//获取 small bin 中的最后一个块(如果存在)
if (victim == 0) /* initialization check */
malloc_consolidate (av);//整合fastbin中的chunk
else
{
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim))
{//检查双向链表的一致性,确保后向指针和前向指针均指向正确的块。
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
set_inuse_bit_at_offset (victim, nb);//设置块的使用标志,表明该块正在使用。
bin->bk = bck;
bck->fd = bin;//更新双向链表的指针。
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;//如果在非主 arena 中,设置块的标志
check_malloced_chunk (av, victim, nb);//正常检查堆块
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}
}

分配large bin chunk

如果所需的 chunk 不属于 small bins,首先会执行如下的代码段:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs that
it is called frequently in otherwise tend to fragment.
*/

else
{
idx = largebin_index (nb);
if (have_fastchunks (av))
malloc_consolidate (av);
}

所需 chunk 不属于 small bins,那么就一定属于 large bins,首先根据 chunk 的大小获得对应的 large bin 的 index,接着判断当前分配区的 fast bins 中是否包含 chunk,如果存在,调用 malloc_consolidate()函数合并 fast bins 中的 chunk,并将这些空闲 chunk 加入 unsorted bin中。

下面的源代码实现从 last remainder chunk,large bins 和top chunk 中分配所需的 chunk,这里包含了多个多层循环,在这些循环中,主要工作是分配前两步都未分配成功的 small bin chunk,large bin chunk 和 large chunk。最外层的循环用于重新尝试分配 small bin chunk,因为如果在前一步分配small bin chunk 不成功,并没有调用 malloc_consolidate()函数合并 fast bins 中的 chunk,将空闲 chunk 加入 unsorted bin 中,如果第一次尝试从 last remainder chunk,top chunk 中分配 small bin chunk 都失败以后,如果 fast bins 中存在空闲 chunk,会调用malloc_consolidate()函数,那么在 unsorted bin 中就可能存在合适的 small bin chunk 供分配,所以需要再次尝试。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
Process recently freed or remaindered chunks, taking one only if
it is exact fit, or, if this a small request, the chunk is remainder from
the most recent non-exact fit. Place other traversed chunks in
bins. Note that this step is the only place in any routine where
chunks are placed in bins.

The outer loop here is needed because we might not realize until
near the end of malloc that we should have consolidated, so must
do so and retry. This happens at most once, and only when we would
otherwise need to expand memory to service a "small" request.
*/

for (;; )//遍历unsortedbin
{
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))

反向遍历 unsorted bin 的双向链表,遍历结束的条件是循环链表只剩下一个头结点。


在这里可能会有疑惑这个unsorted_chunks(av)返回的是什么,这个头结点又是什么?

unsorted_chunks(M) 的返回值是一个指向 虚拟 chunk(链表头) 的指针(在程序中就是main_arena+88的地方),该虚拟 chunk 的 fd 和 bk 成员分别对应 main_arena 的 bins 数组的第一个和第二个元素(即 bins[0] 和 bins[1]),用于管理 unsorted bin 的双向链表,而头节点就是这个 虚拟 chunk(链表头) 的指针(在程序中就是main_arena+88的地方)

deepseek分析:

关键步骤解析

  1. 宏展开unsorted_chunks(M) 展开为 bin_at(M, 1)
  2. bin_at 宏计算
    • 计算 &((M)->bins[(1-1)*2]),即 &M->bins[0](获取 bins 数组第一个元素的地址)。
    • 减去 offsetof(struct malloc_chunk, fd)fd 成员在 malloc_chunk 结构中的偏移,通常为 16 字节)。
  3. 指针调整
    • 通过减去偏移量,将指针回退到虚拟 chunk 的起始地址,使得 fd 和 bk 成员正好对齐到 bins[0] 和 bins[1]

作用:

  • 虚拟 chunk 的作用:作为 unsorted bin 的链表头,其 fd 指向链表的第一个实际 chunk,bk 指向最后一个 chunk。
  • 链表操作统一性:通过虚拟 chunk,所有 bin 的链表操作(如插入、删除)可以统一使用 malloc_chunk 结构的方法,简化内存管理逻辑。

图解:

image-20250402171151852

该虚拟 chunk 不占用实际内存,仅通过指针运算将 bins 数组的特定位置映射为 malloc_chunk 结构,便于链表管理。


1
2
3
4
5
6
bck = victim->bk;
if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (victim->size > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim), av);
size = chunksize (victim);//获得当前要操作的chunk的size

检查当前遍历的 chunk 是否合法,chunk 的大小不能小于等于 2 * SIZE_SZ,也不能超过该分配区总的内存分配量。然后获取 chunk 的大小并赋值给 size。这里的检查似乎有点小问题,直接使用了victim->size,但victim->size 中包含了相关的标志位信息,使用chunksize(victim)才比较合理,但在 unsorted bin 中的空闲 chunk 的所有标志位都清零了,所以这里直接victim->size 没有问题。

1
2
3
4
5
6
7
8
9
10
11
12
/*
If a small request, try to use last remainder if it is the
only chunk in unsorted bin. This helps promote locality for
runs of consecutive small requests. This is the only
exception to best-fit, and applies only when there is
no exact fit for a small chunk.
*/

if (in_smallbin_range (nb) &&
bck == unsorted_chunks (av) &&
victim == av->last_remainder &&
(unsigned long) (size) > (unsigned long) (nb + MINSIZE))

如果需要分配一个 small bin chunk,在前面的 small bins 中没有匹配到合适的chunk,并且 unsorted bin 中只有一个 chunk,并且这个 chunk 为 last remainder chunk,并且这个 chunk 的大小大于所需 chunk 的大小加上 MINSIZE,在满足这些条件的情况下,可以使用这个chunk 切分出需要的small bin chunk。这是唯一的从unsorted bin 中分配small bin chunk的情况,这种优化利于 cpu 的高速缓存命中(这也表明了smallbin和fastbin中的chunk需要精准匹配)。

1
2
3
4
5
6
7
8
9
10
11
/* split and reattach remainder */
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks (av);
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}

从该 chunk 中切分出所需大小的 chunk,计算切分后剩下 chunk 的大小,将剩下的 chunk加入 unsorted bin 的链表中,并将剩下的 chunk 作为分配区的 last remainder chunk,若剩下的 chunk 属于 large bin chunk,将该 chunk 的fd_nextsize 和 bk_nextsize 设置为 NULL,因为这个 chunk 仅仅存在于 unsorted bin 中,并且 unsorted bin 中有且仅有这一个 chunk。

1
2
3
4
5
6
7
8
9
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;

设置分配出的 chunk 和 last remainder chunk 的相关信息,如 chunk 的size,状态标志位,对于 last remainder chunk,需要调用 set_foot 宏,因为只有处于空闲状态的 chunk 的 foot 信息(prev_size)才是有效的,处于 inuse 状态的 chunk 的 foot 无效,该 foot 是返回给应用层的内存块的一部分。设置完成 chunk 的相关信息,调用 chunk2mem()获得 chunk 中可用的内存指针,返回给应用层,退出。

1
2
3
4
5
      /* remove from unsorted list */
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);
//将victim从unsortedbin中移出来,这里是unsortedbin attack的核心
//即可向任意地址写入一个大数(unsorted bin 链表头的地址,也就是main_arena+88)

将双向链表中的最后一个chunk移除

1
2
3
4
5
6
7
8
9
10
11
12
/* Take now instead of binning if exact fit */

if (size == nb)//判断当前操作(victim)的size是否和我们申请的chunk的size一样
{//house of storm 攻击利用点
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;//如果一样直接返回
}

如果当前遍历的 chunk 与所需的 chunk 大小一致,将当前 chunk 返回。首先设置当前 chunk 处于 inuse 状态,该标志位处于相邻的下一个 chunk 的 size 中,如果当前分配区不是主分配区,设置当前 chunk 的非主分配区标志位,最后调用 chunk2mem()获得 chunk 中可用的内存指针,返回给应用层,退出。

1
2
3
4
5
6
7

if (in_smallbin_range (size))//如果这个size是smallbin中的则放入smallbin中
{
victim_index = smallbin_index (size);
bck = bin_at (av, victim_index);
fwd = bck->fd;//头插法
}//smallbin是双向链表,此处采用头插法插入

如果当前 chunk 属于small bins,获得当前 chunk 所属small bin 的 index,并将该 small bin的链表表头赋值给 bck,第一个 chunk 赋值给 fwd,也就是当前的 chunk 会插入到 bck 和 fwd之间,作为small bin 链表的第一个 chunk。

1
2
3
4
5
6
else//否则就会放入largebin中
{
victim_index = largebin_index (size);
bck = bin_at (av, victim_index);
//这个bck是main_arena+1096,它的fd是最大的哪个chunk,bk是最小的chunk
fwd = bck->fd;//fwd是largebin中最大的那个堆头(头节点)

如果当前 chunk 属于 large bins,获得当前 chunk 所属 large bin 的 index,并将该 large bin的链表表头赋值给 bck,第一个 chunk 赋值给 fwd,也就是当前的 chunk 会插入到 bck 和 fwd之间,作为 large bin 链表的第一个 chunk

1
2
3
4
5
6
7
/* maintain large bins in sorted order */
if (fwd != bck)//这个就是判断largebin中有没有chunk
{
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert ((bck->bk->size & NON_MAIN_ARENA) == 0);

如果fwd 不等于 bck ,意味着large bin 中有空闲chunk 存在,由于large bin 中的空闲chunk是按照大小顺序排序的,需要将当前从 unsorted bin 中取出的 chunk 插入到 large bin 中合适的位置。将当前 chunk 的 size 的 inuse 标志 bit 置位,相当于加 1,便于加快 chunk 大小的比较,找到合适的地方插入当前 chunk。这里还做了一次检查,断言在 large bin 双向循环链表中的最后一个 chunk 的 size 字段中的非主分配区的标志 bit 没有置位,因为所有在 large bin中的 chunk 都处于空闲状态,该标志位一定是清零的。

1
2
3
4
5
6
7
8
9
if ((unsigned long) (size) < (unsigned long) (bck->bk->size))
//判断当前操作的chunk的size是不是小于largebin中最小的size
{
fwd = bck;
bck = bck->bk;

victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;

如果当前chunk 比 large bin的最后一个chunk 的大小还小,那么当前 chunk 就插入到largebin 的链表的最后,作为最后一个 chunk。可以看出 large bin 中的 chunk 是按照从大到小的顺序排序的,同时一个 chunk 存在于两个双向循环链表中,一个链表包含了 large bin 中所有的chunk,另一个链表为chunk size 链表,该链表从每个相同大小的chunk 的取出第一个chunk按照大小顺序链接在一起,便于一次跨域多个相同大小的 chunk 遍历下一个不同大小的 chunk,这样可以加快在 large bin 链表中的遍历速度。

1
2
3
4
5
6
7
8
9
else
{
assert ((fwd->size & NON_MAIN_ARENA) == 0);
while ((unsigned long) size < fwd->size)
//判断当前操作的chunk的size是不是小于largebin中最大的size
{
fwd = fwd->fd_nextsize;
assert ((fwd->size & NON_MAIN_ARENA) == 0);
}

正向遍历 chunk size 链表,直到找到第一个 chunk 大小小于等于当前 chunk 大小的 chunk 退出循环。

1
2
3
4
if ((unsigned long) size == (unsigned long) fwd->size)
//判断当前操作的chunk的size是不是等于largebin中最大的size
/* Always insert in the second position. */
fwd = fwd->fd;

如果从 large bin 链表中找到了与当前 chunk 大小相同的 chunk,则同一大小的 chunk 已经存在,那么 chunk size 链表中一定包含了fwd 所指向的 chunk,为了不修改 chunk size 链表,当前 chunk 只能插入 fwd 之后

1
2
3
4
5
6
else
{//这是largebin_attack的核心
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;

如果 chunk size 链表中还没有包含当前 chunk 大小的 chunk,也就是说当前 chunk 的大小大于 fwd 的大小,则将当前 chunk 作为该 chunk size 的代表加入 chunk size 链表,chunk size 链表也是按照由大到小的顺序排序

1
2
3
4
5
6
          }
bck = fwd->bk;
}
}
else
victim->fd_nextsize = victim->bk_nextsize = victim;

如果 large bin 链表中没有 chunk,直接将当前 chunk 加入 chunk size 链表。

1
2
3
4
5
6
7
  }

mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;

上面的代码将当前 chunk 插入到 large bin 的空闲 chunk 链表中,并将 large bin 所对应 binmap 的相应 bit 置位。

目前学到的攻击方式就到这里了,下面的以后再阅读分析吧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#define MAX_ITERS       10000
if (++iters >= MAX_ITERS)
break;
}

/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
*/

if (!in_smallbin_range (nb))
{
bin = bin_at (av, idx);

/* skip scan if empty or largest chunk is too small */
if ((victim = first (bin)) != bin &&
(unsigned long) (victim->size) >= (unsigned long) (nb))
{
victim = victim->bk_nextsize;
while (((unsigned long) (size = chunksize (victim)) <
(unsigned long) (nb)))
victim = victim->bk_nextsize;

/* Avoid removing the first entry for a size so that the skip
list does not have to be rerouted. */
if (victim != last (bin) && victim->size == victim->fd->size)
victim = victim->fd;

remainder_size = size - nb;
unlink (av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}
/* Split */
else
{
remainder = chunk_at_offset (victim, nb);
/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

/*
Search for a chunk by scanning bins, starting with next largest
bin. This search is strictly by best-fit; i.e., the smallest
(with ties going to approximately the least recently used) chunk
that fits is selected.

The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases
when no chunks have been returned yet is faster than it might look.
*/

++idx;
bin = bin_at (av, idx);
block = idx2block (idx);
map = av->binmap[block];
bit = idx2bit (idx);

for (;; )
{
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0)
{
do
{
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
}
while ((map = av->binmap[block]) == 0);

bin = bin_at (av, (block << BINMAPSHIFT));
bit = 1;
}

/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0)
{
bin = next_bin (bin);
bit <<= 1;
assert (bit != 0);
}

/* Inspect the bin. It is likely to be non-empty */
victim = last (bin);

/* If a false alarm (empty bin), clear the bit. */
if (victim == bin)
{
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin (bin);
bit <<= 1;
}

else
{
size = chunksize (victim);

/* We know the first chunk in this bin is big enough to use. */
assert ((unsigned long) (size) >= (unsigned long) (nb));

remainder_size = size - nb;

/* unlink */
unlink (av, victim, bck, fwd);

/* Exhaust */
if (remainder_size < MINSIZE)
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
victim->size |= NON_MAIN_ARENA;
}

/* Split */
else
{
remainder = chunk_at_offset (victim, nb);

/* We cannot assume the unsorted list is empty and therefore
have to perform a complete insert here. */
bck = unsorted_chunks (av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "malloc(): corrupted unsorted chunks 2";
goto errout;
}
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;

/* advertise as last remainder */
if (in_smallbin_range (nb))
av->last_remainder = remainder;
if (!in_smallbin_range (remainder_size))
{
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
set_foot (remainder, remainder_size);
}
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}
}

use_top:
/*
If large enough, split off the chunk bordering the end of memory
(held in av->top). Note that this is in accord with the best-fit
search rule. In effect, av->top is treated as larger (and thus
less well fitting) than any other available chunk since it can
be extended to be as large as necessary (up to system
limitations).

We require that av->top always exists (i.e., has size >=
MINSIZE) after initialization, so if it would otherwise be
exhausted by current request, it is replenished. (The main
reason for ensuring it exists is that we may need MINSIZE space
to put in fenceposts in sysmalloc.)
*/

victim = av->top;
size = chunksize (victim);

if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (victim, nb);
av->top = remainder;
set_head (victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);

check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
}

/* When we are using atomic ops to free fast chunks we can get
here for all block sizes. */
else if (have_fastchunks (av))
{
malloc_consolidate (av);
/* restore original bin index */
if (in_smallbin_range (nb))
idx = smallbin_index (nb);
else
idx = largebin_index (nb);
}

/*
Otherwise, relay to handle system-dependent cases
*/
else
{
void *p = sysmalloc (nb, av);
if (p != NULL)
alloc_perturb (p, bytes);
return p;
}
}
}

glibc 2.27

加入了tcache,其中所有在tcache管理范围内的chunk都会放入tcache后再分配出去