diff options
Diffstat (limited to 'sysdep/unix/alloc.c')
-rw-r--r-- | sysdep/unix/alloc.c | 241 |
1 files changed, 155 insertions, 86 deletions
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index c8f1c83f..cafcc8dd 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -10,6 +10,7 @@ #include "lib/resource.h" #include "lib/lists.h" #include "lib/event.h" +#include "lib/rcu.h" #include <errno.h> #include <stdlib.h> @@ -29,56 +30,55 @@ long page_size = 0; #ifdef HAVE_MMAP -#define KEEP_PAGES_MAIN_MAX 256 -#define KEEP_PAGES_MAIN_MIN 8 -#define CLEANUP_PAGES_BULK 256 +#define KEEP_PAGES_MAX 512 +#define KEEP_PAGES_MIN 32 +#define KEEP_PAGES_MAX_LOCAL 16 +#define ALLOC_PAGES_AT_ONCE 8 -STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); +STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX); +STATIC_ASSERT(ALLOC_PAGES_AT_ONCE < KEEP_PAGES_MAX_LOCAL); static _Bool use_fake = 0; +static _Bool initialized = 0; #if DEBUGGING struct free_page { node unused[42]; - node n; + struct free_page * _Atomic next; }; #else struct free_page { - node n; + struct free_page * _Atomic next; }; #endif #define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *)) struct empty_pages { - node n; + struct empty_pages *next; uint pos; void *pages[0]; }; -struct free_pages { - list pages; /* List of (struct free_page) keeping free pages without releasing them (hot) */ - list empty; /* List of (struct empty_pages) keeping invalidated pages mapped for us (cold) */ - u16 min, max; /* Minimal and maximal number of free pages kept */ - uint cnt; /* Number of free pages in list */ - event cleanup; -}; +DEFINE_DOMAIN(resource); +static DOMAIN(resource) empty_pages_domain; +static struct empty_pages *empty_pages = NULL; -static void global_free_pages_cleanup_event(void *); -static void *alloc_cold_page(void); +static struct free_page * _Atomic page_stack = NULL; +static _Thread_local struct free_page * local_page_stack = NULL; -static struct free_pages global_free_pages = { - .min = KEEP_PAGES_MAIN_MIN, - .max = KEEP_PAGES_MAIN_MAX, - .cleanup = { .hook = global_free_pages_cleanup_event }, -}; +static void page_cleanup(void *); +static event page_cleanup_event = { .hook = page_cleanup, }; +#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0) -uint *pages_kept = &global_free_pages.cnt; +_Atomic int pages_kept = 0; +_Atomic int pages_kept_locally = 0; +static _Thread_local int pages_kept_here = 0; static void * alloc_sys_page(void) { - void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + void *ptr = mmap(NULL, page_size * ALLOC_PAGES_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr == MAP_FAILED) die("mmap(%ld) failed: %m", (s64) page_size); @@ -108,45 +108,57 @@ alloc_page(void) } #ifdef HAVE_MMAP - struct free_pages *fps = &global_free_pages; - - /* If there is any free page kept hot, we use it. */ - if (fps->cnt) + /* If there is any free page kept hot in this thread, we use it. */ + struct free_page *fp = local_page_stack; + if (fp) { - struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); - rem_node(&fp->n); + local_page_stack = atomic_load_explicit(&fp->next, memory_order_acquire); + atomic_fetch_sub_explicit(&pages_kept_locally, 1, memory_order_relaxed); + pages_kept_here--; + return fp; + } - /* If the hot-free-page cache is getting short, request the cleanup routine to replenish the cache */ - if ((--fps->cnt < fps->min) && !shutting_down) - ev_schedule(&fps->cleanup); + ASSERT_DIE(pages_kept_here == 0); + /* If there is any free page kept hot in global storage, we use it. */ + rcu_read_lock(); + fp = atomic_load_explicit(&page_stack, memory_order_acquire); + while (fp && !atomic_compare_exchange_strong_explicit( + &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire), + memory_order_acq_rel, memory_order_acquire)) + ; + rcu_read_unlock(); + + if (fp) + { + atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); return fp; } - else - return alloc_cold_page(); -} - -static void * -alloc_cold_page(void) -{ - struct free_pages *fps = &global_free_pages; /* If there is any free page kept cold, we use that. */ - if (!EMPTY_LIST(fps->empty)) - { - struct empty_pages *ep = HEAD(fps->empty); + LOCK_DOMAIN(resource, empty_pages_domain); + if (empty_pages) { + if (empty_pages->pos) + /* Either the keeper page contains at least one cold page pointer, return that */ + fp = empty_pages->pages[--empty_pages->pos]; + else + { + /* Or the keeper page has no more cold page pointer, return the keeper page */ + fp = (struct free_page *) empty_pages; + empty_pages = empty_pages->next; + } + } + UNLOCK_DOMAIN(resource, empty_pages_domain); - /* Either the keeper page contains at least one cold page pointer, return that */ - if (ep->pos) - return ep->pages[--ep->pos]; + if (fp) + return fp; - /* Or the keeper page has no more cold page pointer, return the keeper page */ - rem_node(&ep->n); - return ep; - } + /* And in the worst case, allocate some new pages by mmap() */ + void *ptr = alloc_sys_page(); + for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++) + free_page(ptr + page_size * i); - /* And in the worst case, allocate a new page by mmap() */ - return alloc_sys_page(); + return ptr; #endif } @@ -161,53 +173,108 @@ free_page(void *ptr) } #ifdef HAVE_MMAP - struct free_pages *fps = &global_free_pages; + /* We primarily try to keep the pages locally. */ struct free_page *fp = ptr; + if (shutting_down || (pages_kept_here < KEEP_PAGES_MAX_LOCAL)) + { + atomic_store_explicit(&fp->next, local_page_stack, memory_order_relaxed); + local_page_stack = fp; - /* Otherwise, we add the free page to the hot-free-page list */ - fp->n = (node) {}; - add_tail(&fps->pages, &fp->n); + atomic_fetch_add_explicit(&pages_kept_locally, 1, memory_order_relaxed); + pages_kept_here++; + return; + } + + /* If there are too many local pages, we add the free page to the global hot-free-page list */ + rcu_read_lock(); + struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire); - /* And if there are too many hot free pages, we ask for page cleanup */ - if ((++fps->cnt > fps->max) && !shutting_down) - ev_schedule(&fps->cleanup); + do atomic_store_explicit(&fp->next, next, memory_order_release); + while (!atomic_compare_exchange_strong_explicit( + &page_stack, &next, fp, + memory_order_acq_rel, memory_order_acquire)); + rcu_read_unlock(); + + /* And if there are too many global hot free pages, we ask for page cleanup */ + if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX) + SCHEDULE_CLEANUP; #endif } +/* When the routine is going to sleep for a long time, we flush the local + * hot page cache to not keep dirty pages for nothing. */ +void +flush_local_pages(void) +{ + if (use_fake || !local_page_stack || shutting_down) + return; + + /* We first count the pages to enable consistency checking. + * Also, we need to know the last page. */ + struct free_page *last = local_page_stack, *next; + int check_count = 1; + while (next = atomic_load_explicit(&last->next, memory_order_acquire)) + { + check_count++; + last = next; + } + + /* The actual number of pages must be equal to the counter value. */ + ASSERT_DIE(check_count == pages_kept_here); + + /* Repeatedly trying to insert the whole page list into global page stack at once. */ + rcu_read_lock(); + next = atomic_load_explicit(&page_stack, memory_order_acquire); + + /* First we set the outwards pointer (from our last), + * then we try to set the inwards pointer to our first page. */ + do atomic_store_explicit(&last->next, next, memory_order_release); + while (!atomic_compare_exchange_strong_explicit( + &page_stack, &next, local_page_stack, + memory_order_acq_rel, memory_order_acquire)); + rcu_read_unlock(); + + /* Finished. Now the local stack is empty. */ + local_page_stack = NULL; + pages_kept_here = 0; + + /* Check the state of global page cache and maybe schedule its cleanup. */ + atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed); + if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX) + SCHEDULE_CLEANUP; +} + #ifdef HAVE_MMAP static void -global_free_pages_cleanup_event(void *data UNUSED) +page_cleanup(void *_ UNUSED) { /* Cleanup on shutdown is ignored. All pages may be kept hot, OS will take care. */ if (shutting_down) return; - struct free_pages *fps = &global_free_pages; + struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel); + if (!stack) + return; - /* Cleanup may get called when hot free page cache is short of pages. Replenishing. */ - while (fps->cnt / 2 < fps->min) - free_page(alloc_cold_page()); - /* Or the hot free page cache is too big. Moving some pages to the cold free page cache. */ - for (int limit = CLEANUP_PAGES_BULK; limit && (fps->cnt > fps->max / 2); fps->cnt--, limit--) - { - struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); - rem_node(&fp->n); + do { + synchronize_rcu(); + struct free_page *fp = stack; + stack = atomic_load_explicit(&fp->next, memory_order_acquire); + LOCK_DOMAIN(resource, empty_pages_domain); /* Empty pages are stored as pointers. To store them, we need a pointer block. */ - struct empty_pages *ep; - if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX)) + if (!empty_pages || (empty_pages->pos == EP_POS_MAX)) { /* There is either no pointer block or the last block is full. We use this block as a pointer block. */ - ep = (struct empty_pages *) fp; - *ep = (struct empty_pages) {}; - add_head(&fps->empty, &ep->n); + empty_pages = (struct empty_pages *) fp; + *empty_pages = (struct empty_pages) {}; } else { /* We store this block as a pointer into the first free place * and tell the OS that the underlying memory is trash. */ - ep->pages[ep->pos++] = fp; + empty_pages->pages[empty_pages->pos++] = fp; if (madvise(fp, page_size, #ifdef CONFIG_MADV_DONTNEED_TO_FREE MADV_DONTNEED @@ -217,12 +284,18 @@ global_free_pages_cleanup_event(void *data UNUSED) ) < 0) bug("madvise(%p) failed: %m", fp); } + UNLOCK_DOMAIN(resource, empty_pages_domain); } + while ((atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2) && stack); - /* If the hot free page cleanup hit the limit, re-schedule this routine - * to allow for other routines to run. */ - if (fps->cnt > fps->max) - ev_schedule(&fps->cleanup); + while (stack) + { + struct free_page *f = stack; + stack = atomic_load_explicit(&f->next, memory_order_acquire); + free_page(f); + + atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); + } } #endif @@ -236,8 +309,6 @@ resource_sys_init(void) #endif #ifdef HAVE_MMAP - ASSERT_DIE(global_free_pages.cnt == 0); - /* Check what page size the system supports */ if (!(page_size = sysconf(_SC_PAGESIZE))) die("System page size must be non-zero"); @@ -247,11 +318,8 @@ resource_sys_init(void) /* We assume that page size has only one bit and is between 1K and 256K (incl.). * Otherwise, the assumptions in lib/slab.c (sl_head's num_full range) aren't met. */ - struct free_pages *fps = &global_free_pages; - - init_list(&fps->pages); - init_list(&fps->empty); - global_free_pages_cleanup_event(NULL); + empty_pages_domain = DOMAIN_NEW(resource, "Empty Pages"); + initialized = 1; return; } @@ -261,4 +329,5 @@ resource_sys_init(void) #endif page_size = 4096; + initialized = 1; } |