diff options
author | Maria Matejka <mq@ucw.cz> | 2021-11-30 23:57:14 +0100 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2021-12-01 13:00:54 +0100 |
commit | bb63e99d7877023667edaf26495dd657ec2fd57b (patch) | |
tree | 3ae919a00541c27c8f661addb56c6d4ef681d361 /sysdep | |
parent | 385b3ea3956aefc2868cdd838fc0a90f1d8a7857 (diff) |
Page allocator moved from pools to IO loops.
The resource pool system is highly hierarchical and keeping spare pages
in pools leads to unnecessarily complex memory management.
Loops have a flat hierarchy, at least for now, and it is therefore much
easier to take care of pages, especially in cases of excessive virtual memory
fragmentation.
Diffstat (limited to 'sysdep')
-rw-r--r-- | sysdep/unix/alloc.c | 263 | ||||
-rw-r--r-- | sysdep/unix/coroutine.c | 3 | ||||
-rw-r--r-- | sysdep/unix/io-loop.c | 5 | ||||
-rw-r--r-- | sysdep/unix/io-loop.h | 21 | ||||
-rw-r--r-- | sysdep/unix/io.c | 3 | ||||
-rw-r--r-- | sysdep/unix/main.c | 29 |
6 files changed, 192 insertions, 132 deletions
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 61360e73..77c504e3 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -9,6 +9,8 @@ #include "nest/bird.h" #include "lib/resource.h" +#include "sysdep/unix/io-loop.h" + #include <stdlib.h> #include <unistd.h> #include <stdatomic.h> @@ -19,86 +21,47 @@ #endif long page_size = 0; -_Bool alloc_multipage = 0; - -static _Atomic int global_page_list_not_empty; -static list global_page_list; -static _Atomic int global_page_spinlock; - -#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0) -#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, memory_order_acq_rel, memory_order_acquire)); } while (0) #ifdef HAVE_MMAP +#if DEBUGGING +#define FP_NODE_OFFSET 42 +#else +#define FP_NODE_OFFSET 1 +#endif static _Bool use_fake = 0; #else static _Bool use_fake = 1; #endif -void resource_sys_init(void) +static void * +alloc_sys_page(void) { -#ifdef HAVE_MMAP - init_list(&global_page_list); + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (!(page_size = sysconf(_SC_PAGESIZE))) - die("System page size must be non-zero"); + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) -#endif - { - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; - } + return ptr; } void * -alloc_sys_page(void) +alloc_page(void) { #ifdef HAVE_MMAP if (!use_fake) { - if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed)) - { - GLOBAL_PAGE_SPIN_LOCK; - if (!EMPTY_LIST(global_page_list)) - { - node *ret = HEAD(global_page_list); - rem_node(ret); - if (EMPTY_LIST(global_page_list)) - atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed); - 
GLOBAL_PAGE_SPIN_UNLOCK; - memset(ret, 0, sizeof(node)); - return (void *) ret; - } - GLOBAL_PAGE_SPIN_UNLOCK; - } - - if (alloc_multipage) - { - void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (big == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - uintptr_t offset = ((uintptr_t) big) % page_size; - if (offset) - { - void *ret = big + page_size - offset; - munmap(big, page_size - offset); - munmap(ret + page_size, offset); - return ret; - } - else - { - munmap(big + page_size, page_size); - return big; - } - } - - void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - return ret; + struct free_pages *fp = &birdloop_current->pages; + if (!fp->cnt) + return alloc_sys_page(); + + node *n = HEAD(fp->list); + rem_node(n); + if (--fp->cnt < fp->min) + ev_send(&global_work_list, fp->cleanup); + + void *ptr = n - FP_NODE_OFFSET; + memset(ptr, 0, page_size); + return ptr; } else #endif @@ -111,56 +74,156 @@ alloc_sys_page(void) } void -free_sys_page(void *ptr) +free_page(void *ptr) { #ifdef HAVE_MMAP if (!use_fake) { - if (munmap(ptr, page_size) < 0) -#ifdef ENOMEM - if (errno == ENOMEM) - { - memset(ptr, 0, page_size); - - GLOBAL_PAGE_SPIN_LOCK; - add_tail(&global_page_list, (node *) ptr); - atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed); - GLOBAL_PAGE_SPIN_UNLOCK; - } - else -#endif - bug("munmap(%p) failed: %m", ptr); + struct free_pages *fp = &birdloop_current->pages; + struct node *n = ptr; + n += FP_NODE_OFFSET; + + memset(n, 0, sizeof(node)); + add_tail(&fp->list, n); + if (++fp->cnt > fp->max) + ev_send(&global_work_list, fp->cleanup); } else #endif free(ptr); } +#ifdef HAVE_MMAP + +#define GFP (&main_birdloop.pages) + void -check_stored_pages(void) +flush_pages(struct birdloop *loop) { -#ifdef ENOMEM - if (atomic_load_explicit(&global_page_list_not_empty, 
memory_order_relaxed) == 0) - return; + ASSERT_DIE(birdloop_inside(&main_birdloop)); + + add_tail_list(&GFP->list, &loop->pages.list); + GFP->cnt += loop->pages.cnt; + + loop->pages.cnt = 0; + loop->pages.list = (list) {}; + loop->pages.min = 0; + loop->pages.max = 0; + + rfree(loop->pages.cleanup); + loop->pages.cleanup = NULL; +} - for (uint limit = 0; limit < 256; limit++) +static void +cleanup_pages(void *data) +{ + struct birdloop *loop = data; + birdloop_enter(loop); + + struct free_pages *fp = &birdloop_current->pages; + + while ((fp->cnt < fp->min) && (GFP->cnt > GFP->min)) { - GLOBAL_PAGE_SPIN_LOCK; - void *ptr = HEAD(global_page_list); - if (!NODE_VALID(ptr)) - { - atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed); - GLOBAL_PAGE_SPIN_UNLOCK; - return; - } - - rem_node(ptr); - if (munmap(ptr, page_size) < 0) - if (errno == ENOMEM) - add_tail(&global_page_list, ptr); - else - bug("munmap(%p) failed: %m", ptr); - GLOBAL_PAGE_SPIN_UNLOCK; + node *n = HEAD(GFP->list); + rem_node(n); + add_tail(&fp->list, n); + fp->cnt++; + GFP->cnt--; + } + + while (fp->cnt < fp->min) + { + node *n = alloc_sys_page(); + add_tail(&fp->list, n + FP_NODE_OFFSET); + fp->cnt++; + } + + while (fp->cnt > fp->max) + { + node *n = HEAD(fp->list); + rem_node(n); + add_tail(&GFP->list, n); + fp->cnt--; + GFP->cnt++; + } + + birdloop_leave(loop); + + if (GFP->cnt > GFP->max) + ev_send(&global_work_list, GFP->cleanup); +} + +static void +cleanup_global_pages(void *data UNUSED) +{ + while (GFP->cnt < GFP->max) + { + node *n = alloc_sys_page(); + add_tail(&GFP->list, n + FP_NODE_OFFSET); + GFP->cnt++; + } + + for (uint limit = GFP->cnt; (limit > 0) && (GFP->cnt > GFP->max); limit--) + { + node *n = TAIL(GFP->list); + rem_node(n); + + if (munmap(n - FP_NODE_OFFSET, page_size) == 0) + GFP->cnt--; + else if (errno == ENOMEM) + add_head(&GFP->list, n); + else + bug("munmap(%p) failed: %m", n - FP_NODE_OFFSET); + } +} + +void +init_pages(struct birdloop *loop) +{ + 
struct free_pages *fp = &loop->pages; + + init_list(&fp->list); + fp->cleanup = ev_new_init(&root_pool, cleanup_pages, loop); + fp->min = 4; + fp->max = 16; + + for (fp->cnt = 0; fp->cnt < fp->min; fp->cnt++) + { + node *n = alloc_sys_page(); + add_tail(&fp->list, n + FP_NODE_OFFSET); } -#endif } + +static event global_free_pages_cleanup_event = { .hook = cleanup_global_pages }; + +void resource_sys_init(void) +{ + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) + { + init_list(&GFP->list); + GFP->cleanup = &global_free_pages_cleanup_event; + GFP->min = 0; + GFP->max = 256; + return; + } + + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + + /* Too big or strange page, use the aligned allocator instead */ + page_size = 4096; + use_fake = 1; +} + +#else + +void +resource_sys_init(void) +{ + page_size = 4096; + use_fake = 1; +} + +#endif diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/coroutine.c index 12ba55d8..e4657157 100644 --- a/sysdep/unix/coroutine.c +++ b/sysdep/unix/coroutine.c @@ -145,10 +145,13 @@ static void coro_free(resource *r) coro_cleaned_up = 1; } +static void coro_dump(resource *r UNUSED) { } + static struct resclass coro_class = { .name = "Coroutine", .size = sizeof(struct coroutine), .free = coro_free, + .dump = coro_dump, }; _Thread_local struct coroutine *this_coro = NULL; diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c index 769f01ba..732ea64d 100644 --- a/sysdep/unix/io-loop.c +++ b/sysdep/unix/io-loop.c @@ -32,7 +32,7 @@ * Current thread context */ -_Thread_local struct birdloop *birdloop_current; +_Thread_local struct birdloop *birdloop_current = NULL; static _Thread_local struct birdloop *birdloop_wakeup_masked; static _Thread_local uint birdloop_wakeup_masked_count; @@ -391,6 +391,8 @@ birdloop_new(pool *pp, uint order, const char *name) timers_init(&loop->time, loop->pool); sockets_init(loop); + 
init_pages(loop); + loop->time.coro = coro_run(loop->pool, birdloop_main, loop); birdloop_leave(loop); @@ -571,6 +573,7 @@ birdloop_main(void *arg) /* Free the pool and loop */ birdloop_enter(loop); rp_free(loop->pool, parent); + flush_pages(loop); birdloop_leave(loop); rfree(&loop->r); diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h index 3fccd520..e5af52d1 100644 --- a/sysdep/unix/io-loop.h +++ b/sysdep/unix/io-loop.h @@ -7,6 +7,20 @@ #ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_ #define _BIRD_SYSDEP_UNIX_IO_LOOP_H_ +#include "nest/bird.h" + +#include "lib/lists.h" +#include "lib/event.h" +#include "lib/timer.h" + +struct free_pages +{ + list list; /* List of empty pages */ + event *cleanup; /* Event to call when number of pages is outside bounds */ + u16 min, max; /* Minimal and maximal number of free pages kept */ + uint cnt; /* Number of empty pages */ +}; + struct birdloop { resource r; @@ -29,10 +43,17 @@ struct birdloop uint links; + struct free_pages pages; + void (*stopped)(void *data); void *stop_data; struct birdloop *prev_loop; }; +extern _Thread_local struct birdloop *birdloop_current; + +void init_pages(struct birdloop *loop); +void flush_pages(struct birdloop *loop); + #endif diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 91d717d0..eee7b586 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -2216,9 +2216,6 @@ io_loop(void) timers_fire(&main_birdloop.time, 1); io_close_event(); - /* Try to release some memory if possible */ - check_stored_pages(); - // FIXME poll_tout = (events ? 
0 : 3000); /* Time in milliseconds */ if (t = timers_first(&main_birdloop.time)) diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index ca06611f..57c51c99 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -683,7 +683,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "B:c:dD:ps:P:u:g:flRh"; +static char *opt_list = "c:dD:ps:P:u:g:flRh"; int parse_and_exit; char *bird_name; static char *use_user; @@ -704,7 +704,6 @@ display_help(void) fprintf(stderr, "\n" "Options: \n" - " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n" " -c <config-file> Use given configuration file instead of\n" " " PATH_CONFIG_FILE "\n" " -d Enable debug messages and run bird in foreground\n" @@ -791,15 +790,12 @@ get_gid(const char *s) return gr->gr_gid; } -extern _Bool alloc_multipage; - static void parse_args(int argc, char **argv) { int config_changed = 0; int socket_changed = 0; int c; - int bp; bird_name = get_bird_name(argv[0], "bird"); if (argc == 2) @@ -812,29 +808,6 @@ parse_args(int argc, char **argv) while ((c = getopt(argc, argv, opt_list)) >= 0) switch (c) { - case 'B': - bp = atoi(optarg); - if (bp < 1) - { - fprintf(stderr, "Strange block size power %d\n\n", bp); - display_usage(); - exit(1); - } - - if ((1 << bp) < page_size) - { - fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size); - display_usage(); - exit(1); - } - - if ((1L << bp) > page_size) - { - alloc_multipage = 1; - page_size = (1L << bp); - } - - break; case 'c': config_name = optarg; config_changed = 1; |