diff options
author | Maria Matejka <mq@ucw.cz> | 2021-11-30 23:57:14 +0100 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2021-12-01 13:00:54 +0100 |
commit | bb63e99d7877023667edaf26495dd657ec2fd57b (patch) | |
tree | 3ae919a00541c27c8f661addb56c6d4ef681d361 | |
parent | 385b3ea3956aefc2868cdd838fc0a90f1d8a7857 (diff) |
Page allocator moved from pools to IO loops.
The resource pool system is highly hierarchical and keeping spare pages
in pools leads to unnecessarily complex memory management.
Loops have a flat hierarchy, at least for now, and it is therefore much
easier to take care of pages, especially in cases of excessive virtual memory
fragmentation.
-rw-r--r-- | lib/mempool.c | 4 | ||||
-rw-r--r-- | lib/resource.c | 67 | ||||
-rw-r--r-- | lib/resource.h | 4 | ||||
-rw-r--r-- | lib/slab.c | 13 | ||||
-rw-r--r-- | nest/a-path_test.c | 2 | ||||
-rw-r--r-- | nest/rt-table.c | 6 | ||||
-rw-r--r-- | sysdep/unix/alloc.c | 263 | ||||
-rw-r--r-- | sysdep/unix/coroutine.c | 3 | ||||
-rw-r--r-- | sysdep/unix/io-loop.c | 5 | ||||
-rw-r--r-- | sysdep/unix/io-loop.h | 21 | ||||
-rw-r--r-- | sysdep/unix/io.c | 3 | ||||
-rw-r--r-- | sysdep/unix/main.c | 29 |
12 files changed, 207 insertions, 213 deletions
diff --git a/lib/mempool.c b/lib/mempool.c index 8f300b81..ed3ae8de 100644 --- a/lib/mempool.c +++ b/lib/mempool.c @@ -130,7 +130,7 @@ lp_alloc(linpool *m, uint size) { /* Need to allocate a new chunk */ if (m->use_pages) - c = alloc_page(m->p); + c = alloc_page(); else c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size); @@ -271,7 +271,7 @@ lp_free(resource *r) { c = d->next; if (m->use_pages) - free_page(m->p, d); + free_page(d); else xfree(d); } diff --git a/lib/resource.c b/lib/resource.c index c847d41a..d98cd4ff 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -30,14 +30,6 @@ * is freed upon shutdown of the module. */ -struct pool_pages { - uint free; - uint used; - void *ptr[0]; -}; - -#define POOL_PAGES_MAX ((page_size - sizeof(struct pool_pages)) / sizeof (void *)) - static void pool_dump(resource *); static void pool_free(resource *); static resource *pool_lookup(resource *, unsigned long); @@ -54,9 +46,6 @@ static struct resclass pool_class = { pool root_pool; -void *alloc_sys_page(void); -int free_sys_page(void *); - static int indent; /** @@ -103,16 +92,6 @@ pool_free(resource *P) r = rr; } - if (p->pages) - { - ASSERT_DIE(!p->pages->used); - - for (uint i = 0; i < p->pages->free; i++) - free_sys_page(p->pages->ptr[i]); - - free_sys_page(p->pages); - } - pool_parent = parent; } @@ -185,9 +164,6 @@ pool_memsize_locked(pool *p) WALK_LIST(r, p->inside) sum += rmemsize(r); - if (p->pages) - sum += page_size * (p->pages->used + p->pages->free + 1); - return sum; } @@ -551,49 +527,6 @@ mb_free(void *m) rfree(b); } -void * -alloc_page(pool *p) -{ - if (!p->pages) - { - p->pages = alloc_sys_page(); - p->pages->free = 0; - p->pages->used = 1; - } - else - p->pages->used++; - - if (p->pages->free) - { - void *ptr = p->pages->ptr[--p->pages->free]; - bzero(ptr, page_size); - return ptr; - } - else - return alloc_sys_page(); -} - -void -free_page(pool *p, void *ptr) -{ - ASSERT_DIE(p->pages); - p->pages->used--; - - ASSERT_DIE(p->pages->free <= POOL_PAGES_MAX); 
- - if (p->pages->free == POOL_PAGES_MAX) - { - const unsigned long keep = POOL_PAGES_MAX / 4; - - for (uint i = keep; i < p->pages->free; i++) - free_sys_page(p->pages->ptr[i]); - - p->pages->free = keep; - } - - p->pages->ptr[p->pages->free++] = ptr; -} - #define STEP_UP(x) ((x) + (x)/2 + 4) diff --git a/lib/resource.h b/lib/resource.h index 7adde493..9d7dae69 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -108,8 +108,8 @@ void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_siz extern long page_size; /* Allocator of whole pages; for use in slabs and other high-level allocators. */ -void *alloc_page(pool *); -void free_page(pool *, void *); +void *alloc_page(void); +void free_page(void *); #define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1))) #ifdef HAVE_LIBDMALLOC @@ -269,7 +269,7 @@ no_partial: s->num_empty_heads--; goto okay; } - h = alloc_page(s->p); + h = alloc_page(); #ifdef POISON memset(h, 0xba, page_size); #endif @@ -332,7 +332,7 @@ sl_free(slab *s, void *oo) #ifdef POISON memset(h, 0xde, page_size); #endif - free_page(s->p, h); + free_page(h); } else { @@ -349,11 +349,11 @@ slab_free(resource *r) struct sl_head *h, *g; WALK_LIST_DELSAFE(h, g, s->empty_heads) - free_page(s->p, h); + free_page(h); WALK_LIST_DELSAFE(h, g, s->partial_heads) - free_page(s->p, h); + free_page(h); WALK_LIST_DELSAFE(h, g, s->full_heads) - free_page(s->p, h); + free_page(h); } static void @@ -386,8 +386,7 @@ slab_memsize(resource *r) WALK_LIST(h, s->full_heads) heads++; -// return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + page_size); - return ALLOC_OVERHEAD + sizeof(struct slab); /* The page sizes are accounted for in the pool */ + return ALLOC_OVERHEAD + sizeof(struct slab) + heads * page_size; } static resource * diff --git a/nest/a-path_test.c b/nest/a-path_test.c index 2e6683f2..2533dbae 100644 --- a/nest/a-path_test.c +++ b/nest/a-path_test.c @@ -198,6 +198,7 @@ t_as_path_converting(void) #endif void 
resource_sys_init(void); +void io_init(void); int main(int argc, char *argv[]) @@ -207,6 +208,7 @@ main(int argc, char *argv[]) resource_init(); the_bird_lock(); birdloop_init(); + io_init(); bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); diff --git a/nest/rt-table.c b/nest/rt-table.c index cd0d6291..ada54396 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1057,7 +1057,7 @@ rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_ if (!rpeb) { - rpeb = alloc_page(tab->rp); + rpeb = alloc_page(); *rpeb = (struct rt_export_block) {}; add_tail(&tab->pending_exports, &rpeb->n); } @@ -2157,7 +2157,7 @@ rt_free(resource *_r) static void rt_res_dump(resource *_r) { - RT_LOCKED((rtable *) _r, r) + rtable_private *r = RT_PRIV((rtable *) _r); debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", r->name, net_label[r->addr_type], r->rt_count, r->use_count); } @@ -2484,7 +2484,7 @@ rt_export_cleanup(void *data) memset(reb, 0xbe, page_size); #endif - free_page(tab->rp, reb); + free_page(reb); if (EMPTY_LIST(tab->pending_exports)) { diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 61360e73..77c504e3 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -9,6 +9,8 @@ #include "nest/bird.h" #include "lib/resource.h" +#include "sysdep/unix/io-loop.h" + #include <stdlib.h> #include <unistd.h> #include <stdatomic.h> @@ -19,86 +21,47 @@ #endif long page_size = 0; -_Bool alloc_multipage = 0; - -static _Atomic int global_page_list_not_empty; -static list global_page_list; -static _Atomic int global_page_spinlock; - -#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0) -#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, 
memory_order_acq_rel, memory_order_acquire)); } while (0) #ifdef HAVE_MMAP +#if DEBUGGING +#define FP_NODE_OFFSET 42 +#else +#define FP_NODE_OFFSET 1 +#endif static _Bool use_fake = 0; #else static _Bool use_fake = 1; #endif -void resource_sys_init(void) +static void * +alloc_sys_page(void) { -#ifdef HAVE_MMAP - init_list(&global_page_list); + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (!(page_size = sysconf(_SC_PAGESIZE))) - die("System page size must be non-zero"); + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) -#endif - { - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; - } + return ptr; } void * -alloc_sys_page(void) +alloc_page(void) { #ifdef HAVE_MMAP if (!use_fake) { - if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed)) - { - GLOBAL_PAGE_SPIN_LOCK; - if (!EMPTY_LIST(global_page_list)) - { - node *ret = HEAD(global_page_list); - rem_node(ret); - if (EMPTY_LIST(global_page_list)) - atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed); - GLOBAL_PAGE_SPIN_UNLOCK; - memset(ret, 0, sizeof(node)); - return (void *) ret; - } - GLOBAL_PAGE_SPIN_UNLOCK; - } - - if (alloc_multipage) - { - void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (big == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - uintptr_t offset = ((uintptr_t) big) % page_size; - if (offset) - { - void *ret = big + page_size - offset; - munmap(big, page_size - offset); - munmap(ret + page_size, offset); - return ret; - } - else - { - munmap(big + page_size, page_size); - return big; - } - } - - void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - return ret; + struct free_pages *fp = 
&birdloop_current->pages; + if (!fp->cnt) + return alloc_sys_page(); + + node *n = HEAD(fp->list); + rem_node(n); + if (--fp->cnt < fp->min) + ev_send(&global_work_list, fp->cleanup); + + void *ptr = n - FP_NODE_OFFSET; + memset(ptr, 0, page_size); + return ptr; } else #endif @@ -111,56 +74,156 @@ alloc_sys_page(void) } void -free_sys_page(void *ptr) +free_page(void *ptr) { #ifdef HAVE_MMAP if (!use_fake) { - if (munmap(ptr, page_size) < 0) -#ifdef ENOMEM - if (errno == ENOMEM) - { - memset(ptr, 0, page_size); - - GLOBAL_PAGE_SPIN_LOCK; - add_tail(&global_page_list, (node *) ptr); - atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed); - GLOBAL_PAGE_SPIN_UNLOCK; - } - else -#endif - bug("munmap(%p) failed: %m", ptr); + struct free_pages *fp = &birdloop_current->pages; + struct node *n = ptr; + n += FP_NODE_OFFSET; + + memset(n, 0, sizeof(node)); + add_tail(&fp->list, n); + if (++fp->cnt > fp->max) + ev_send(&global_work_list, fp->cleanup); } else #endif free(ptr); } +#ifdef HAVE_MMAP + +#define GFP (&main_birdloop.pages) + void -check_stored_pages(void) +flush_pages(struct birdloop *loop) { -#ifdef ENOMEM - if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed) == 0) - return; + ASSERT_DIE(birdloop_inside(&main_birdloop)); + + add_tail_list(&GFP->list, &loop->pages.list); + GFP->cnt += loop->pages.cnt; + + loop->pages.cnt = 0; + loop->pages.list = (list) {}; + loop->pages.min = 0; + loop->pages.max = 0; + + rfree(loop->pages.cleanup); + loop->pages.cleanup = NULL; +} - for (uint limit = 0; limit < 256; limit++) +static void +cleanup_pages(void *data) +{ + struct birdloop *loop = data; + birdloop_enter(loop); + + struct free_pages *fp = &birdloop_current->pages; + + while ((fp->cnt < fp->min) && (GFP->cnt > GFP->min)) { - GLOBAL_PAGE_SPIN_LOCK; - void *ptr = HEAD(global_page_list); - if (!NODE_VALID(ptr)) - { - atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed); - GLOBAL_PAGE_SPIN_UNLOCK; - return; 
- } - - rem_node(ptr); - if (munmap(ptr, page_size) < 0) - if (errno == ENOMEM) - add_tail(&global_page_list, ptr); - else - bug("munmap(%p) failed: %m", ptr); - GLOBAL_PAGE_SPIN_UNLOCK; + node *n = HEAD(GFP->list); + rem_node(n); + add_tail(&fp->list, n); + fp->cnt++; + GFP->cnt--; + } + + while (fp->cnt < fp->min) + { + node *n = alloc_sys_page(); + add_tail(&fp->list, n + FP_NODE_OFFSET); + fp->cnt++; + } + + while (fp->cnt > fp->max) + { + node *n = HEAD(fp->list); + rem_node(n); + add_tail(&GFP->list, n); + fp->cnt--; + GFP->cnt++; + } + + birdloop_leave(loop); + + if (GFP->cnt > GFP->max) + ev_send(&global_work_list, GFP->cleanup); +} + +static void +cleanup_global_pages(void *data UNUSED) +{ + while (GFP->cnt < GFP->max) + { + node *n = alloc_sys_page(); + add_tail(&GFP->list, n + FP_NODE_OFFSET); + GFP->cnt++; + } + + for (uint limit = GFP->cnt; (limit > 0) && (GFP->cnt > GFP->max); limit--) + { + node *n = TAIL(GFP->list); + rem_node(n); + + if (munmap(n - FP_NODE_OFFSET, page_size) == 0) + GFP->cnt--; + else if (errno == ENOMEM) + add_head(&GFP->list, n); + else + bug("munmap(%p) failed: %m", n - FP_NODE_OFFSET); + } +} + +void +init_pages(struct birdloop *loop) +{ + struct free_pages *fp = &loop->pages; + + init_list(&fp->list); + fp->cleanup = ev_new_init(&root_pool, cleanup_pages, loop); + fp->min = 4; + fp->max = 16; + + for (fp->cnt = 0; fp->cnt < fp->min; fp->cnt++) + { + node *n = alloc_sys_page(); + add_tail(&fp->list, n + FP_NODE_OFFSET); } -#endif } + +static event global_free_pages_cleanup_event = { .hook = cleanup_global_pages }; + +void resource_sys_init(void) +{ + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) + { + init_list(&GFP->list); + GFP->cleanup = &global_free_pages_cleanup_event; + GFP->min = 0; + GFP->max = 256; + return; + } + + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + + /* Too big or strange 
page, use the aligned allocator instead */ + page_size = 4096; + use_fake = 1; +} + +#else + +void +resource_sys_init(void) +{ + page_size = 4096; + use_fake = 1; +} + +#endif diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/coroutine.c index 12ba55d8..e4657157 100644 --- a/sysdep/unix/coroutine.c +++ b/sysdep/unix/coroutine.c @@ -145,10 +145,13 @@ static void coro_free(resource *r) coro_cleaned_up = 1; } +static void coro_dump(resource *r UNUSED) { } + static struct resclass coro_class = { .name = "Coroutine", .size = sizeof(struct coroutine), .free = coro_free, + .dump = coro_dump, }; _Thread_local struct coroutine *this_coro = NULL; diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c index 769f01ba..732ea64d 100644 --- a/sysdep/unix/io-loop.c +++ b/sysdep/unix/io-loop.c @@ -32,7 +32,7 @@ * Current thread context */ -_Thread_local struct birdloop *birdloop_current; +_Thread_local struct birdloop *birdloop_current = NULL; static _Thread_local struct birdloop *birdloop_wakeup_masked; static _Thread_local uint birdloop_wakeup_masked_count; @@ -391,6 +391,8 @@ birdloop_new(pool *pp, uint order, const char *name) timers_init(&loop->time, loop->pool); sockets_init(loop); + init_pages(loop); + loop->time.coro = coro_run(loop->pool, birdloop_main, loop); birdloop_leave(loop); @@ -571,6 +573,7 @@ birdloop_main(void *arg) /* Free the pool and loop */ birdloop_enter(loop); rp_free(loop->pool, parent); + flush_pages(loop); birdloop_leave(loop); rfree(&loop->r); diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h index 3fccd520..e5af52d1 100644 --- a/sysdep/unix/io-loop.h +++ b/sysdep/unix/io-loop.h @@ -7,6 +7,20 @@ #ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_ #define _BIRD_SYSDEP_UNIX_IO_LOOP_H_ +#include "nest/bird.h" + +#include "lib/lists.h" +#include "lib/event.h" +#include "lib/timer.h" + +struct free_pages +{ + list list; /* List of empty pages */ + event *cleanup; /* Event to call when number of pages is outside bounds */ + u16 min, max; /* Minimal and maximal 
number of free pages kept */ + uint cnt; /* Number of empty pages */ +}; + struct birdloop { resource r; @@ -29,10 +43,17 @@ struct birdloop uint links; + struct free_pages pages; + void (*stopped)(void *data); void *stop_data; struct birdloop *prev_loop; }; +extern _Thread_local struct birdloop *birdloop_current; + +void init_pages(struct birdloop *loop); +void flush_pages(struct birdloop *loop); + #endif diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 91d717d0..eee7b586 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -2216,9 +2216,6 @@ io_loop(void) timers_fire(&main_birdloop.time, 1); io_close_event(); - /* Try to release some memory if possible */ - check_stored_pages(); - // FIXME poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ if (t = timers_first(&main_birdloop.time)) diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index ca06611f..57c51c99 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -683,7 +683,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "B:c:dD:ps:P:u:g:flRh"; +static char *opt_list = "c:dD:ps:P:u:g:flRh"; int parse_and_exit; char *bird_name; static char *use_user; @@ -704,7 +704,6 @@ display_help(void) fprintf(stderr, "\n" "Options: \n" - " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n" " -c <config-file> Use given configuration file instead of\n" " " PATH_CONFIG_FILE "\n" " -d Enable debug messages and run bird in foreground\n" @@ -791,15 +790,12 @@ get_gid(const char *s) return gr->gr_gid; } -extern _Bool alloc_multipage; - static void parse_args(int argc, char **argv) { int config_changed = 0; int socket_changed = 0; int c; - int bp; bird_name = get_bird_name(argv[0], "bird"); if (argc == 2) @@ -812,29 +808,6 @@ parse_args(int argc, char **argv) while ((c = getopt(argc, argv, opt_list)) >= 0) switch (c) { - case 'B': - bp = atoi(optarg); - if (bp < 1) - { - fprintf(stderr, "Strange block size power %d\n\n", bp); - 
display_usage(); - exit(1); - } - - if ((1 << bp) < page_size) - { - fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size); - display_usage(); - exit(1); - } - - if ((1L << bp) > page_size) - { - alloc_multipage = 1; - page_size = (1L << bp); - } - - break; case 'c': config_name = optarg; config_changed = 1; |