summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaria Matejka <mq@ucw.cz>2021-11-30 23:57:14 +0100
committerMaria Matejka <mq@ucw.cz>2021-12-01 13:00:54 +0100
commitbb63e99d7877023667edaf26495dd657ec2fd57b (patch)
tree3ae919a00541c27c8f661addb56c6d4ef681d361
parent385b3ea3956aefc2868cdd838fc0a90f1d8a7857 (diff)
Page allocator moved from pools to IO loops.
The resource pool system is highly hierarchical, and keeping spare pages in pools leads to unnecessarily complex memory management. Loops have a flat hierarchy, at least for now, and it is therefore much easier to take care of pages, especially in cases of excessive virtual memory fragmentation.
-rw-r--r--lib/mempool.c4
-rw-r--r--lib/resource.c67
-rw-r--r--lib/resource.h4
-rw-r--r--lib/slab.c13
-rw-r--r--nest/a-path_test.c2
-rw-r--r--nest/rt-table.c6
-rw-r--r--sysdep/unix/alloc.c263
-rw-r--r--sysdep/unix/coroutine.c3
-rw-r--r--sysdep/unix/io-loop.c5
-rw-r--r--sysdep/unix/io-loop.h21
-rw-r--r--sysdep/unix/io.c3
-rw-r--r--sysdep/unix/main.c29
12 files changed, 207 insertions, 213 deletions
diff --git a/lib/mempool.c b/lib/mempool.c
index 8f300b81..ed3ae8de 100644
--- a/lib/mempool.c
+++ b/lib/mempool.c
@@ -130,7 +130,7 @@ lp_alloc(linpool *m, uint size)
{
/* Need to allocate a new chunk */
if (m->use_pages)
- c = alloc_page(m->p);
+ c = alloc_page();
else
c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size);
@@ -271,7 +271,7 @@ lp_free(resource *r)
{
c = d->next;
if (m->use_pages)
- free_page(m->p, d);
+ free_page(d);
else
xfree(d);
}
diff --git a/lib/resource.c b/lib/resource.c
index c847d41a..d98cd4ff 100644
--- a/lib/resource.c
+++ b/lib/resource.c
@@ -30,14 +30,6 @@
* is freed upon shutdown of the module.
*/
-struct pool_pages {
- uint free;
- uint used;
- void *ptr[0];
-};
-
-#define POOL_PAGES_MAX ((page_size - sizeof(struct pool_pages)) / sizeof (void *))
-
static void pool_dump(resource *);
static void pool_free(resource *);
static resource *pool_lookup(resource *, unsigned long);
@@ -54,9 +46,6 @@ static struct resclass pool_class = {
pool root_pool;
-void *alloc_sys_page(void);
-int free_sys_page(void *);
-
static int indent;
/**
@@ -103,16 +92,6 @@ pool_free(resource *P)
r = rr;
}
- if (p->pages)
- {
- ASSERT_DIE(!p->pages->used);
-
- for (uint i = 0; i < p->pages->free; i++)
- free_sys_page(p->pages->ptr[i]);
-
- free_sys_page(p->pages);
- }
-
pool_parent = parent;
}
@@ -185,9 +164,6 @@ pool_memsize_locked(pool *p)
WALK_LIST(r, p->inside)
sum += rmemsize(r);
- if (p->pages)
- sum += page_size * (p->pages->used + p->pages->free + 1);
-
return sum;
}
@@ -551,49 +527,6 @@ mb_free(void *m)
rfree(b);
}
-void *
-alloc_page(pool *p)
-{
- if (!p->pages)
- {
- p->pages = alloc_sys_page();
- p->pages->free = 0;
- p->pages->used = 1;
- }
- else
- p->pages->used++;
-
- if (p->pages->free)
- {
- void *ptr = p->pages->ptr[--p->pages->free];
- bzero(ptr, page_size);
- return ptr;
- }
- else
- return alloc_sys_page();
-}
-
-void
-free_page(pool *p, void *ptr)
-{
- ASSERT_DIE(p->pages);
- p->pages->used--;
-
- ASSERT_DIE(p->pages->free <= POOL_PAGES_MAX);
-
- if (p->pages->free == POOL_PAGES_MAX)
- {
- const unsigned long keep = POOL_PAGES_MAX / 4;
-
- for (uint i = keep; i < p->pages->free; i++)
- free_sys_page(p->pages->ptr[i]);
-
- p->pages->free = keep;
- }
-
- p->pages->ptr[p->pages->free++] = ptr;
-}
-
#define STEP_UP(x) ((x) + (x)/2 + 4)
diff --git a/lib/resource.h b/lib/resource.h
index 7adde493..9d7dae69 100644
--- a/lib/resource.h
+++ b/lib/resource.h
@@ -108,8 +108,8 @@ void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_siz
extern long page_size;
/* Allocator of whole pages; for use in slabs and other high-level allocators. */
-void *alloc_page(pool *);
-void free_page(pool *, void *);
+void *alloc_page(void);
+void free_page(void *);
#define PAGE_HEAD(x) ((void *) (((intptr_t) (x)) & ~(page_size-1)))
#ifdef HAVE_LIBDMALLOC
diff --git a/lib/slab.c b/lib/slab.c
index 6348e29b..32e241e4 100644
--- a/lib/slab.c
+++ b/lib/slab.c
@@ -269,7 +269,7 @@ no_partial:
s->num_empty_heads--;
goto okay;
}
- h = alloc_page(s->p);
+ h = alloc_page();
#ifdef POISON
memset(h, 0xba, page_size);
#endif
@@ -332,7 +332,7 @@ sl_free(slab *s, void *oo)
#ifdef POISON
memset(h, 0xde, page_size);
#endif
- free_page(s->p, h);
+ free_page(h);
}
else
{
@@ -349,11 +349,11 @@ slab_free(resource *r)
struct sl_head *h, *g;
WALK_LIST_DELSAFE(h, g, s->empty_heads)
- free_page(s->p, h);
+ free_page(h);
WALK_LIST_DELSAFE(h, g, s->partial_heads)
- free_page(s->p, h);
+ free_page(h);
WALK_LIST_DELSAFE(h, g, s->full_heads)
- free_page(s->p, h);
+ free_page(h);
}
static void
@@ -386,8 +386,7 @@ slab_memsize(resource *r)
WALK_LIST(h, s->full_heads)
heads++;
-// return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + page_size);
- return ALLOC_OVERHEAD + sizeof(struct slab); /* The page sizes are accounted for in the pool */
+ return ALLOC_OVERHEAD + sizeof(struct slab) + heads * page_size;
}
static resource *
diff --git a/nest/a-path_test.c b/nest/a-path_test.c
index 2e6683f2..2533dbae 100644
--- a/nest/a-path_test.c
+++ b/nest/a-path_test.c
@@ -198,6 +198,7 @@ t_as_path_converting(void)
#endif
void resource_sys_init(void);
+void io_init(void);
int
main(int argc, char *argv[])
@@ -207,6 +208,7 @@ main(int argc, char *argv[])
resource_init();
the_bird_lock();
birdloop_init();
+ io_init();
bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities.");
bt_test_suite(t_path_format, "Testing formating as path into byte buffer");
diff --git a/nest/rt-table.c b/nest/rt-table.c
index cd0d6291..ada54396 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -1057,7 +1057,7 @@ rte_announce(rtable_private *tab, net *net, struct rte_storage *new, struct rte_
if (!rpeb)
{
- rpeb = alloc_page(tab->rp);
+ rpeb = alloc_page();
*rpeb = (struct rt_export_block) {};
add_tail(&tab->pending_exports, &rpeb->n);
}
@@ -2157,7 +2157,7 @@ rt_free(resource *_r)
static void
rt_res_dump(resource *_r)
{
- RT_LOCKED((rtable *) _r, r)
+ rtable_private *r = RT_PRIV((rtable *) _r);
debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n",
r->name, net_label[r->addr_type], r->rt_count, r->use_count);
}
@@ -2484,7 +2484,7 @@ rt_export_cleanup(void *data)
memset(reb, 0xbe, page_size);
#endif
- free_page(tab->rp, reb);
+ free_page(reb);
if (EMPTY_LIST(tab->pending_exports))
{
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index 61360e73..77c504e3 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -9,6 +9,8 @@
#include "nest/bird.h"
#include "lib/resource.h"
+#include "sysdep/unix/io-loop.h"
+
#include <stdlib.h>
#include <unistd.h>
#include <stdatomic.h>
@@ -19,86 +21,47 @@
#endif
long page_size = 0;
-_Bool alloc_multipage = 0;
-
-static _Atomic int global_page_list_not_empty;
-static list global_page_list;
-static _Atomic int global_page_spinlock;
-
-#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0)
-#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, memory_order_acq_rel, memory_order_acquire)); } while (0)
#ifdef HAVE_MMAP
+#if DEBUGGING
+#define FP_NODE_OFFSET 42
+#else
+#define FP_NODE_OFFSET 1
+#endif
static _Bool use_fake = 0;
#else
static _Bool use_fake = 1;
#endif
-void resource_sys_init(void)
+static void *
+alloc_sys_page(void)
{
-#ifdef HAVE_MMAP
- init_list(&global_page_list);
+ void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (!(page_size = sysconf(_SC_PAGESIZE)))
- die("System page size must be non-zero");
+ if (ptr == MAP_FAILED)
+ bug("mmap(%lu) failed: %m", page_size);
- if ((u64_popcount(page_size) > 1) || (page_size > 16384))
-#endif
- {
- /* Too big or strange page, use the aligned allocator instead */
- page_size = 4096;
- use_fake = 1;
- }
+ return ptr;
}
void *
-alloc_sys_page(void)
+alloc_page(void)
{
#ifdef HAVE_MMAP
if (!use_fake)
{
- if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed))
- {
- GLOBAL_PAGE_SPIN_LOCK;
- if (!EMPTY_LIST(global_page_list))
- {
- node *ret = HEAD(global_page_list);
- rem_node(ret);
- if (EMPTY_LIST(global_page_list))
- atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- memset(ret, 0, sizeof(node));
- return (void *) ret;
- }
- GLOBAL_PAGE_SPIN_UNLOCK;
- }
-
- if (alloc_multipage)
- {
- void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (big == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- uintptr_t offset = ((uintptr_t) big) % page_size;
- if (offset)
- {
- void *ret = big + page_size - offset;
- munmap(big, page_size - offset);
- munmap(ret + page_size, offset);
- return ret;
- }
- else
- {
- munmap(big + page_size, page_size);
- return big;
- }
- }
-
- void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (ret == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- return ret;
+ struct free_pages *fp = &birdloop_current->pages;
+ if (!fp->cnt)
+ return alloc_sys_page();
+
+ node *n = HEAD(fp->list);
+ rem_node(n);
+ if (--fp->cnt < fp->min)
+ ev_send(&global_work_list, fp->cleanup);
+
+ void *ptr = n - FP_NODE_OFFSET;
+ memset(ptr, 0, page_size);
+ return ptr;
}
else
#endif
@@ -111,56 +74,156 @@ alloc_sys_page(void)
}
void
-free_sys_page(void *ptr)
+free_page(void *ptr)
{
#ifdef HAVE_MMAP
if (!use_fake)
{
- if (munmap(ptr, page_size) < 0)
-#ifdef ENOMEM
- if (errno == ENOMEM)
- {
- memset(ptr, 0, page_size);
-
- GLOBAL_PAGE_SPIN_LOCK;
- add_tail(&global_page_list, (node *) ptr);
- atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- }
- else
-#endif
- bug("munmap(%p) failed: %m", ptr);
+ struct free_pages *fp = &birdloop_current->pages;
+ struct node *n = ptr;
+ n += FP_NODE_OFFSET;
+
+ memset(n, 0, sizeof(node));
+ add_tail(&fp->list, n);
+ if (++fp->cnt > fp->max)
+ ev_send(&global_work_list, fp->cleanup);
}
else
#endif
free(ptr);
}
+#ifdef HAVE_MMAP
+
+#define GFP (&main_birdloop.pages)
+
void
-check_stored_pages(void)
+flush_pages(struct birdloop *loop)
{
-#ifdef ENOMEM
- if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed) == 0)
- return;
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
+
+ add_tail_list(&GFP->list, &loop->pages.list);
+ GFP->cnt += loop->pages.cnt;
+
+ loop->pages.cnt = 0;
+ loop->pages.list = (list) {};
+ loop->pages.min = 0;
+ loop->pages.max = 0;
+
+ rfree(loop->pages.cleanup);
+ loop->pages.cleanup = NULL;
+}
- for (uint limit = 0; limit < 256; limit++)
+static void
+cleanup_pages(void *data)
+{
+ struct birdloop *loop = data;
+ birdloop_enter(loop);
+
+ struct free_pages *fp = &birdloop_current->pages;
+
+ while ((fp->cnt < fp->min) && (GFP->cnt > GFP->min))
{
- GLOBAL_PAGE_SPIN_LOCK;
- void *ptr = HEAD(global_page_list);
- if (!NODE_VALID(ptr))
- {
- atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- return;
- }
-
- rem_node(ptr);
- if (munmap(ptr, page_size) < 0)
- if (errno == ENOMEM)
- add_tail(&global_page_list, ptr);
- else
- bug("munmap(%p) failed: %m", ptr);
- GLOBAL_PAGE_SPIN_UNLOCK;
+ node *n = HEAD(GFP->list);
+ rem_node(n);
+ add_tail(&fp->list, n);
+ fp->cnt++;
+ GFP->cnt--;
+ }
+
+ while (fp->cnt < fp->min)
+ {
+ node *n = alloc_sys_page();
+ add_tail(&fp->list, n + FP_NODE_OFFSET);
+ fp->cnt++;
+ }
+
+ while (fp->cnt > fp->max)
+ {
+ node *n = HEAD(fp->list);
+ rem_node(n);
+ add_tail(&GFP->list, n);
+ fp->cnt--;
+ GFP->cnt++;
+ }
+
+ birdloop_leave(loop);
+
+ if (GFP->cnt > GFP->max)
+ ev_send(&global_work_list, GFP->cleanup);
+}
+
+static void
+cleanup_global_pages(void *data UNUSED)
+{
+ while (GFP->cnt < GFP->max)
+ {
+ node *n = alloc_sys_page();
+ add_tail(&GFP->list, n + FP_NODE_OFFSET);
+ GFP->cnt++;
+ }
+
+ for (uint limit = GFP->cnt; (limit > 0) && (GFP->cnt > GFP->max); limit--)
+ {
+ node *n = TAIL(GFP->list);
+ rem_node(n);
+
+ if (munmap(n - FP_NODE_OFFSET, page_size) == 0)
+ GFP->cnt--;
+ else if (errno == ENOMEM)
+ add_head(&GFP->list, n);
+ else
+ bug("munmap(%p) failed: %m", n - FP_NODE_OFFSET);
+ }
+}
+
+void
+init_pages(struct birdloop *loop)
+{
+ struct free_pages *fp = &loop->pages;
+
+ init_list(&fp->list);
+ fp->cleanup = ev_new_init(&root_pool, cleanup_pages, loop);
+ fp->min = 4;
+ fp->max = 16;
+
+ for (fp->cnt = 0; fp->cnt < fp->min; fp->cnt++)
+ {
+ node *n = alloc_sys_page();
+ add_tail(&fp->list, n + FP_NODE_OFFSET);
}
-#endif
}
+
+static event global_free_pages_cleanup_event = { .hook = cleanup_global_pages };
+
+void resource_sys_init(void)
+{
+ if (!(page_size = sysconf(_SC_PAGESIZE)))
+ die("System page size must be non-zero");
+
+ if (u64_popcount(page_size) == 1)
+ {
+ init_list(&GFP->list);
+ GFP->cleanup = &global_free_pages_cleanup_event;
+ GFP->min = 0;
+ GFP->max = 256;
+ return;
+ }
+
+ log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
+
+ /* Too big or strange page, use the aligned allocator instead */
+ page_size = 4096;
+ use_fake = 1;
+}
+
+#else
+
+void
+resource_sys_init(void)
+{
+ page_size = 4096;
+ use_fake = 1;
+}
+
+#endif
diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/coroutine.c
index 12ba55d8..e4657157 100644
--- a/sysdep/unix/coroutine.c
+++ b/sysdep/unix/coroutine.c
@@ -145,10 +145,13 @@ static void coro_free(resource *r)
coro_cleaned_up = 1;
}
+static void coro_dump(resource *r UNUSED) { }
+
static struct resclass coro_class = {
.name = "Coroutine",
.size = sizeof(struct coroutine),
.free = coro_free,
+ .dump = coro_dump,
};
_Thread_local struct coroutine *this_coro = NULL;
diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c
index 769f01ba..732ea64d 100644
--- a/sysdep/unix/io-loop.c
+++ b/sysdep/unix/io-loop.c
@@ -32,7 +32,7 @@
* Current thread context
*/
-_Thread_local struct birdloop *birdloop_current;
+_Thread_local struct birdloop *birdloop_current = NULL;
static _Thread_local struct birdloop *birdloop_wakeup_masked;
static _Thread_local uint birdloop_wakeup_masked_count;
@@ -391,6 +391,8 @@ birdloop_new(pool *pp, uint order, const char *name)
timers_init(&loop->time, loop->pool);
sockets_init(loop);
+ init_pages(loop);
+
loop->time.coro = coro_run(loop->pool, birdloop_main, loop);
birdloop_leave(loop);
@@ -571,6 +573,7 @@ birdloop_main(void *arg)
/* Free the pool and loop */
birdloop_enter(loop);
rp_free(loop->pool, parent);
+ flush_pages(loop);
birdloop_leave(loop);
rfree(&loop->r);
diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h
index 3fccd520..e5af52d1 100644
--- a/sysdep/unix/io-loop.h
+++ b/sysdep/unix/io-loop.h
@@ -7,6 +7,20 @@
#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_
#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_
+#include "nest/bird.h"
+
+#include "lib/lists.h"
+#include "lib/event.h"
+#include "lib/timer.h"
+
+struct free_pages
+{
+ list list; /* List of empty pages */
+ event *cleanup; /* Event to call when number of pages is outside bounds */
+ u16 min, max; /* Minimal and maximal number of free pages kept */
+ uint cnt; /* Number of empty pages */
+};
+
struct birdloop
{
resource r;
@@ -29,10 +43,17 @@ struct birdloop
uint links;
+ struct free_pages pages;
+
void (*stopped)(void *data);
void *stop_data;
struct birdloop *prev_loop;
};
+extern _Thread_local struct birdloop *birdloop_current;
+
+void init_pages(struct birdloop *loop);
+void flush_pages(struct birdloop *loop);
+
#endif
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 91d717d0..eee7b586 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -2216,9 +2216,6 @@ io_loop(void)
timers_fire(&main_birdloop.time, 1);
io_close_event();
- /* Try to release some memory if possible */
- check_stored_pages();
-
// FIXME
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
if (t = timers_first(&main_birdloop.time))
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index ca06611f..57c51c99 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -683,7 +683,7 @@ signal_init(void)
* Parsing of command-line arguments
*/
-static char *opt_list = "B:c:dD:ps:P:u:g:flRh";
+static char *opt_list = "c:dD:ps:P:u:g:flRh";
int parse_and_exit;
char *bird_name;
static char *use_user;
@@ -704,7 +704,6 @@ display_help(void)
fprintf(stderr,
"\n"
"Options: \n"
- " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n"
" -c <config-file> Use given configuration file instead of\n"
" " PATH_CONFIG_FILE "\n"
" -d Enable debug messages and run bird in foreground\n"
@@ -791,15 +790,12 @@ get_gid(const char *s)
return gr->gr_gid;
}
-extern _Bool alloc_multipage;
-
static void
parse_args(int argc, char **argv)
{
int config_changed = 0;
int socket_changed = 0;
int c;
- int bp;
bird_name = get_bird_name(argv[0], "bird");
if (argc == 2)
@@ -812,29 +808,6 @@ parse_args(int argc, char **argv)
while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c)
{
- case 'B':
- bp = atoi(optarg);
- if (bp < 1)
- {
- fprintf(stderr, "Strange block size power %d\n\n", bp);
- display_usage();
- exit(1);
- }
-
- if ((1 << bp) < page_size)
- {
- fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size);
- display_usage();
- exit(1);
- }
-
- if ((1L << bp) > page_size)
- {
- alloc_multipage = 1;
- page_size = (1L << bp);
- }
-
- break;
case 'c':
config_name = optarg;
config_changed = 1;