summaryrefslogtreecommitdiff
path: root/sysdep
diff options
context:
space:
mode:
authorMaria Matejka <mq@ucw.cz>2021-11-30 23:57:14 +0100
committerMaria Matejka <mq@ucw.cz>2021-12-01 13:00:54 +0100
commitbb63e99d7877023667edaf26495dd657ec2fd57b (patch)
tree3ae919a00541c27c8f661addb56c6d4ef681d361 /sysdep
parent385b3ea3956aefc2868cdd838fc0a90f1d8a7857 (diff)
Page allocator moved from pools to IO loops.
The resource pool system is highly hierarchical and keeping spare pages in pools leads to unnecessarily complex memory management. Loops have a flat hierarchy, at least for now, and it is therefore much easier to take care of pages, especially in cases of excessive virtual memory fragmentation.
Diffstat (limited to 'sysdep')
-rw-r--r--sysdep/unix/alloc.c263
-rw-r--r--sysdep/unix/coroutine.c3
-rw-r--r--sysdep/unix/io-loop.c5
-rw-r--r--sysdep/unix/io-loop.h21
-rw-r--r--sysdep/unix/io.c3
-rw-r--r--sysdep/unix/main.c29
6 files changed, 192 insertions, 132 deletions
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index 61360e73..77c504e3 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -9,6 +9,8 @@
#include "nest/bird.h"
#include "lib/resource.h"
+#include "sysdep/unix/io-loop.h"
+
#include <stdlib.h>
#include <unistd.h>
#include <stdatomic.h>
@@ -19,86 +21,47 @@
#endif
long page_size = 0;
-_Bool alloc_multipage = 0;
-
-static _Atomic int global_page_list_not_empty;
-static list global_page_list;
-static _Atomic int global_page_spinlock;
-
-#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0)
-#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, memory_order_acq_rel, memory_order_acquire)); } while (0)
#ifdef HAVE_MMAP
+#if DEBUGGING
+#define FP_NODE_OFFSET 42
+#else
+#define FP_NODE_OFFSET 1
+#endif
static _Bool use_fake = 0;
#else
static _Bool use_fake = 1;
#endif
-void resource_sys_init(void)
+static void *
+alloc_sys_page(void)
{
-#ifdef HAVE_MMAP
- init_list(&global_page_list);
+ void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (!(page_size = sysconf(_SC_PAGESIZE)))
- die("System page size must be non-zero");
+ if (ptr == MAP_FAILED)
+ bug("mmap(%lu) failed: %m", page_size);
- if ((u64_popcount(page_size) > 1) || (page_size > 16384))
-#endif
- {
- /* Too big or strange page, use the aligned allocator instead */
- page_size = 4096;
- use_fake = 1;
- }
+ return ptr;
}
void *
-alloc_sys_page(void)
+alloc_page(void)
{
#ifdef HAVE_MMAP
if (!use_fake)
{
- if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed))
- {
- GLOBAL_PAGE_SPIN_LOCK;
- if (!EMPTY_LIST(global_page_list))
- {
- node *ret = HEAD(global_page_list);
- rem_node(ret);
- if (EMPTY_LIST(global_page_list))
- atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- memset(ret, 0, sizeof(node));
- return (void *) ret;
- }
- GLOBAL_PAGE_SPIN_UNLOCK;
- }
-
- if (alloc_multipage)
- {
- void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (big == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- uintptr_t offset = ((uintptr_t) big) % page_size;
- if (offset)
- {
- void *ret = big + page_size - offset;
- munmap(big, page_size - offset);
- munmap(ret + page_size, offset);
- return ret;
- }
- else
- {
- munmap(big + page_size, page_size);
- return big;
- }
- }
-
- void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (ret == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- return ret;
+ struct free_pages *fp = &birdloop_current->pages;
+ if (!fp->cnt)
+ return alloc_sys_page();
+
+ node *n = HEAD(fp->list);
+ rem_node(n);
+ if (--fp->cnt < fp->min)
+ ev_send(&global_work_list, fp->cleanup);
+
+ void *ptr = n - FP_NODE_OFFSET;
+ memset(ptr, 0, page_size);
+ return ptr;
}
else
#endif
@@ -111,56 +74,156 @@ alloc_sys_page(void)
}
void
-free_sys_page(void *ptr)
+free_page(void *ptr)
{
#ifdef HAVE_MMAP
if (!use_fake)
{
- if (munmap(ptr, page_size) < 0)
-#ifdef ENOMEM
- if (errno == ENOMEM)
- {
- memset(ptr, 0, page_size);
-
- GLOBAL_PAGE_SPIN_LOCK;
- add_tail(&global_page_list, (node *) ptr);
- atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- }
- else
-#endif
- bug("munmap(%p) failed: %m", ptr);
+ struct free_pages *fp = &birdloop_current->pages;
+ struct node *n = ptr;
+ n += FP_NODE_OFFSET;
+
+ memset(n, 0, sizeof(node));
+ add_tail(&fp->list, n);
+ if (++fp->cnt > fp->max)
+ ev_send(&global_work_list, fp->cleanup);
}
else
#endif
free(ptr);
}
+#ifdef HAVE_MMAP
+
+#define GFP (&main_birdloop.pages)
+
void
-check_stored_pages(void)
+flush_pages(struct birdloop *loop)
{
-#ifdef ENOMEM
- if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed) == 0)
- return;
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
+
+ add_tail_list(&GFP->list, &loop->pages.list);
+ GFP->cnt += loop->pages.cnt;
+
+ loop->pages.cnt = 0;
+ loop->pages.list = (list) {};
+ loop->pages.min = 0;
+ loop->pages.max = 0;
+
+ rfree(loop->pages.cleanup);
+ loop->pages.cleanup = NULL;
+}
- for (uint limit = 0; limit < 256; limit++)
+static void
+cleanup_pages(void *data)
+{
+ struct birdloop *loop = data;
+ birdloop_enter(loop);
+
+ struct free_pages *fp = &birdloop_current->pages;
+
+ while ((fp->cnt < fp->min) && (GFP->cnt > GFP->min))
{
- GLOBAL_PAGE_SPIN_LOCK;
- void *ptr = HEAD(global_page_list);
- if (!NODE_VALID(ptr))
- {
- atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- return;
- }
-
- rem_node(ptr);
- if (munmap(ptr, page_size) < 0)
- if (errno == ENOMEM)
- add_tail(&global_page_list, ptr);
- else
- bug("munmap(%p) failed: %m", ptr);
- GLOBAL_PAGE_SPIN_UNLOCK;
+ node *n = HEAD(GFP->list);
+ rem_node(n);
+ add_tail(&fp->list, n);
+ fp->cnt++;
+ GFP->cnt--;
+ }
+
+ while (fp->cnt < fp->min)
+ {
+ node *n = alloc_sys_page();
+ add_tail(&fp->list, n + FP_NODE_OFFSET);
+ fp->cnt++;
+ }
+
+ while (fp->cnt > fp->max)
+ {
+ node *n = HEAD(fp->list);
+ rem_node(n);
+ add_tail(&GFP->list, n);
+ fp->cnt--;
+ GFP->cnt++;
+ }
+
+ birdloop_leave(loop);
+
+ if (GFP->cnt > GFP->max)
+ ev_send(&global_work_list, GFP->cleanup);
+}
+
+static void
+cleanup_global_pages(void *data UNUSED)
+{
+ while (GFP->cnt < GFP->max)
+ {
+ node *n = alloc_sys_page();
+ add_tail(&GFP->list, n + FP_NODE_OFFSET);
+ GFP->cnt++;
+ }
+
+ for (uint limit = GFP->cnt; (limit > 0) && (GFP->cnt > GFP->max); limit--)
+ {
+ node *n = TAIL(GFP->list);
+ rem_node(n);
+
+ if (munmap(n - FP_NODE_OFFSET, page_size) == 0)
+ GFP->cnt--;
+ else if (errno == ENOMEM)
+ add_head(&GFP->list, n);
+ else
+ bug("munmap(%p) failed: %m", n - FP_NODE_OFFSET);
+ }
+}
+
+void
+init_pages(struct birdloop *loop)
+{
+ struct free_pages *fp = &loop->pages;
+
+ init_list(&fp->list);
+ fp->cleanup = ev_new_init(&root_pool, cleanup_pages, loop);
+ fp->min = 4;
+ fp->max = 16;
+
+ for (fp->cnt = 0; fp->cnt < fp->min; fp->cnt++)
+ {
+ node *n = alloc_sys_page();
+ add_tail(&fp->list, n + FP_NODE_OFFSET);
}
-#endif
}
+
+static event global_free_pages_cleanup_event = { .hook = cleanup_global_pages };
+
+void resource_sys_init(void)
+{
+ if (!(page_size = sysconf(_SC_PAGESIZE)))
+ die("System page size must be non-zero");
+
+ if (u64_popcount(page_size) == 1)
+ {
+ init_list(&GFP->list);
+ GFP->cleanup = &global_free_pages_cleanup_event;
+ GFP->min = 0;
+ GFP->max = 256;
+ return;
+ }
+
+ log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
+
+ /* Too big or strange page, use the aligned allocator instead */
+ page_size = 4096;
+ use_fake = 1;
+}
+
+#else
+
+void
+resource_sys_init(void)
+{
+ page_size = 4096;
+ use_fake = 1;
+}
+
+#endif
diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/coroutine.c
index 12ba55d8..e4657157 100644
--- a/sysdep/unix/coroutine.c
+++ b/sysdep/unix/coroutine.c
@@ -145,10 +145,13 @@ static void coro_free(resource *r)
coro_cleaned_up = 1;
}
+static void coro_dump(resource *r UNUSED) { }
+
static struct resclass coro_class = {
.name = "Coroutine",
.size = sizeof(struct coroutine),
.free = coro_free,
+ .dump = coro_dump,
};
_Thread_local struct coroutine *this_coro = NULL;
diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c
index 769f01ba..732ea64d 100644
--- a/sysdep/unix/io-loop.c
+++ b/sysdep/unix/io-loop.c
@@ -32,7 +32,7 @@
* Current thread context
*/
-_Thread_local struct birdloop *birdloop_current;
+_Thread_local struct birdloop *birdloop_current = NULL;
static _Thread_local struct birdloop *birdloop_wakeup_masked;
static _Thread_local uint birdloop_wakeup_masked_count;
@@ -391,6 +391,8 @@ birdloop_new(pool *pp, uint order, const char *name)
timers_init(&loop->time, loop->pool);
sockets_init(loop);
+ init_pages(loop);
+
loop->time.coro = coro_run(loop->pool, birdloop_main, loop);
birdloop_leave(loop);
@@ -571,6 +573,7 @@ birdloop_main(void *arg)
/* Free the pool and loop */
birdloop_enter(loop);
rp_free(loop->pool, parent);
+ flush_pages(loop);
birdloop_leave(loop);
rfree(&loop->r);
diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h
index 3fccd520..e5af52d1 100644
--- a/sysdep/unix/io-loop.h
+++ b/sysdep/unix/io-loop.h
@@ -7,6 +7,20 @@
#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_
#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_
+#include "nest/bird.h"
+
+#include "lib/lists.h"
+#include "lib/event.h"
+#include "lib/timer.h"
+
+struct free_pages
+{
+ list list; /* List of empty pages */
+ event *cleanup; /* Event to call when number of pages is outside bounds */
+ u16 min, max; /* Minimal and maximal number of free pages kept */
+ uint cnt; /* Number of empty pages */
+};
+
struct birdloop
{
resource r;
@@ -29,10 +43,17 @@ struct birdloop
uint links;
+ struct free_pages pages;
+
void (*stopped)(void *data);
void *stop_data;
struct birdloop *prev_loop;
};
+extern _Thread_local struct birdloop *birdloop_current;
+
+void init_pages(struct birdloop *loop);
+void flush_pages(struct birdloop *loop);
+
#endif
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 91d717d0..eee7b586 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -2216,9 +2216,6 @@ io_loop(void)
timers_fire(&main_birdloop.time, 1);
io_close_event();
- /* Try to release some memory if possible */
- check_stored_pages();
-
// FIXME
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
if (t = timers_first(&main_birdloop.time))
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index ca06611f..57c51c99 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -683,7 +683,7 @@ signal_init(void)
* Parsing of command-line arguments
*/
-static char *opt_list = "B:c:dD:ps:P:u:g:flRh";
+static char *opt_list = "c:dD:ps:P:u:g:flRh";
int parse_and_exit;
char *bird_name;
static char *use_user;
@@ -704,7 +704,6 @@ display_help(void)
fprintf(stderr,
"\n"
"Options: \n"
- " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n"
" -c <config-file> Use given configuration file instead of\n"
" " PATH_CONFIG_FILE "\n"
" -d Enable debug messages and run bird in foreground\n"
@@ -791,15 +790,12 @@ get_gid(const char *s)
return gr->gr_gid;
}
-extern _Bool alloc_multipage;
-
static void
parse_args(int argc, char **argv)
{
int config_changed = 0;
int socket_changed = 0;
int c;
- int bp;
bird_name = get_bird_name(argv[0], "bird");
if (argc == 2)
@@ -812,29 +808,6 @@ parse_args(int argc, char **argv)
while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c)
{
- case 'B':
- bp = atoi(optarg);
- if (bp < 1)
- {
- fprintf(stderr, "Strange block size power %d\n\n", bp);
- display_usage();
- exit(1);
- }
-
- if ((1 << bp) < page_size)
- {
- fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size);
- display_usage();
- exit(1);
- }
-
- if ((1L << bp) > page_size)
- {
- alloc_multipage = 1;
- page_size = (1L << bp);
- }
-
- break;
case 'c':
config_name = optarg;
config_changed = 1;