summaryrefslogtreecommitdiff
path: root/sysdep/unix/alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdep/unix/alloc.c')
-rw-r--r--sysdep/unix/alloc.c235
1 files changed, 153 insertions, 82 deletions
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index e7c4e6b0..3ea10c32 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -10,6 +10,7 @@
#include "lib/resource.h"
#include "lib/lists.h"
#include "lib/event.h"
+#include "lib/rcu.h"
#include <errno.h>
#include <stdlib.h>
@@ -22,55 +23,55 @@
long page_size = 0;
#ifdef HAVE_MMAP
-#define KEEP_PAGES_MAIN_MAX 256
-#define KEEP_PAGES_MAIN_MIN 8
-#define CLEANUP_PAGES_BULK 256
+#define KEEP_PAGES_MAX 512
+#define KEEP_PAGES_MIN 32
+#define KEEP_PAGES_MAX_LOCAL 16
+#define ALLOC_PAGES_AT_ONCE 8
-STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX);
+STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX);
+STATIC_ASSERT(ALLOC_PAGES_AT_ONCE < KEEP_PAGES_MAX_LOCAL);
static _Bool use_fake = 0;
+static _Bool initialized = 0;
#if DEBUGGING
struct free_page {
node unused[42];
- node n;
+ struct free_page * _Atomic next;
};
#else
struct free_page {
- node n;
+ struct free_page * _Atomic next;
};
#endif
#define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *))
struct empty_pages {
- node n;
+ struct empty_pages *next;
uint pos;
void *pages[0];
};
-struct free_pages {
- list pages; /* List of (struct free_page) keeping free pages without releasing them (hot) */
- list empty; /* List of (struct empty_pages) keeping invalidated pages mapped for us (cold) */
- u16 min, max; /* Minimal and maximal number of free pages kept */
- uint cnt; /* Number of free pages in list */
- event cleanup;
-};
+DEFINE_DOMAIN(resource);
+static DOMAIN(resource) empty_pages_domain;
+static struct empty_pages *empty_pages = NULL;
-static void global_free_pages_cleanup_event(void *);
+static struct free_page * _Atomic page_stack = NULL;
+static _Thread_local struct free_page * local_page_stack = NULL;
-static struct free_pages global_free_pages = {
- .min = KEEP_PAGES_MAIN_MIN,
- .max = KEEP_PAGES_MAIN_MAX,
- .cleanup = { .hook = global_free_pages_cleanup_event },
-};
+static void page_cleanup(void *);
+static event page_cleanup_event = { .hook = page_cleanup, };
+#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0)
-uint *pages_kept = &global_free_pages.cnt;
+_Atomic int pages_kept = 0;
+_Atomic int pages_kept_locally = 0;
+static int pages_kept_here = 0;
static void *
alloc_sys_page(void)
{
- void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ void *ptr = mmap(NULL, page_size * ALLOC_PAGES_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
bug("mmap(%lu) failed: %m", page_size);
@@ -100,37 +101,55 @@ alloc_page(void)
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
-
- /* If there is any free page kept hot, we use it. */
- if (fps->cnt)
+ /* If there is any free page kept hot in this thread, we use it. */
+ struct free_page *fp = local_page_stack;
+ if (fp)
{
- struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
- rem_node(&fp->n);
+ local_page_stack = atomic_load_explicit(&fp->next, memory_order_acquire);
+ atomic_fetch_sub_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here--;
+ return fp;
+ }
- /* If the hot-free-page cache is getting short, request the cleanup routine to replenish the cache */
- if ((--fps->cnt < fps->min) && !shutting_down)
- ev_schedule(&fps->cleanup);
+ /* If there is any free page kept hot in global storage, we use it. */
+ rcu_read_lock();
+ fp = atomic_load_explicit(&page_stack, memory_order_acquire);
+ while (fp && !atomic_compare_exchange_strong_explicit(
+ &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire),
+ memory_order_acq_rel, memory_order_acquire))
+ ;
+ rcu_read_unlock();
+ if (fp)
+ {
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
return fp;
}
/* If there is any free page kept cold, we use that. */
- if (!EMPTY_LIST(fps->empty))
- {
- struct empty_pages *ep = HEAD(fps->empty);
+ LOCK_DOMAIN(resource, empty_pages_domain);
+ if (empty_pages) {
+ if (empty_pages->pos)
+ /* Either the keeper page contains at least one cold page pointer, return that */
+ fp = empty_pages->pages[--empty_pages->pos];
+ else
+ {
+ /* Or the keeper page has no more cold page pointer, return the keeper page */
+ fp = (struct free_page *) empty_pages;
+ empty_pages = empty_pages->next;
+ }
+ }
+ UNLOCK_DOMAIN(resource, empty_pages_domain);
- /* Either the keeper page contains at least one cold page pointer, return that */
- if (ep->pos)
- return ep->pages[--ep->pos];
+ if (fp)
+ return fp;
- /* Or the keeper page has no more cold page pointer, return the keeper page */
- rem_node(&ep->n);
- return ep;
- }
+ /* And in the worst case, allocate some new pages by mmap() */
+ void *ptr = alloc_sys_page();
+ for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++)
+ free_page(ptr + page_size * i);
- /* And in the worst case, allocate a new page by mmap() */
- return alloc_sys_page();
+ return ptr;
#endif
}
@@ -145,58 +164,108 @@ free_page(void *ptr)
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
+ /* We primarily try to keep the pages locally. */
struct free_page *fp = ptr;
+ if (shutting_down || (pages_kept_here < KEEP_PAGES_MAX_LOCAL))
+ {
+ atomic_store_explicit(&fp->next, local_page_stack, memory_order_relaxed);
+ atomic_fetch_add_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here++;
+ return;
+ }
+
+ /* If there are too many local pages, we add the free page to the global hot-free-page list */
+ rcu_read_lock();
+ struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire);
- /* Otherwise, we add the free page to the hot-free-page list */
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
+ do atomic_store_explicit(&fp->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, fp,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
- /* And if there are too many hot free pages, we ask for page cleanup */
- if ((++fps->cnt > fps->max) && !shutting_down)
- ev_schedule(&fps->cleanup);
+ /* And if there are too many global hot free pages, we ask for page cleanup */
+ if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
#endif
}
+/* When the routine is going to sleep for a long time, we flush the local
+ * hot page cache to not keep dirty pages for nothing. */
+void
+flush_local_pages(void)
+{
+ if (use_fake || !local_page_stack || shutting_down)
+ return;
+
+ /* We first count the pages to enable consistency checking.
+ * Also, we need to know the last page. */
+ struct free_page *last = local_page_stack, *next;
+ int check_count = 1;
+ while (next = atomic_load_explicit(&last->next, memory_order_acquire))
+ {
+ check_count++;
+ last = next;
+ }
+
+ /* The actual number of pages must be equal to the counter value. */
+ ASSERT_DIE(check_count == pages_kept_here);
+
+ /* Repeatedly trying to insert the whole page list into global page stack at once. */
+ rcu_read_lock();
+ next = atomic_load_explicit(&page_stack, memory_order_acquire);
+
+ /* First we set the outwards pointer (from our last),
+ * then we try to set the inwards pointer to our first page. */
+ do atomic_store_explicit(&last->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, local_page_stack,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
+
+ /* Finished. Now the local stack is empty. */
+ local_page_stack = NULL;
+ pages_kept_here = 0;
+
+ /* Check the state of global page cache and maybe schedule its cleanup. */
+ atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed);
+ if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
+}
+
#ifdef HAVE_MMAP
static void
-global_free_pages_cleanup_event(void *data UNUSED)
+page_cleanup(void *_ UNUSED)
{
/* Cleanup on shutdown is ignored. All pages may be kept hot, OS will take care. */
if (shutting_down)
return;
- struct free_pages *fps = &global_free_pages;
+ struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel);
+ if (!stack)
+ return;
- /* Cleanup may get called when hot free page cache is short of pages. Replenishing. */
- while (fps->cnt / 2 < fps->min)
- {
- struct free_page *fp = alloc_sys_page();
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
- fps->cnt++;
- }
+ /* Cleanup gets called when hot free page cache is too big.
+ * Moving some pages to the cold free page cache. */
- /* Or the hot free page cache is too big. Moving some pages to the cold free page cache. */
- for (int limit = CLEANUP_PAGES_BULK; limit && (fps->cnt > fps->max / 2); fps->cnt--, limit--)
- {
- struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
- rem_node(&fp->n);
+ do {
+ synchronize_rcu();
+ struct free_page *fp = stack;
+ stack = atomic_load_explicit(&fp->next, memory_order_acquire);
+ LOCK_DOMAIN(resource, empty_pages_domain);
/* Empty pages are stored as pointers. To store them, we need a pointer block. */
- struct empty_pages *ep;
- if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX))
+ if (!empty_pages || (empty_pages->pos == EP_POS_MAX))
{
/* There is either no pointer block or the last block is full. We use this block as a pointer block. */
- ep = (struct empty_pages *) fp;
- *ep = (struct empty_pages) {};
- add_head(&fps->empty, &ep->n);
+ empty_pages = (struct empty_pages *) fp;
+ *empty_pages = (struct empty_pages) {};
}
else
{
/* We store this block as a pointer into the first free place
* and tell the OS that the underlying memory is trash. */
- ep->pages[ep->pos++] = fp;
+ empty_pages->pages[empty_pages->pos++] = fp;
if (madvise(fp, page_size,
#ifdef CONFIG_MADV_DONTNEED_TO_FREE
MADV_DONTNEED
@@ -206,12 +275,18 @@ global_free_pages_cleanup_event(void *data UNUSED)
) < 0)
bug("madvise(%p) failed: %m", fp);
}
+ UNLOCK_DOMAIN(resource, empty_pages_domain);
}
+ while ((atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2) && stack);
- /* If the hot free page cleanup hit the limit, re-schedule this routine
- * to allow for other routines to run. */
- if (fps->cnt > fps->max)
- ev_schedule(&fps->cleanup);
+ while (stack)
+ {
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+ free_page(f);
+
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
+ }
}
#endif
@@ -219,8 +294,6 @@ void
resource_sys_init(void)
{
#ifdef HAVE_MMAP
- ASSERT_DIE(global_free_pages.cnt == 0);
-
/* Check what page size the system supports */
if (!(page_size = sysconf(_SC_PAGESIZE)))
die("System page size must be non-zero");
@@ -230,11 +303,8 @@ resource_sys_init(void)
/* We assume that page size has only one bit and is between 1K and 256K (incl.).
* Otherwise, the assumptions in lib/slab.c (sl_head's num_full range) aren't met. */
- struct free_pages *fps = &global_free_pages;
-
- init_list(&fps->pages);
- init_list(&fps->empty);
- global_free_pages_cleanup_event(NULL);
+ empty_pages_domain = DOMAIN_NEW(resource, "Empty Pages");
+ initialized = 1;
return;
}
@@ -244,4 +314,5 @@ resource_sys_init(void)
#endif
page_size = 4096;
+ initialized = 1;
}