diff options
author | Maria Matejka <mq@ucw.cz> | 2022-11-01 18:40:56 +0100 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-11-02 12:56:54 +0100 |
commit | 9d03c3f56ced3d3191982f57029f9a3d12fa2e5a (patch) | |
tree | ad1fa71442ad21704a99a18653d716b088bb2b78 | |
parent | 37b644413723670928f6b54f2abe0c852eb0b652 (diff) |
Memory pages are not munmapped; instead we just madvise() them
Memory unmapping causes slow address space fragmentation, leading in
extreme cases to a failure to allocate pages at all. We remove this problem
by keeping all the pages allocated to us, yet calling madvise() to let
the kernel dispose of them.
This adds a little complexity and overhead as we have to keep the
pointers to the free pages; for example, to hold 1 GB of 4K pages with
8B pointers, we have to store 2 MB of data.
-rw-r--r-- | sysdep/cf/linux.h | 1 | ||||
-rw-r--r-- | sysdep/unix/alloc.c | 52 |
2 files changed, 46 insertions, 7 deletions
diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h index c640bef4..56ecf017 100644 --- a/sysdep/cf/linux.h +++ b/sysdep/cf/linux.h @@ -20,6 +20,7 @@ #define CONFIG_RESTRICTED_PRIVILEGES #define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" +#define CONFIG_MADV_DONTNEED_TO_FREE #ifndef AF_MPLS #define AF_MPLS 28 diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index edad6209..2800a8ba 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -41,8 +41,17 @@ struct free_page { }; #endif +#define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *)) + +struct empty_pages { + node n; + uint pos; + void *pages[0]; +}; + struct free_pages { list pages; + list empty; u16 min, max; /* Minimal and maximal number of free pages kept */ uint cnt; /* Number of empty pages */ event cleanup; @@ -103,6 +112,16 @@ alloc_page(void) return fp; } + if (!EMPTY_LIST(fps->empty)) + { + struct empty_pages *ep = HEAD(fps->empty); + if (ep->pos) + return ep->pages[--ep->pos]; + + rem_node(&ep->n); + return ep; + } + return alloc_sys_page(); #endif } @@ -145,18 +164,36 @@ global_free_pages_cleanup_event(void *data UNUSED) fps->cnt++; } - for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + int limit = CLEANUP_PAGES_BULK; + while (--limit && (fps->cnt > fps->max / 2)) { struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); rem_node(&fp->n); - - if (munmap(fp, page_size) == 0) - fps->cnt--; - else if (errno == ENOMEM) - add_head(&fps->pages, &fp->n); + fps->cnt--; + + struct empty_pages *ep; + if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX)) + { + ep = (struct empty_pages *) fp; + *ep = (struct empty_pages) {}; + add_head(&fps->empty, &ep->n); + } else - bug("munmap(%p) failed: %m", fp); + { + ep->pages[ep->pos++] = fp; + if (madvise(fp, page_size, +#ifdef CONFIG_MADV_DONTNEED_TO_FREE + MADV_DONTNEED +#else + MADV_FREE +#endif + ) < 0) + bug("madvise(%p) failed: %m", fp); + 
} } + + if (!limit) + ev_schedule(&fps->cleanup); } #endif @@ -174,6 +211,7 @@ resource_sys_init(void) struct free_pages *fps = &global_free_pages; init_list(&fps->pages); + init_list(&fps->empty); global_free_pages_cleanup_event(NULL); return; } |