summaryrefslogtreecommitdiff
path: root/sysdep
diff options
context:
space:
mode:
authorMaria Matejka <mq@ucw.cz>2022-11-01 18:40:56 +0100
committerMaria Matejka <mq@ucw.cz>2022-11-02 12:56:54 +0100
commit9d03c3f56ced3d3191982f57029f9a3d12fa2e5a (patch)
treead1fa71442ad21704a99a18653d716b088bb2b78 /sysdep
parent37b644413723670928f6b54f2abe0c852eb0b652 (diff)
Memory pages are not munmapped, instead we just madvise()
Memory unmapping causes slow address space fragmentation, leading in extreme cases to a failure to allocate pages at all. We remove this problem by keeping all the pages allocated to us, while calling madvise() to let the kernel dispose of them. This adds a little complexity and overhead, as we have to keep the pointers to the free pages; for example, to hold 1 GB of 4K pages with 8B pointers, we have to store 2 MB of data.
Diffstat (limited to 'sysdep')
-rw-r--r--sysdep/cf/linux.h1
-rw-r--r--sysdep/unix/alloc.c52
2 files changed, 46 insertions, 7 deletions
diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h
index c640bef4..56ecf017 100644
--- a/sysdep/cf/linux.h
+++ b/sysdep/cf/linux.h
@@ -20,6 +20,7 @@
#define CONFIG_RESTRICTED_PRIVILEGES
#define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h"
+#define CONFIG_MADV_DONTNEED_TO_FREE
#ifndef AF_MPLS
#define AF_MPLS 28
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index edad6209..2800a8ba 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -41,8 +41,17 @@ struct free_page {
};
#endif
+#define EP_POS_MAX ((page_size - OFFSETOF(struct empty_pages, pages)) / sizeof (void *))
+
+struct empty_pages {
+ node n;
+ uint pos;
+ void *pages[0];
+};
+
struct free_pages {
list pages;
+ list empty;
u16 min, max; /* Minimal and maximal number of free pages kept */
uint cnt; /* Number of empty pages */
event cleanup;
@@ -103,6 +112,16 @@ alloc_page(void)
return fp;
}
+ if (!EMPTY_LIST(fps->empty))
+ {
+ struct empty_pages *ep = HEAD(fps->empty);
+ if (ep->pos)
+ return ep->pages[--ep->pos];
+
+ rem_node(&ep->n);
+ return ep;
+ }
+
return alloc_sys_page();
#endif
}
@@ -145,18 +164,36 @@ global_free_pages_cleanup_event(void *data UNUSED)
fps->cnt++;
}
- for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++)
+ int limit = CLEANUP_PAGES_BULK;
+ while (--limit && (fps->cnt > fps->max / 2))
{
struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
rem_node(&fp->n);
-
- if (munmap(fp, page_size) == 0)
- fps->cnt--;
- else if (errno == ENOMEM)
- add_head(&fps->pages, &fp->n);
+ fps->cnt--;
+
+ struct empty_pages *ep;
+ if (EMPTY_LIST(fps->empty) || ((ep = HEAD(fps->empty))->pos == EP_POS_MAX))
+ {
+ ep = (struct empty_pages *) fp;
+ *ep = (struct empty_pages) {};
+ add_head(&fps->empty, &ep->n);
+ }
else
- bug("munmap(%p) failed: %m", fp);
+ {
+ ep->pages[ep->pos++] = fp;
+ if (madvise(fp, page_size,
+#ifdef CONFIG_MADV_DONTNEED_TO_FREE
+ MADV_DONTNEED
+#else
+ MADV_FREE
+#endif
+ ) < 0)
+ bug("madvise(%p) failed: %m", fp);
+ }
}
+
+ if (!limit)
+ ev_schedule(&fps->cleanup);
}
#endif
@@ -174,6 +211,7 @@ resource_sys_init(void)
struct free_pages *fps = &global_free_pages;
init_list(&fps->pages);
+ init_list(&fps->empty);
global_free_pages_cleanup_event(NULL);
return;
}