summaryrefslogtreecommitdiff
path: root/sysdep/unix
diff options
context:
space:
mode:
authorMaria Matejka <mq@ucw.cz>2022-10-04 16:15:36 +0200
committerMaria Matejka <mq@ucw.cz>2022-10-04 16:15:36 +0200
commitdc9351d326b9d2d8bcb7e9a0e5126878c2b02762 (patch)
tree842b9d21cc1f4b16869cac58711902f5b1f78b91 /sysdep/unix
parent00679a688a5feff7a919cbeae71dd050ccc90b22 (diff)
parent67256d50359d42aca4e64bb1cb5dcb3c63669578 (diff)
Merge commit '67256d50' into HEAD
Diffstat (limited to 'sysdep/unix')
-rw-r--r--sysdep/unix/Makefile2
-rw-r--r--sysdep/unix/alloc.c178
-rw-r--r--sysdep/unix/io-loop.c141
-rw-r--r--sysdep/unix/io-loop.h15
-rw-r--r--sysdep/unix/io.c7
5 files changed, 239 insertions, 104 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile
index 3f1a8b3a..6f6b0d26 100644
--- a/sysdep/unix/Makefile
+++ b/sysdep/unix/Makefile
@@ -2,8 +2,6 @@ src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-$(call proto-build,kif_build)
-$(call proto-build,krt_build)
$(conf-y-targets): $(s)krt.Y
src := $(filter-out main.c, $(src))
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index a2384ca8..847def30 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -10,6 +10,7 @@
#include "lib/resource.h"
#include "lib/lists.h"
#include "lib/event.h"
+#include "lib/rcu.h"
#include <errno.h>
#include <stdlib.h>
@@ -22,46 +23,43 @@
long page_size = 0;
#ifdef HAVE_MMAP
-#define KEEP_PAGES_MAIN_MAX 256
-#define KEEP_PAGES_MAIN_MIN 8
-#define CLEANUP_PAGES_BULK 256
+#define KEEP_PAGES_MAX 512
+#define KEEP_PAGES_MIN 32
+#define KEEP_PAGES_MAX_LOCAL 16
+#define ALLOC_PAGES_AT_ONCE 8
-STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX);
+STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX);
+STATIC_ASSERT(ALLOC_PAGES_AT_ONCE < KEEP_PAGES_MAX_LOCAL);
static _Bool use_fake = 0;
+static _Bool initialized = 0;
#if DEBUGGING
struct free_page {
node unused[42];
- node n;
+ struct free_page * _Atomic next;
};
#else
struct free_page {
- node n;
+ struct free_page * _Atomic next;
};
#endif
-struct free_pages {
- list pages;
- u16 min, max; /* Minimal and maximal number of free pages kept */
- uint cnt; /* Number of empty pages */
- event cleanup;
-};
-
-static void global_free_pages_cleanup_event(void *);
+static struct free_page * _Atomic page_stack = NULL;
+static _Thread_local struct free_page * local_page_stack = NULL;
-static struct free_pages global_free_pages = {
- .min = KEEP_PAGES_MAIN_MIN,
- .max = KEEP_PAGES_MAIN_MAX,
- .cleanup = { .hook = global_free_pages_cleanup_event },
-};
+static void page_cleanup(void *);
+static event page_cleanup_event = { .hook = page_cleanup, };
+#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0)
-uint *pages_kept = &global_free_pages.cnt;
+_Atomic int pages_kept = 0;
+_Atomic int pages_kept_locally = 0;
+static int pages_kept_here = 0;
static void *
alloc_sys_page(void)
{
- void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ void *ptr = mmap(NULL, page_size * ALLOC_PAGES_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
bug("mmap(%lu) failed: %m", page_size);
@@ -90,20 +88,33 @@ alloc_page(void)
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
-
- if (fps->cnt)
+ struct free_page *fp = local_page_stack;
+ if (fp)
{
- struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
- rem_node(&fp->n);
- if ((--fps->cnt < fps->min) && !shutting_down)
- ev_send(&global_work_list, &fps->cleanup);
-
- bzero(fp, page_size);
+ local_page_stack = atomic_load_explicit(&fp->next, memory_order_acquire);
+ atomic_fetch_sub_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here--;
return fp;
}
- return alloc_sys_page();
+ rcu_read_lock();
+ fp = atomic_load_explicit(&page_stack, memory_order_acquire);
+ while (fp && !atomic_compare_exchange_strong_explicit(
+ &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire),
+ memory_order_acq_rel, memory_order_acquire))
+ ;
+ rcu_read_unlock();
+
+ if (!fp)
+ {
+ void *ptr = alloc_sys_page();
+ for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++)
+ free_page(ptr + page_size * i);
+ return ptr;
+ }
+
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
+ return fp;
#endif
}
@@ -117,45 +128,95 @@ free_page(void *ptr)
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
struct free_page *fp = ptr;
+ if (shutting_down || (pages_kept_here < KEEP_PAGES_MAX_LOCAL))
+ {
+ atomic_store_explicit(&fp->next, local_page_stack, memory_order_relaxed);
+ atomic_fetch_add_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here++;
+ return;
+ }
+
+ rcu_read_lock();
+ struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire);
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
+ do atomic_store_explicit(&fp->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, fp,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
- if ((++fps->cnt > fps->max) && !shutting_down)
- ev_send(&global_work_list, &fps->cleanup);
+ if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
#endif
}
+void
+flush_local_pages(void)
+{
+ if (use_fake || !local_page_stack || shutting_down)
+ return;
+
+ struct free_page *last = local_page_stack, *next;
+ int check_count = 1;
+ while (next = atomic_load_explicit(&last->next, memory_order_acquire))
+ {
+ check_count++;
+ last = next;
+ }
+
+ ASSERT_DIE(check_count == pages_kept_here);
+
+ rcu_read_lock();
+ next = atomic_load_explicit(&page_stack, memory_order_acquire);
+
+ do atomic_store_explicit(&last->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, local_page_stack,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
+
+ local_page_stack = NULL;
+ pages_kept_here = 0;
+
+ atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed);
+ if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
+}
+
#ifdef HAVE_MMAP
static void
-global_free_pages_cleanup_event(void *data UNUSED)
+page_cleanup(void *_ UNUSED)
{
if (shutting_down)
return;
- struct free_pages *fps = &global_free_pages;
+ struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel);
+ if (!stack)
+ return;
- while (fps->cnt / 2 < fps->min)
- {
- struct free_page *fp = alloc_sys_page();
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
- fps->cnt++;
+ synchronize_rcu();
+
+ do {
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+
+ if (munmap(f, page_size) == 0)
+ continue;
+ else if (errno != ENOMEM)
+ bug("munmap(%p) failed: %m", f);
+ else
+ free_page(f);
}
+ while ((atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2) && stack);
- for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++)
+ while (stack)
{
- struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
- rem_node(&fp->n);
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+ free_page(f);
- if (munmap(fp, page_size) == 0)
- fps->cnt--;
- else if (errno == ENOMEM)
- add_head(&fps->pages, &fp->n);
- else
- bug("munmap(%p) failed: %m", fp);
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
}
}
#endif
@@ -164,17 +225,17 @@ void
resource_sys_init(void)
{
#ifdef HAVE_MMAP
- ASSERT_DIE(global_free_pages.cnt == 0);
-
if (!(page_size = sysconf(_SC_PAGESIZE)))
die("System page size must be non-zero");
if (u64_popcount(page_size) == 1)
{
- struct free_pages *fps = &global_free_pages;
- init_list(&fps->pages);
- global_free_pages_cleanup_event(NULL);
+ for (int i = 0; i < (KEEP_PAGES_MIN * 2); i++)
+ free_page(alloc_page());
+
+ page_cleanup(NULL);
+ initialized = 1;
return;
}
@@ -184,4 +245,5 @@ resource_sys_init(void)
#endif
page_size = 4096;
+ initialized = 1;
}
diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c
index 3e3fc31a..dbca36e9 100644
--- a/sysdep/unix/io-loop.c
+++ b/sysdep/unix/io-loop.c
@@ -59,78 +59,138 @@ birdloop_inside(struct birdloop *loop)
return 0;
}
+void
+birdloop_flag(struct birdloop *loop, u32 flag)
+{
+ atomic_fetch_or_explicit(&loop->flags, flag, memory_order_acq_rel);
+ birdloop_ping(loop);
+}
+
+void
+birdloop_flag_set_handler(struct birdloop *loop, struct birdloop_flag_handler *fh)
+{
+ ASSERT_DIE(birdloop_inside(loop));
+ loop->flag_handler = fh;
+}
+
+static int
+birdloop_process_flags(struct birdloop *loop)
+{
+ if (!loop->flag_handler)
+ return 0;
+
+ u32 flags = atomic_exchange_explicit(&loop->flags, 0, memory_order_acq_rel);
+ loop->flag_handler->hook(loop->flag_handler, flags);
+ return !!flags;
+}
+
+static int
+birdloop_run_events(struct birdloop *loop)
+{
+ btime begin = current_time();
+ while (current_time() - begin < 5 MS)
+ {
+ if (!ev_run_list(&loop->event_list))
+ return 0;
+
+ times_update();
+ }
+
+ return 1;
+}
+
/*
* Wakeup code for birdloop
*/
-static void
-pipe_new(int *pfds)
+void
+pipe_new(struct pipe *p)
{
- int rv = pipe(pfds);
+ int rv = pipe(p->fd);
if (rv < 0)
die("pipe: %m");
- if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0)
+ if (fcntl(p->fd[0], F_SETFL, O_NONBLOCK) < 0)
die("fcntl(O_NONBLOCK): %m");
- if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0)
+ if (fcntl(p->fd[1], F_SETFL, O_NONBLOCK) < 0)
die("fcntl(O_NONBLOCK): %m");
}
void
-pipe_drain(int fd)
+pipe_drain(struct pipe *p)
{
- char buf[64];
- int rv;
-
- try:
- rv = read(fd, buf, 64);
- if (rv < 0)
- {
- if (errno == EINTR)
- goto try;
- if (errno == EAGAIN)
+ while (1) {
+ char buf[64];
+ int rv = read(p->fd[0], buf, sizeof(buf));
+ if ((rv < 0) && (errno == EAGAIN))
return;
- die("wakeup read: %m");
+
+ if (rv == 0)
+ bug("wakeup read eof");
+ if ((rv < 0) && (errno != EINTR))
+ bug("wakeup read: %m");
+ }
+}
+
+int
+pipe_read_one(struct pipe *p)
+{
+ while (1) {
+ char v;
+ int rv = read(p->fd[0], &v, sizeof(v));
+ if (rv == 1)
+ return 1;
+ if ((rv < 0) && (errno == EAGAIN))
+ return 0;
+ if (rv > 1)
+ bug("wakeup read more bytes than expected: %d", rv);
+ if (rv == 0)
+ bug("wakeup read eof");
+ if (errno != EINTR)
+ bug("wakeup read: %m");
}
- if (rv == 64)
- goto try;
}
void
-pipe_kick(int fd)
+pipe_kick(struct pipe *p)
{
- u64 v = 1;
+ char v = 1;
int rv;
- try:
- rv = write(fd, &v, sizeof(u64));
- if (rv < 0)
- {
- if (errno == EINTR)
- goto try;
- if (errno == EAGAIN)
+ while (1) {
+ rv = write(p->fd[1], &v, sizeof(v));
+ if ((rv >= 0) || (errno == EAGAIN))
return;
- die("wakeup write: %m");
+ if (errno != EINTR)
+ bug("wakeup write: %m");
}
}
+void
+pipe_pollin(struct pipe *p, struct pollfd *pfd)
+{
+ pfd->fd = p->fd[0];
+ pfd->events = POLLIN;
+ pfd->revents = 0;
+}
+
static inline void
wakeup_init(struct birdloop *loop)
{
- pipe_new(loop->wakeup_fds);
+ pipe_new(&loop->wakeup);
}
static inline void
wakeup_drain(struct birdloop *loop)
{
- pipe_drain(loop->wakeup_fds[0]);
+ pipe_drain(&loop->wakeup);
}
static inline void
wakeup_do_kick(struct birdloop *loop)
{
- pipe_kick(loop->wakeup_fds[1]);
+ pipe_kick(&loop->wakeup);
}
static inline void
@@ -259,9 +319,7 @@ sockets_prepare(struct birdloop *loop)
/* Add internal wakeup fd */
*psk = NULL;
- pfd->fd = loop->wakeup_fds[0];
- pfd->events = POLLIN;
- pfd->revents = 0;
+ pipe_pollin(&loop->wakeup, pfd);
loop->poll_changed = 0;
}
@@ -516,7 +574,7 @@ birdloop_main(void *arg)
while (1)
{
timers_fire(&loop->time, 0);
- if (ev_run_list(&loop->event_list))
+ if (birdloop_process_flags(loop) + birdloop_run_events(loop))
timeout = 0;
else if (t = timers_first(&loop->time))
timeout = (tm_remains(t) TO_MS) + 1;
@@ -525,6 +583,9 @@ birdloop_main(void *arg)
if (loop->poll_changed)
sockets_prepare(loop);
+ else
+ if ((timeout < 0) || (timeout > 5000))
+ flush_local_pages();
btime duration = current_time() - loop_begin;
if (duration > config->watchdog_warning)
@@ -560,14 +621,18 @@ birdloop_main(void *arg)
/* Flush remaining events */
ASSERT_DIE(!ev_run_list(&loop->event_list));
- /* No timers allowed */
- ASSERT_DIE(timers_count(&loop->time) == 0);
+ /* Drop timers */
+ while (t = timers_first(&loop->time))
+ tm_stop(t);
+
+ /* No sockets allowed */
ASSERT_DIE(EMPTY_LIST(loop->sock_list));
ASSERT_DIE(loop->sock_num == 0);
birdloop_leave(loop);
loop->stopped(loop->stop_data);
+ flush_local_pages();
return NULL;
}
diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h
index 31c40459..29ca96d6 100644
--- a/sysdep/unix/io-loop.h
+++ b/sysdep/unix/io-loop.h
@@ -9,6 +9,16 @@
#include "lib/rcu.h"
+struct pipe
+{
+ int fd[2];
+};
+
+void pipe_new(struct pipe *);
+void pipe_pollin(struct pipe *, struct pollfd *);
+void pipe_drain(struct pipe *);
+void pipe_kick(struct pipe *);
+
struct birdloop
{
pool *pool;
@@ -25,7 +35,7 @@ struct birdloop
uint ping_pending;
_Atomic u32 ping_sent;
- int wakeup_fds[2];
+ struct pipe wakeup;
pthread_t thread_id;
pthread_attr_t thread_attr;
@@ -34,6 +44,9 @@ struct birdloop
uint links;
+ _Atomic u32 flags;
+ struct birdloop_flag_handler *flag_handler;
+
void (*stopped)(void *data);
void *stop_data;
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 23baffb2..6454f15f 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -2213,8 +2213,6 @@ static int short_loops = 0;
#define SHORT_LOOP_MAX 10
#define WORK_EVENTS_MAX 10
-void pipe_drain(int fd);
-
void
io_loop(void)
{
@@ -2246,8 +2244,7 @@ io_loop(void)
}
/* A hack to reload main io_loop() when something has changed asynchronously. */
- pfd[0].fd = main_birdloop.wakeup_fds[0];
- pfd[0].events = POLLIN;
+ pipe_pollin(&main_birdloop.wakeup, &pfd[0]);
nfds = 1;
@@ -2325,7 +2322,7 @@ io_loop(void)
if (pfd[0].revents & POLLIN)
{
/* IO loop reload requested */
- pipe_drain(main_birdloop.wakeup_fds[0]);
+ pipe_drain(&main_birdloop.wakeup);
atomic_exchange_explicit(&main_birdloop.ping_sent, 0, memory_order_acq_rel);
continue;
}