diff options
Diffstat (limited to 'sysdep')
-rw-r--r-- | sysdep/unix/Makefile | 2 | ||||
-rw-r--r-- | sysdep/unix/alloc.c | 178 | ||||
-rw-r--r-- | sysdep/unix/io-loop.c | 141 | ||||
-rw-r--r-- | sysdep/unix/io-loop.h | 15 | ||||
-rw-r--r-- | sysdep/unix/io.c | 7 |
5 files changed, 239 insertions, 104 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index 3f1a8b3a..6f6b0d26 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -2,8 +2,6 @@ src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,kif_build) -$(call proto-build,krt_build) $(conf-y-targets): $(s)krt.Y src := $(filter-out main.c, $(src)) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index a2384ca8..847def30 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -10,6 +10,7 @@ #include "lib/resource.h" #include "lib/lists.h" #include "lib/event.h" +#include "lib/rcu.h" #include <errno.h> #include <stdlib.h> @@ -22,46 +23,43 @@ long page_size = 0; #ifdef HAVE_MMAP -#define KEEP_PAGES_MAIN_MAX 256 -#define KEEP_PAGES_MAIN_MIN 8 -#define CLEANUP_PAGES_BULK 256 +#define KEEP_PAGES_MAX 512 +#define KEEP_PAGES_MIN 32 +#define KEEP_PAGES_MAX_LOCAL 16 +#define ALLOC_PAGES_AT_ONCE 8 -STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); +STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX); +STATIC_ASSERT(ALLOC_PAGES_AT_ONCE < KEEP_PAGES_MAX_LOCAL); static _Bool use_fake = 0; +static _Bool initialized = 0; #if DEBUGGING struct free_page { node unused[42]; - node n; + struct free_page * _Atomic next; }; #else struct free_page { - node n; + struct free_page * _Atomic next; }; #endif -struct free_pages { - list pages; - u16 min, max; /* Minimal and maximal number of free pages kept */ - uint cnt; /* Number of empty pages */ - event cleanup; -}; - -static void global_free_pages_cleanup_event(void *); +static struct free_page * _Atomic page_stack = NULL; +static _Thread_local struct free_page * local_page_stack = NULL; -static struct free_pages global_free_pages = { - .min = KEEP_PAGES_MAIN_MIN, - .max = KEEP_PAGES_MAIN_MAX, - .cleanup = { .hook = global_free_pages_cleanup_event }, -}; +static void page_cleanup(void *); +static event page_cleanup_event = { .hook = page_cleanup, }; +#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0) -uint *pages_kept = &global_free_pages.cnt; +_Atomic int pages_kept = 0; +_Atomic int pages_kept_locally = 0; +static int pages_kept_here = 0; static void * alloc_sys_page(void) { - void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + void *ptr = mmap(NULL, page_size * ALLOC_PAGES_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr == MAP_FAILED) bug("mmap(%lu) failed: %m", page_size); @@ -90,20 +88,33 @@ alloc_page(void) } #ifdef HAVE_MMAP - struct free_pages *fps = &global_free_pages; - - if (fps->cnt) + struct free_page *fp = local_page_stack; + if (fp) { - struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); - rem_node(&fp->n); - if ((--fps->cnt < fps->min) && !shutting_down) - ev_send(&global_work_list, &fps->cleanup); - - bzero(fp, page_size); + local_page_stack = atomic_load_explicit(&fp->next, memory_order_acquire); + atomic_fetch_sub_explicit(&pages_kept_locally, 1, memory_order_relaxed); + pages_kept_here--; return fp; } - return alloc_sys_page(); + rcu_read_lock(); + fp = atomic_load_explicit(&page_stack, memory_order_acquire); + while (fp && !atomic_compare_exchange_strong_explicit( + &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire), + memory_order_acq_rel, memory_order_acquire)) + ; + rcu_read_unlock(); + + if (!fp) + { + void *ptr = alloc_sys_page(); + for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++) + free_page(ptr + page_size * i); + return ptr; + } + + atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); + return fp; #endif } @@ -117,45 +128,95 @@ free_page(void *ptr) } #ifdef HAVE_MMAP - struct free_pages *fps = &global_free_pages; struct free_page *fp = ptr; + if (shutting_down || (pages_kept_here < KEEP_PAGES_MAX_LOCAL)) + { + atomic_store_explicit(&fp->next, local_page_stack, memory_order_relaxed); + atomic_fetch_add_explicit(&pages_kept_locally, 1, memory_order_relaxed); + pages_kept_here++; + return; + } + + rcu_read_lock(); + struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire); - fp->n = (node) {}; - add_tail(&fps->pages, &fp->n); + do atomic_store_explicit(&fp->next, next, memory_order_release); + while (!atomic_compare_exchange_strong_explicit( + &page_stack, &next, fp, + memory_order_acq_rel, memory_order_acquire)); + rcu_read_unlock(); - if ((++fps->cnt > fps->max) && !shutting_down) - ev_send(&global_work_list, &fps->cleanup); + if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX) + SCHEDULE_CLEANUP; #endif } +void +flush_local_pages(void) +{ + if (use_fake || !local_page_stack || shutting_down) + return; + + struct free_page *last = local_page_stack, *next; + int check_count = 1; + while (next = atomic_load_explicit(&last->next, memory_order_acquire)) + { + check_count++; + last = next; + } + + ASSERT_DIE(check_count == pages_kept_here); + + rcu_read_lock(); + next = atomic_load_explicit(&page_stack, memory_order_acquire); + + do atomic_store_explicit(&last->next, next, memory_order_release); + while (!atomic_compare_exchange_strong_explicit( + &page_stack, &next, local_page_stack, + memory_order_acq_rel, memory_order_acquire)); + rcu_read_unlock(); + + local_page_stack = NULL; + pages_kept_here = 0; + + atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed); + if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX) + SCHEDULE_CLEANUP; +} + #ifdef HAVE_MMAP static void -global_free_pages_cleanup_event(void *data UNUSED) +page_cleanup(void *_ UNUSED) { if (shutting_down) return; - struct free_pages *fps = &global_free_pages; + struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel); + if (!stack) + return; - while (fps->cnt / 2 < fps->min) - { - struct free_page *fp = alloc_sys_page(); - fp->n = (node) {}; - add_tail(&fps->pages, &fp->n); - fps->cnt++; + synchronize_rcu(); + + do { + struct free_page *f = stack; + stack = atomic_load_explicit(&f->next, memory_order_acquire); + + if (munmap(f, page_size) == 0) + continue; + else if (errno != ENOMEM) + bug("munmap(%p) failed: %m", f); + else + free_page(f); } + while ((atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2) && stack); - for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + while (stack) { - struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); - rem_node(&fp->n); + struct free_page *f = stack; + stack = atomic_load_explicit(&f->next, memory_order_acquire); + free_page(f); - if (munmap(fp, page_size) == 0) - fps->cnt--; - else if (errno == ENOMEM) - add_head(&fps->pages, &fp->n); - else - bug("munmap(%p) failed: %m", fp); + atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); } } #endif @@ -164,17 +225,17 @@ void resource_sys_init(void) { #ifdef HAVE_MMAP - ASSERT_DIE(global_free_pages.cnt == 0); - if (!(page_size = sysconf(_SC_PAGESIZE))) die("System page size must be non-zero"); if (u64_popcount(page_size) == 1) { - struct free_pages *fps = &global_free_pages; - init_list(&fps->pages); - global_free_pages_cleanup_event(NULL); + for (int i = 0; i < (KEEP_PAGES_MIN * 2); i++) + free_page(alloc_page()); + + page_cleanup(NULL); + initialized = 1; return; } @@ -184,4 +245,5 @@ resource_sys_init(void) #endif page_size = 4096; + initialized = 1; } diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c index 3e3fc31a..dbca36e9 100644 --- a/sysdep/unix/io-loop.c +++ b/sysdep/unix/io-loop.c @@ -59,78 +59,138 @@ birdloop_inside(struct birdloop *loop) return 0; } +void +birdloop_flag(struct birdloop *loop, u32 flag) +{ + atomic_fetch_or_explicit(&loop->flags, flag, memory_order_acq_rel); + birdloop_ping(loop); +} + +void +birdloop_flag_set_handler(struct birdloop *loop, struct birdloop_flag_handler *fh) +{ + ASSERT_DIE(birdloop_inside(loop)); + loop->flag_handler = fh; +} + +static int +birdloop_process_flags(struct birdloop *loop) +{ + if (!loop->flag_handler) + return 0; + + u32 flags = atomic_exchange_explicit(&loop->flags, 0, memory_order_acq_rel); + loop->flag_handler->hook(loop->flag_handler, flags); + return !!flags; +} + +static int +birdloop_run_events(struct birdloop *loop) +{ + btime begin = current_time(); + while (current_time() - begin < 5 MS) + { + if (!ev_run_list(&loop->event_list)) + return 0; + + times_update(); + } + + return 1; +} + /* * Wakeup code for birdloop */ -static void -pipe_new(int *pfds) +void +pipe_new(struct pipe *p) { - int rv = pipe(pfds); + int rv = pipe(p->fd); if (rv < 0) die("pipe: %m"); - if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0) + if (fcntl(p->fd[0], F_SETFL, O_NONBLOCK) < 0) die("fcntl(O_NONBLOCK): %m"); - if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0) + if (fcntl(p->fd[1], F_SETFL, O_NONBLOCK) < 0) die("fcntl(O_NONBLOCK): %m"); } void -pipe_drain(int fd) +pipe_drain(struct pipe *p) { - char buf[64]; - int rv; - - try: - rv = read(fd, buf, 64); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) + while (1) { + char buf[64]; + int rv = read(p->fd[0], buf, sizeof(buf)); + if ((rv < 0) && (errno == EAGAIN)) return; - die("wakeup read: %m"); + + if (rv == 0) + bug("wakeup read eof"); + if ((rv < 0) && (errno != EINTR)) + bug("wakeup read: %m"); + } +} + +int +pipe_read_one(struct pipe *p) +{ + while (1) { + char v; + int rv = read(p->fd[0], &v, sizeof(v)); + if (rv == 1) + return 1; + if ((rv < 0) && (errno == EAGAIN)) + return 0; + if (rv > 1) + bug("wakeup read more bytes than expected: %d", rv); + if (rv == 0) + bug("wakeup read eof"); + if (errno != EINTR) + bug("wakeup read: %m"); } - if (rv == 64) - goto try; } void -pipe_kick(int fd) +pipe_kick(struct pipe *p) { - u64 v = 1; + char v = 1; int rv; - try: - rv = write(fd, &v, sizeof(u64)); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) + while (1) { + rv = write(p->fd[1], &v, sizeof(v)); + if ((rv >= 0) || (errno == EAGAIN)) return; - die("wakeup write: %m"); + if (errno != EINTR) + bug("wakeup write: %m"); } } +void +pipe_pollin(struct pipe *p, struct pollfd *pfd) +{ + pfd->fd = p->fd[0]; + pfd->events = POLLIN; + pfd->revents = 0; +} + static inline void wakeup_init(struct birdloop *loop) { - pipe_new(loop->wakeup_fds); + pipe_new(&loop->wakeup); } static inline void wakeup_drain(struct birdloop *loop) { - pipe_drain(loop->wakeup_fds[0]); + pipe_drain(&loop->wakeup); } static inline void wakeup_do_kick(struct birdloop *loop) { - pipe_kick(loop->wakeup_fds[1]); + pipe_kick(&loop->wakeup); } static inline void @@ -259,9 +319,7 @@ sockets_prepare(struct birdloop *loop) /* Add internal wakeup fd */ *psk = NULL; - pfd->fd = loop->wakeup_fds[0]; - pfd->events = POLLIN; - pfd->revents = 0; + pipe_pollin(&loop->wakeup, pfd); loop->poll_changed = 0; } @@ -516,7 +574,7 @@ birdloop_main(void *arg) while (1) { timers_fire(&loop->time, 0); - if (ev_run_list(&loop->event_list)) + if (birdloop_process_flags(loop) + birdloop_run_events(loop)) timeout = 0; else if (t = timers_first(&loop->time)) timeout = (tm_remains(t) TO_MS) + 1; @@ -525,6 +583,9 @@ birdloop_main(void *arg) if (loop->poll_changed) sockets_prepare(loop); + else + if ((timeout < 0) || (timeout > 5000)) + flush_local_pages(); btime duration = current_time() - loop_begin; if (duration > config->watchdog_warning) @@ -560,14 +621,18 @@ birdloop_main(void *arg) /* Flush remaining events */ ASSERT_DIE(!ev_run_list(&loop->event_list)); - /* No timers allowed */ - ASSERT_DIE(timers_count(&loop->time) == 0); + /* Drop timers */ + while (t = timers_first(&loop->time)) + tm_stop(t); + + /* No sockets allowed */ ASSERT_DIE(EMPTY_LIST(loop->sock_list)); ASSERT_DIE(loop->sock_num == 0); birdloop_leave(loop); loop->stopped(loop->stop_data); + flush_local_pages(); return NULL; } diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h index 31c40459..29ca96d6 100644 --- a/sysdep/unix/io-loop.h +++ b/sysdep/unix/io-loop.h @@ -9,6 +9,16 @@ #include "lib/rcu.h" +struct pipe +{ + int fd[2]; +}; + +void pipe_new(struct pipe *); +void pipe_pollin(struct pipe *, struct pollfd *); +void pipe_drain(struct pipe *); +void pipe_kick(struct pipe *); + struct birdloop { pool *pool; @@ -25,7 +35,7 @@ struct birdloop uint ping_pending; _Atomic u32 ping_sent; - int wakeup_fds[2]; + struct pipe wakeup; pthread_t thread_id; pthread_attr_t thread_attr; @@ -34,6 +44,9 @@ struct birdloop uint links; + _Atomic u32 flags; + struct birdloop_flag_handler *flag_handler; + void (*stopped)(void *data); void *stop_data; diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 23baffb2..6454f15f 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -2213,8 +2213,6 @@ static int short_loops = 0; #define SHORT_LOOP_MAX 10 #define WORK_EVENTS_MAX 10 -void pipe_drain(int fd); - void io_loop(void) { @@ -2246,8 +2244,7 @@ io_loop(void) } /* A hack to reload main io_loop() when something has changed asynchronously. */ - pfd[0].fd = main_birdloop.wakeup_fds[0]; - pfd[0].events = POLLIN; + pipe_pollin(&main_birdloop.wakeup, &pfd[0]); nfds = 1; @@ -2325,7 +2322,7 @@ io_loop(void) if (pfd[0].revents & POLLIN) { /* IO loop reload requested */ - pipe_drain(main_birdloop.wakeup_fds[0]); + pipe_drain(&main_birdloop.wakeup); atomic_exchange_explicit(&main_birdloop.ping_sent, 0, memory_order_acq_rel); continue; } |