summary | refs | log | tree | commit | diff
path: root/sysdep/unix
diff options
context:
space:
mode:
Diffstat (limited to 'sysdep/unix')
-rw-r--r-- sysdep/unix/Makefile 4
-rw-r--r-- sysdep/unix/alloc.c 178
-rw-r--r-- sysdep/unix/domain.c 116
-rw-r--r-- sysdep/unix/io-loop.c 643
-rw-r--r-- sysdep/unix/io-loop.h 56
-rw-r--r-- sysdep/unix/io.c 178
-rw-r--r-- sysdep/unix/krt.c 104
-rw-r--r-- sysdep/unix/krt.h 9
-rw-r--r-- sysdep/unix/log.c 45
-rw-r--r-- sysdep/unix/main.c 9
10 files changed, 1145 insertions, 197 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile
index 51ab98a9..6f6b0d26 100644
--- a/sysdep/unix/Makefile
+++ b/sysdep/unix/Makefile
@@ -1,9 +1,7 @@
-src := alloc.c io.c krt.c log.c main.c random.c
+src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-$(call proto-build,kif_build)
-$(call proto-build,krt_build)
$(conf-y-targets): $(s)krt.Y
src := $(filter-out main.c, $(src))
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index edad6209..847def30 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -10,6 +10,7 @@
#include "lib/resource.h"
#include "lib/lists.h"
#include "lib/event.h"
+#include "lib/rcu.h"
#include <errno.h>
#include <stdlib.h>
@@ -22,46 +23,43 @@
long page_size = 0;
#ifdef HAVE_MMAP
-#define KEEP_PAGES_MAIN_MAX 256
-#define KEEP_PAGES_MAIN_MIN 8
-#define CLEANUP_PAGES_BULK 256
+#define KEEP_PAGES_MAX 512
+#define KEEP_PAGES_MIN 32
+#define KEEP_PAGES_MAX_LOCAL 16
+#define ALLOC_PAGES_AT_ONCE 8
-STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX);
+STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX);
+STATIC_ASSERT(ALLOC_PAGES_AT_ONCE < KEEP_PAGES_MAX_LOCAL);
static _Bool use_fake = 0;
+static _Bool initialized = 0;
#if DEBUGGING
struct free_page {
node unused[42];
- node n;
+ struct free_page * _Atomic next;
};
#else
struct free_page {
- node n;
+ struct free_page * _Atomic next;
};
#endif
-struct free_pages {
- list pages;
- u16 min, max; /* Minimal and maximal number of free pages kept */
- uint cnt; /* Number of empty pages */
- event cleanup;
-};
-
-static void global_free_pages_cleanup_event(void *);
+static struct free_page * _Atomic page_stack = NULL;
+static _Thread_local struct free_page * local_page_stack = NULL;
-static struct free_pages global_free_pages = {
- .min = KEEP_PAGES_MAIN_MIN,
- .max = KEEP_PAGES_MAIN_MAX,
- .cleanup = { .hook = global_free_pages_cleanup_event },
-};
+static void page_cleanup(void *);
+static event page_cleanup_event = { .hook = page_cleanup, };
+#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0)
-uint *pages_kept = &global_free_pages.cnt;
+_Atomic int pages_kept = 0;
+_Atomic int pages_kept_locally = 0;
+static int pages_kept_here = 0;
static void *
alloc_sys_page(void)
{
- void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ void *ptr = mmap(NULL, page_size * ALLOC_PAGES_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
bug("mmap(%lu) failed: %m", page_size);
@@ -90,20 +88,33 @@ alloc_page(void)
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
-
- if (fps->cnt)
+ struct free_page *fp = local_page_stack;
+ if (fp)
{
- struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages));
- rem_node(&fp->n);
- if ((--fps->cnt < fps->min) && !shutting_down)
- ev_schedule(&fps->cleanup);
-
- bzero(fp, page_size);
+ local_page_stack = atomic_load_explicit(&fp->next, memory_order_acquire);
+ atomic_fetch_sub_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here--;
return fp;
}
- return alloc_sys_page();
+ rcu_read_lock();
+ fp = atomic_load_explicit(&page_stack, memory_order_acquire);
+ while (fp && !atomic_compare_exchange_strong_explicit(
+ &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire),
+ memory_order_acq_rel, memory_order_acquire))
+ ;
+ rcu_read_unlock();
+
+ if (!fp)
+ {
+ void *ptr = alloc_sys_page();
+ for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++)
+ free_page(ptr + page_size * i);
+ return ptr;
+ }
+
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
+ return fp;
#endif
}
@@ -117,45 +128,95 @@ free_page(void *ptr)
}
#ifdef HAVE_MMAP
- struct free_pages *fps = &global_free_pages;
struct free_page *fp = ptr;
+ if (shutting_down || (pages_kept_here < KEEP_PAGES_MAX_LOCAL))
+ {
+ atomic_store_explicit(&fp->next, local_page_stack, memory_order_relaxed);
+ atomic_fetch_add_explicit(&pages_kept_locally, 1, memory_order_relaxed);
+ pages_kept_here++;
+ return;
+ }
+
+ rcu_read_lock();
+ struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire);
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
+ do atomic_store_explicit(&fp->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, fp,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
- if ((++fps->cnt > fps->max) && !shutting_down)
- ev_schedule(&fps->cleanup);
+ if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
#endif
}
+void
+flush_local_pages(void)
+{
+ if (use_fake || !local_page_stack || shutting_down)
+ return;
+
+ struct free_page *last = local_page_stack, *next;
+ int check_count = 1;
+ while (next = atomic_load_explicit(&last->next, memory_order_acquire))
+ {
+ check_count++;
+ last = next;
+ }
+
+ ASSERT_DIE(check_count == pages_kept_here);
+
+ rcu_read_lock();
+ next = atomic_load_explicit(&page_stack, memory_order_acquire);
+
+ do atomic_store_explicit(&last->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, local_page_stack,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
+
+ local_page_stack = NULL;
+ pages_kept_here = 0;
+
+ atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed);
+ if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
+}
+
#ifdef HAVE_MMAP
static void
-global_free_pages_cleanup_event(void *data UNUSED)
+page_cleanup(void *_ UNUSED)
{
if (shutting_down)
return;
- struct free_pages *fps = &global_free_pages;
+ struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel);
+ if (!stack)
+ return;
- while (fps->cnt / 2 < fps->min)
- {
- struct free_page *fp = alloc_sys_page();
- fp->n = (node) {};
- add_tail(&fps->pages, &fp->n);
- fps->cnt++;
+ synchronize_rcu();
+
+ do {
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+
+ if (munmap(f, page_size) == 0)
+ continue;
+ else if (errno != ENOMEM)
+ bug("munmap(%p) failed: %m", f);
+ else
+ free_page(f);
}
+ while ((atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2) && stack);
- for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++)
+ while (stack)
{
- struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages));
- rem_node(&fp->n);
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+ free_page(f);
- if (munmap(fp, page_size) == 0)
- fps->cnt--;
- else if (errno == ENOMEM)
- add_head(&fps->pages, &fp->n);
- else
- bug("munmap(%p) failed: %m", fp);
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
}
}
#endif
@@ -164,17 +225,17 @@ void
resource_sys_init(void)
{
#ifdef HAVE_MMAP
- ASSERT_DIE(global_free_pages.cnt == 0);
-
if (!(page_size = sysconf(_SC_PAGESIZE)))
die("System page size must be non-zero");
if (u64_popcount(page_size) == 1)
{
- struct free_pages *fps = &global_free_pages;
- init_list(&fps->pages);
- global_free_pages_cleanup_event(NULL);
+ for (int i = 0; i < (KEEP_PAGES_MIN * 2); i++)
+ free_page(alloc_page());
+
+ page_cleanup(NULL);
+ initialized = 1;
return;
}
@@ -184,4 +245,5 @@ resource_sys_init(void)
#endif
page_size = 4096;
+ initialized = 1;
}
diff --git a/sysdep/unix/domain.c b/sysdep/unix/domain.c
new file mode 100644
index 00000000..0a5858a6
--- /dev/null
+++ b/sysdep/unix/domain.c
@@ -0,0 +1,116 @@
+/*
+ * BIRD Locking
+ *
+ * (c) 2020 Maria Matejka <mq@jmq.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#undef LOCAL_DEBUG
+
+#undef DEBUG_LOCKING
+
+#include "lib/birdlib.h"
+#include "lib/locking.h"
+#include "lib/resource.h"
+#include "lib/timer.h"
+
+#include "conf/conf.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdatomic.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Locking subsystem
+ */
+
+_Thread_local struct lock_order locking_stack = {};
+_Thread_local struct domain_generic **last_locked = NULL;
+
+#define ASSERT_NO_LOCK ASSERT_DIE(last_locked == NULL)
+
+struct domain_generic {
+ pthread_mutex_t mutex; /* The actual lock; taken in do_lock(), released in do_unlock() */
+ uint order; /* Byte offset of this domain's slot inside struct lock_order */
+ struct domain_generic **prev; /* Value of last_locked saved when this domain was locked */
+ struct lock_order *locked_by; /* Locking stack of the holding thread, NULL when unlocked */
+ const char *name; /* Name printed in the slow-lock warning */
+};
+
+#define DOMAIN_INIT(_name, _order) { .mutex = PTHREAD_MUTEX_INITIALIZER, .name = _name, .order = _order }
+
+static struct domain_generic the_bird_domain_gen = DOMAIN_INIT("The BIRD", OFFSETOF(struct lock_order, the_bird));
+
+DOMAIN(the_bird) the_bird_domain = { .the_bird = &the_bird_domain_gen };
+
+struct domain_generic *
+domain_new(const char *name, uint order)
+{
+ ASSERT_DIE(order < sizeof(struct lock_order));
+ struct domain_generic *dg = xmalloc(sizeof(struct domain_generic));
+ *dg = (struct domain_generic) DOMAIN_INIT(name, order);
+ return dg;
+}
+
+void
+domain_free(struct domain_generic *dg)
+{
+ pthread_mutex_destroy(&dg->mutex);
+ xfree(dg);
+}
+
+uint dg_order(struct domain_generic *dg)
+{
+ return dg->order;
+}
+
+/*
+ * Lock domain @dg and record it in the calling thread's locking stack.
+ * @lsp must point to the slot of locking_stack that sits at byte offset
+ * dg->order. Any violated check ends in bug().
+ */
+void do_lock(struct domain_generic *dg, struct domain_generic **lsp)
+{
+ /* The slot passed by the caller must be the one this domain's order names */
+ if ((char *) lsp - (char *) &locking_stack != dg->order)
+ bug("Trying to lock on bad position: order=%u, lsp=%p, base=%p", dg->order, lsp, &locking_stack);
+
+ /* Only a slot strictly above the most recently locked one may be taken */
+ if (lsp <= last_locked)
+ bug("Trying to lock in a bad order");
+ if (*lsp)
+ bug("Inconsistent locking stack state on lock");
+
+ /* Measure how long the mutex acquisition took; warn once a config is loaded */
+ btime lock_begin = current_time();
+ pthread_mutex_lock(&dg->mutex);
+ btime duration = current_time() - lock_begin;
+ if (config && (duration > config->watchdog_warning))
+ log(L_WARN "Locking of %s took %d ms", dg->name, (int) (duration TO_MS));
+
+ /* do_unlock() clears both fields; leftovers mean a broken previous unlock */
+ if (dg->prev || dg->locked_by)
+ bug("Previous unlock not finished correctly");
+ /* Push the domain on this thread's locking stack */
+ dg->prev = last_locked;
+ *lsp = dg;
+ last_locked = lsp;
+ dg->locked_by = &locking_stack;
+}
+
+/*
+ * Unlock domain @dg, which must be the most recently locked domain of the
+ * calling thread, and pop it from the thread's locking stack. @lsp is the
+ * same slot that was passed to do_lock().
+ */
+void do_unlock(struct domain_generic *dg, struct domain_generic **lsp)
+{
+ if ((char *) lsp - (char *) &locking_stack != dg->order)
+ bug("Trying to unlock on bad position: order=%u, lsp=%p, base=%p", dg->order, lsp, &locking_stack);
+
+ /* Only the thread that recorded itself in do_lock() may unlock */
+ if (dg->locked_by != &locking_stack)
+ bug("Inconsistent domain state on unlock");
+ /* Unlocking must happen in reverse order of locking */
+ if ((last_locked != lsp) || (*lsp != dg))
+ bug("Inconsistent locking stack state on unlock");
+ /* Clear all bookkeeping before the mutex is released */
+ dg->locked_by = NULL;
+ last_locked = dg->prev;
+ *lsp = NULL;
+ dg->prev = NULL;
+ pthread_mutex_unlock(&dg->mutex);
+}
diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c
new file mode 100644
index 00000000..dbca36e9
--- /dev/null
+++ b/sysdep/unix/io-loop.c
@@ -0,0 +1,643 @@
+/*
+ * BIRD -- I/O and event loop
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <time.h>
+#include <sys/time.h>
+
+#include "nest/bird.h"
+
+#include "lib/buffer.h"
+#include "lib/lists.h"
+#include "lib/resource.h"
+#include "lib/event.h"
+#include "lib/timer.h"
+#include "lib/socket.h"
+
+#include "lib/io-loop.h"
+#include "sysdep/unix/io-loop.h"
+#include "conf/conf.h"
+
+#define THREAD_STACK_SIZE 65536 /* To be lowered in near future */
+
+/*
+ * Current thread context
+ */
+
+_Thread_local struct birdloop *birdloop_current;
+static _Thread_local struct birdloop *birdloop_wakeup_masked;
+static _Thread_local uint birdloop_wakeup_masked_count;
+
+event_list *
+birdloop_event_list(struct birdloop *loop)
+{
+ return &loop->event_list;
+}
+
+struct timeloop *
+birdloop_time_loop(struct birdloop *loop)
+{
+ return &loop->time;
+}
+
+/*
+ * Tell whether the calling thread has entered @loop, either as its current
+ * loop or as any loop further down the chain of previously entered loops.
+ */
+_Bool
+birdloop_inside(struct birdloop *loop)
+{
+  struct birdloop *cur = birdloop_current;
+
+  /* Follow the prev_loop chain until a match or the end of the chain */
+  while (cur && (cur != loop))
+    cur = cur->prev_loop;
+
+  return !!cur;
+}
+
+void
+birdloop_flag(struct birdloop *loop, u32 flag)
+{
+ atomic_fetch_or_explicit(&loop->flags, flag, memory_order_acq_rel);
+ birdloop_ping(loop);
+}
+
+void
+birdloop_flag_set_handler(struct birdloop *loop, struct birdloop_flag_handler *fh)
+{
+ ASSERT_DIE(birdloop_inside(loop));
+ loop->flag_handler = fh;
+}
+
+static int
+birdloop_process_flags(struct birdloop *loop)
+{
+ if (!loop->flag_handler)
+ return 0;
+
+ u32 flags = atomic_exchange_explicit(&loop->flags, 0, memory_order_acq_rel);
+ loop->flag_handler->hook(loop->flag_handler, flags);
+ return !!flags;
+}
+
+static int
+birdloop_run_events(struct birdloop *loop)
+{
+ btime begin = current_time();
+ while (current_time() - begin < 5 MS)
+ {
+ if (!ev_run_list(&loop->event_list))
+ return 0;
+
+ times_update();
+ }
+
+ return 1;
+}
+
+/*
+ * Wakeup code for birdloop
+ */
+
+void
+pipe_new(struct pipe *p)
+{
+ int rv = pipe(p->fd);
+ if (rv < 0)
+ die("pipe: %m");
+
+ if (fcntl(p->fd[0], F_SETFL, O_NONBLOCK) < 0)
+ die("fcntl(O_NONBLOCK): %m");
+
+ if (fcntl(p->fd[1], F_SETFL, O_NONBLOCK) < 0)
+ die("fcntl(O_NONBLOCK): %m");
+}
+
+/*
+ * Read and discard everything currently buffered in the pipe.
+ * Returns once the non-blocking read end reports EAGAIN; end-of-file and
+ * any error other than EINTR are reported by bug().
+ */
+void
+pipe_drain(struct pipe *p)
+{
+  char scratch[64];
+
+  for (;;)
+  {
+    int got = read(p->fd[0], scratch, sizeof(scratch));
+
+    /* Data consumed, there may be more */
+    if (got > 0)
+      continue;
+
+    if (got == 0)
+      bug("wakeup read eof");
+    else if (errno == EAGAIN)
+      return;
+    else if (errno != EINTR)
+      bug("wakeup read: %m");
+  }
+}
+
+int
+pipe_read_one(struct pipe *p)
+{
+ while (1) {
+ char v;
+ int rv = read(p->fd[0], &v, sizeof(v));
+ if (rv == 1)
+ return 1;
+ if ((rv < 0) && (errno == EAGAIN))
+ return 0;
+ if (rv > 1)
+ bug("wakeup read more bytes than expected: %d", rv);
+ if (rv == 0)
+ bug("wakeup read eof");
+ if (errno != EINTR)
+ bug("wakeup read: %m");
+ }
+}
+
+/*
+ * Write one byte into the pipe to wake up whoever polls its read end.
+ * A full pipe (EAGAIN) counts as success; the write is retried on EINTR
+ * and any other error is reported by bug().
+ */
+void
+pipe_kick(struct pipe *p)
+{
+  char token = 1;
+
+  for (;;)
+  {
+    int written = write(p->fd[1], &token, sizeof(token));
+
+    if (written >= 0)
+      return;
+
+    if (errno == EAGAIN)
+      return;
+
+    if (errno != EINTR)
+      bug("wakeup write: %m");
+  }
+}
+
+void
+pipe_pollin(struct pipe *p, struct pollfd *pfd)
+{
+ pfd->fd = p->fd[0];
+ pfd->events = POLLIN;
+ pfd->revents = 0;
+}
+
+static inline void
+wakeup_init(struct birdloop *loop)
+{
+ pipe_new(&loop->wakeup);
+}
+
+static inline void
+wakeup_drain(struct birdloop *loop)
+{
+ pipe_drain(&loop->wakeup);
+}
+
+static inline void
+wakeup_do_kick(struct birdloop *loop)
+{
+ pipe_kick(&loop->wakeup);
+}
+
+/*
+ * Deliver a ping to @loop unless one has already been counted.
+ * Only the caller that raises ping_sent from zero goes on to wake the loop;
+ * the loop code resets ping_sent back to zero after it has woken up.
+ */
+static inline void
+birdloop_do_ping(struct birdloop *loop)
+{
+ /* Somebody else has already pinged since the last reset */
+ if (atomic_fetch_add_explicit(&loop->ping_sent, 1, memory_order_acq_rel))
+ return;
+
+ /* While wakeups of this loop are masked by the calling thread, just count
+  * them; birdloop_unmask_wakeups() sends the kick later */
+ if (loop == birdloop_wakeup_masked)
+ birdloop_wakeup_masked_count++;
+ else
+ wakeup_do_kick(loop);
+}
+
+/*
+ * Ask @loop to run another iteration.
+ *
+ * From outside the loop the ping is delivered right away. From inside the
+ * loop it is only marked as pending; birdloop_leave_locked() sends it when
+ * the loop is left. Repeated pings from inside are coalesced into that one
+ * pending ping instead of falling through to an immediate wakeup.
+ */
+void
+birdloop_ping(struct birdloop *loop)
+{
+  if (!birdloop_inside(loop))
+  {
+    birdloop_do_ping(loop);
+    return;
+  }
+
+  if (!loop->ping_pending)
+    loop->ping_pending++;
+}
+
+
+/*
+ * Sockets
+ */
+
+static void
+sockets_init(struct birdloop *loop)
+{
+ init_list(&loop->sock_list);
+ loop->sock_num = 0;
+
+ BUFFER_INIT(loop->poll_sk, loop->pool, 4);
+ BUFFER_INIT(loop->poll_fd, loop->pool, 4);
+ loop->poll_changed = 1; /* add wakeup fd */
+}
+
+static void
+sockets_add(struct birdloop *loop, sock *s)
+{
+ add_tail(&loop->sock_list, &s->n);
+ loop->sock_num++;
+
+ s->index = -1;
+ loop->poll_changed = 1;
+
+ birdloop_ping(loop);
+}
+
+void
+sk_start(sock *s)
+{
+ ASSERT_DIE(birdloop_current != &main_birdloop);
+ sockets_add(birdloop_current, s);
+}
+
+/*
+ * Detach socket @s from @loop.
+ * A socket that occupies a slot in the poll arrays has only its slot
+ * cleared, and closing of its fd is left to sockets_close_fds(); a socket
+ * without a slot is closed right here.
+ * NOTE(review): sockets_prepare() rewrites the poll arrays whenever
+ * poll_changed is set; if it runs before sockets_close_fds(), the cleared
+ * slot appears to be lost and the fd may never be closed -- confirm.
+ */
+static void
+sockets_remove(struct birdloop *loop, sock *s)
+{
+ rem_node(&s->n);
+ loop->sock_num--;
+
+ if (s->index >= 0)
+ {
+ /* Mark the slot as dead; the fd number stays in poll_fd for the deferred close */
+ loop->poll_sk.data[s->index] = NULL;
+ s->index = -1;
+ loop->poll_changed = 1;
+ loop->close_scheduled = 1;
+ birdloop_ping(loop);
+ }
+ else
+ close(s->fd);
+}
+
+void
+sk_stop(sock *s)
+{
+ sockets_remove(birdloop_current, s);
+}
+
+static inline uint sk_want_events(sock *s)
+{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); }
+
+/*
+FIXME: this should be called from sock code
+
+static void
+sockets_update(struct birdloop *loop, sock *s)
+{
+ if (s->index >= 0)
+ loop->poll_fd.data[s->index].events = sk_want_events(s);
+}
+*/
+
+static void
+sockets_prepare(struct birdloop *loop)
+{
+ BUFFER_SET(loop->poll_sk, loop->sock_num + 1);
+ BUFFER_SET(loop->poll_fd, loop->sock_num + 1);
+
+ struct pollfd *pfd = loop->poll_fd.data;
+ sock **psk = loop->poll_sk.data;
+ uint i = 0;
+ node *n;
+
+ WALK_LIST(n, loop->sock_list)
+ {
+ sock *s = SKIP_BACK(sock, n, n);
+
+ ASSERT(i < loop->sock_num);
+
+ s->index = i;
+ *psk = s;
+ pfd->fd = s->fd;
+ pfd->events = sk_want_events(s);
+ pfd->revents = 0;
+
+ pfd++;
+ psk++;
+ i++;
+ }
+
+ ASSERT(i == loop->sock_num);
+
+ /* Add internal wakeup fd */
+ *psk = NULL;
+ pipe_pollin(&loop->wakeup, pfd);
+
+ loop->poll_changed = 0;
+}
+
+/*
+ * Close the fds of all poll slots whose socket pointer was cleared by
+ * sockets_remove(), then reset the close_scheduled flag.
+ */
+static void
+sockets_close_fds(struct birdloop *loop)
+{
+ struct pollfd *pfd = loop->poll_fd.data;
+ sock **psk = loop->poll_sk.data;
+ /* The last slot belongs to the wakeup pipe and is never closed here */
+ int poll_num = loop->poll_fd.used - 1;
+
+ int i;
+ for (i = 0; i < poll_num; i++)
+ if (psk[i] == NULL)
+ close(pfd[i].fd);
+
+ loop->close_scheduled = 0;
+}
+
+int sk_read(sock *s, int revents);
+int sk_write(sock *s);
+
+static void
+sockets_fire(struct birdloop *loop)
+{
+ struct pollfd *pfd = loop->poll_fd.data;
+ sock **psk = loop->poll_sk.data;
+ int poll_num = loop->poll_fd.used - 1;
+
+ times_update();
+
+ /* Last fd is internal wakeup fd */
+ if (pfd[poll_num].revents & POLLIN)
+ wakeup_drain(loop);
+
+ int i;
+ for (i = 0; i < poll_num; pfd++, psk++, i++)
+ {
+ int e = 1;
+
+ if (! pfd->revents)
+ continue;
+
+ if (pfd->revents & POLLNVAL)
+ die("poll: invalid fd %d", pfd->fd);
+
+ if (pfd->revents & POLLIN)
+ while (e && *psk && (*psk)->rx_hook)
+ e = sk_read(*psk, pfd->revents);
+
+ e = 1;
+ if (pfd->revents & POLLOUT)
+ {
+ loop->poll_changed = 1;
+ while (e && *psk)
+ e = sk_write(*psk);
+ }
+ }
+}
+
+
+/*
+ * Birdloop
+ */
+
+struct birdloop main_birdloop;
+
+static void birdloop_enter_locked(struct birdloop *loop);
+
+void
+birdloop_init(void)
+{
+ wakeup_init(&main_birdloop);
+
+ main_birdloop.time.domain = the_bird_domain.the_bird;
+ main_birdloop.time.loop = &main_birdloop;
+
+ times_update();
+ timers_init(&main_birdloop.time, &root_pool);
+
+ birdloop_enter_locked(&main_birdloop);
+}
+
+static void *birdloop_main(void *arg);
+
+struct birdloop *
+birdloop_new(pool *pp, uint order, const char *name)
+{
+ struct domain_generic *dg = domain_new(name, order);
+
+ pool *p = rp_new(pp, name);
+ struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop));
+ loop->pool = p;
+
+ loop->time.domain = dg;
+ loop->time.loop = loop;
+
+ birdloop_enter(loop);
+
+ wakeup_init(loop);
+ ev_init_list(&loop->event_list, loop, name);
+ timers_init(&loop->time, p);
+ sockets_init(loop);
+
+ int e = 0;
+
+ if (e = pthread_attr_init(&loop->thread_attr))
+ die("pthread_attr_init() failed: %M", e);
+
+ if (e = pthread_attr_setstacksize(&loop->thread_attr, THREAD_STACK_SIZE))
+ die("pthread_attr_setstacksize(%u) failed: %M", THREAD_STACK_SIZE, e);
+
+ if (e = pthread_attr_setdetachstate(&loop->thread_attr, PTHREAD_CREATE_DETACHED))
+ die("pthread_attr_setdetachstate(PTHREAD_CREATE_DETACHED) failed: %M", e);
+
+ if (e = pthread_create(&loop->thread_id, &loop->thread_attr, birdloop_main, loop))
+ die("pthread_create() failed: %M", e);
+
+ birdloop_leave(loop);
+
+ return loop;
+}
+
+static void
+birdloop_do_stop(struct birdloop *loop, void (*stopped)(void *data), void *data)
+{
+ loop->stopped = stopped;
+ loop->stop_data = data;
+ wakeup_do_kick(loop);
+}
+
+void
+birdloop_stop(struct birdloop *loop, void (*stopped)(void *data), void *data)
+{
+ DG_LOCK(loop->time.domain);
+ birdloop_do_stop(loop, stopped, data);
+ DG_UNLOCK(loop->time.domain);
+}
+
+void
+birdloop_stop_self(struct birdloop *loop, void (*stopped)(void *data), void *data)
+{
+ ASSERT_DIE(loop == birdloop_current);
+ ASSERT_DIE(DG_IS_LOCKED(loop->time.domain));
+
+ birdloop_do_stop(loop, stopped, data);
+}
+
+/*
+ * Release all resources of a stopped loop.
+ * Must be called from the loop's own thread and only when nothing links to
+ * the loop any more; both conditions are asserted.
+ */
+void
+birdloop_free(struct birdloop *loop)
+{
+  ASSERT_DIE(loop->links == 0);
+  ASSERT_DIE(pthread_equal(pthread_self(), loop->thread_id));
+
+  rcu_birdloop_stop(&loop->rcu);
+  pthread_attr_destroy(&loop->thread_attr);
+
+  /* The wakeup pipe was opened by wakeup_init() in birdloop_new() and is not
+   * a pool resource, so both ends have to be closed explicitly */
+  close(loop->wakeup.fd[0]);
+  close(loop->wakeup.fd[1]);
+
+  domain_free(loop->time.domain);
+  rfree(loop->pool);
+}
+
+static void
+birdloop_enter_locked(struct birdloop *loop)
+{
+ ASSERT_DIE(DG_IS_LOCKED(loop->time.domain));
+ ASSERT_DIE(!birdloop_inside(loop));
+
+ /* Store the old context */
+ loop->prev_loop = birdloop_current;
+
+ /* Put the new context */
+ birdloop_current = loop;
+}
+
+/*
+ * Lock the domain of @loop and make @loop the current loop of the calling
+ * thread. Paired with birdloop_leave().
+ */
+void
+birdloop_enter(struct birdloop *loop)
+{
+  DG_LOCK(loop->time.domain);
+
+  /* Plain call: a void function must not return an expression in ISO C */
+  birdloop_enter_locked(loop);
+}
+
+static void
+birdloop_leave_locked(struct birdloop *loop)
+{
+ /* Check the current context */
+ ASSERT_DIE(birdloop_current == loop);
+
+ /* Send pending pings */
+ if (loop->ping_pending)
+ {
+ loop->ping_pending = 0;
+ birdloop_do_ping(loop);
+ }
+
+ /* Restore the old context */
+ birdloop_current = loop->prev_loop;
+}
+
+void
+birdloop_leave(struct birdloop *loop)
+{
+ birdloop_leave_locked(loop);
+ DG_UNLOCK(loop->time.domain);
+}
+
+void
+birdloop_mask_wakeups(struct birdloop *loop)
+{
+ ASSERT_DIE(birdloop_wakeup_masked == NULL);
+ birdloop_wakeup_masked = loop;
+}
+
+void
+birdloop_unmask_wakeups(struct birdloop *loop)
+{
+ ASSERT_DIE(birdloop_wakeup_masked == loop);
+ birdloop_wakeup_masked = NULL;
+ if (birdloop_wakeup_masked_count)
+ wakeup_do_kick(loop);
+
+ birdloop_wakeup_masked_count = 0;
+}
+
+void
+birdloop_link(struct birdloop *loop)
+{
+ ASSERT_DIE(birdloop_inside(loop));
+ loop->links++;
+}
+
+void
+birdloop_unlink(struct birdloop *loop)
+{
+ ASSERT_DIE(birdloop_inside(loop));
+ loop->links--;
+}
+
+/*
+ * Thread entry point of a birdloop; @arg is the struct birdloop to run.
+ * Each iteration fires timers, processes flags and events, rebuilds the
+ * poll arrays if needed, leaves the loop for the duration of poll() and
+ * re-enters it to handle the result. The cycle ends once loop->stopped is
+ * set; the stopped hook is then called with the loop already left.
+ */
+static void *
+birdloop_main(void *arg)
+{
+ struct birdloop *loop = arg;
+ timer *t;
+ int rv, timeout;
+
+ rcu_birdloop_start(&loop->rcu);
+
+ btime loop_begin = current_time();
+
+ tmp_init(loop->pool);
+
+ birdloop_enter(loop);
+ while (1)
+ {
+ timers_fire(&loop->time, 0);
+ /* Poll without waiting if flags or events were handled, otherwise wait
+  * for the nearest timer, or indefinitely when there is none */
+ if (birdloop_process_flags(loop) + birdloop_run_events(loop))
+ timeout = 0;
+ else if (t = timers_first(&loop->time))
+ timeout = (tm_remains(t) TO_MS) + 1;
+ else
+ timeout = -1;
+
+ if (loop->poll_changed)
+ sockets_prepare(loop);
+ else
+ /* Going to sleep for long: return locally cached pages first */
+ if ((timeout < 0) || (timeout > 5000))
+ flush_local_pages();
+
+ btime duration = current_time() - loop_begin;
+ if (duration > config->watchdog_warning)
+ log(L_WARN "I/O loop cycle took %d ms", (int) (duration TO_MS));
+
+ /* The loop is left (and its domain unlocked) while waiting in poll() */
+ birdloop_leave(loop);
+
+ try:
+ rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout);
+ if (rv < 0)
+ {
+ if (errno == EINTR || errno == EAGAIN)
+ goto try;
+ die("poll: %m");
+ }
+
+ birdloop_enter(loop);
+
+ /* Close fds of sockets removed while their slot was in the poll arrays */
+ if (loop->close_scheduled)
+ sockets_close_fds(loop);
+
+ if (loop->stopped)
+ break;
+
+ loop_begin = current_time();
+
+ if (rv)
+ sockets_fire(loop);
+
+ /* Re-arm pinging: the next birdloop_do_ping() kicks the wakeup pipe again */
+ atomic_exchange_explicit(&loop->ping_sent, 0, memory_order_acq_rel);
+ }
+
+ /* Flush remaining events */
+ ASSERT_DIE(!ev_run_list(&loop->event_list));
+
+ /* Drop timers */
+ while (t = timers_first(&loop->time))
+ tm_stop(t);
+
+ /* No sockets allowed */
+ ASSERT_DIE(EMPTY_LIST(loop->sock_list));
+ ASSERT_DIE(loop->sock_num == 0);
+
+ birdloop_leave(loop);
+ /* NOTE(review): the hook may call birdloop_free(), so loop is not touched afterwards -- confirm */
+ loop->stopped(loop->stop_data);
+
+ flush_local_pages();
+ return NULL;
+}
+
+void
+birdloop_yield(void)
+{
+ usleep(100);
+}
diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h
new file mode 100644
index 00000000..29ca96d6
--- /dev/null
+++ b/sysdep/unix/io-loop.h
@@ -0,0 +1,56 @@
+/*
+ * BIRD -- I/O and event loop
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_
+#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_
+
+#include "lib/rcu.h"
+
+struct pipe
+{
+ int fd[2];
+};
+
+/* Wakeup pipe primitives implemented in sysdep/unix/io-loop.c */
+void pipe_new(struct pipe *);
+void pipe_pollin(struct pipe *, struct pollfd *);
+void pipe_drain(struct pipe *);
+int pipe_read_one(struct pipe *);
+void pipe_kick(struct pipe *);
+
+struct birdloop
+{
+ pool *pool; /* Pool the loop itself is allocated from; freed in birdloop_free() */
+
+ struct timeloop time; /* Timers of this loop; time.domain is the lock taken on enter */
+ event_list event_list; /* Events run by the loop */
+ list sock_list; /* Sockets attached to this loop */
+ uint sock_num; /* Number of nodes in sock_list */
+
+ BUFFER(sock *) poll_sk; /* Socket behind each poll_fd slot; NULL for removed sockets and the wakeup slot */
+ BUFFER(struct pollfd) poll_fd; /* Array passed to poll(); the last slot is the wakeup pipe */
+ u8 poll_changed; /* Set when the poll arrays have to be rebuilt */
+ u8 close_scheduled; /* Set when sockets_close_fds() has fds to close */
+
+ uint ping_pending; /* Ping requested from inside the loop, sent on leave */
+ _Atomic u32 ping_sent; /* Pings counted since the loop last reset it to zero */
+ struct pipe wakeup; /* Pipe used to interrupt poll() */
+
+ pthread_t thread_id; /* Thread running birdloop_main() */
+ pthread_attr_t thread_attr; /* Attributes the thread was created with */
+
+ struct rcu_birdloop rcu;
+
+ uint links; /* Maintained by birdloop_link()/birdloop_unlink(); must be zero on free */
+
+ _Atomic u32 flags; /* Flags raised by birdloop_flag(), consumed by the flag handler */
+ struct birdloop_flag_handler *flag_handler;
+
+ void (*stopped)(void *data); /* Set to request stop; called after the loop finishes */
+ void *stop_data; /* Argument passed to stopped() */
+
+ struct birdloop *prev_loop; /* Loop that was current before this one was entered */
+};
+
+#endif
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index 8a116789..6454f15f 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -36,12 +36,14 @@
#include "lib/resource.h"
#include "lib/socket.h"
#include "lib/event.h"
+#include "lib/locking.h"
#include "lib/timer.h"
#include "lib/string.h"
#include "nest/iface.h"
#include "conf/conf.h"
#include "sysdep/unix/unix.h"
+#include "sysdep/unix/io-loop.h"
#include CONFIG_INCLUDE_SYSIO_H
/* Maximum number of calls of tx handler for one socket in one
@@ -122,55 +124,50 @@ rf_fileno(struct rfile *f)
btime boot_time;
+
void
-times_init(struct timeloop *loop)
+times_update(void)
{
struct timespec ts;
int rv;
+ btime old_time = current_time();
+ btime old_real_time = current_real_time();
+
rv = clock_gettime(CLOCK_MONOTONIC, &ts);
if (rv < 0)
die("Monotonic clock is missing");
if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
log(L_WARN "Monotonic clock is crazy");
-
- loop->last_time = ts.tv_sec S + ts.tv_nsec NS;
- loop->real_time = 0;
-}
-
-void
-times_update(struct timeloop *loop)
-{
- struct timespec ts;
- int rv;
-
- rv = clock_gettime(CLOCK_MONOTONIC, &ts);
- if (rv < 0)
- die("clock_gettime: %m");
-
+
btime new_time = ts.tv_sec S + ts.tv_nsec NS;
- if (new_time < loop->last_time)
+ if (new_time < old_time)
log(L_ERR "Monotonic clock is broken");
- loop->last_time = new_time;
- loop->real_time = 0;
-}
-
-void
-times_update_real_time(struct timeloop *loop)
-{
- struct timespec ts;
- int rv;
-
rv = clock_gettime(CLOCK_REALTIME, &ts);
if (rv < 0)
die("clock_gettime: %m");
- loop->real_time = ts.tv_sec S + ts.tv_nsec NS;
-}
+ btime new_real_time = ts.tv_sec S + ts.tv_nsec NS;
+ if (!atomic_compare_exchange_strong_explicit(
+ &last_time,
+ &old_time,
+ new_time,
+ memory_order_acq_rel,
+ memory_order_relaxed))
+ DBG("Time update collision: last_time");
+
+ if (!atomic_compare_exchange_strong_explicit(
+ &real_time,
+ &old_real_time,
+ new_real_time,
+ memory_order_acq_rel,
+ memory_order_relaxed))
+ DBG("Time update collision: real_time");
+}
/**
* DOC: Sockets
@@ -804,18 +801,16 @@ sk_free(resource *r)
sk_ssh_free(s);
#endif
- if (s->fd < 0)
+ if ((s->fd < 0) || (s->flags & SKF_THREAD))
return;
- /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
- if (!(s->flags & SKF_THREAD))
- {
- if (s == current_sock)
- current_sock = sk_next(s);
- if (s == stored_sock)
- stored_sock = sk_next(s);
+ if (s == current_sock)
+ current_sock = sk_next(s);
+ if (s == stored_sock)
+ stored_sock = sk_next(s);
+
+ if (enlisted(&s->n))
rem_node(&s->n);
- }
if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
close(s->fd);
@@ -1108,7 +1103,11 @@ sk_passive_connected(sock *s, int type)
return 1;
}
- sk_insert(t);
+ if (s->flags & SKF_PASSIVE_THREAD)
+ t->flags |= SKF_THREAD;
+ else
+ sk_insert(t);
+
sk_alloc_bufs(t);
s->rx_hook(t, 0);
return 1;
@@ -1516,6 +1515,36 @@ sk_open_unix(sock *s, char *name)
return 0;
}
+/*
+ * Event hook scheduled by sk_reloop(); @_vs is the socket being moved.
+ * It runs in the target loop and attaches the socket there: to the main
+ * socket list when running inside main_birdloop, otherwise to the current
+ * birdloop as a SKF_THREAD socket.
+ */
+static void
+sk_reloop_hook(void *_vs)
+{
+ sock *s = _vs;
+ if (birdloop_inside(&main_birdloop))
+ {
+ s->flags &= ~SKF_THREAD;
+ sk_insert(s);
+ }
+ else
+ {
+ s->flags |= SKF_THREAD;
+ sk_start(s);
+ }
+}
+
+/*
+ * Move socket @s to @loop.
+ * The socket is unlinked from the list it is currently on (if any) and an
+ * event is sent to @loop; the hook of that event does the attaching.
+ */
+void
+sk_reloop(sock *s, struct birdloop *loop)
+{
+ if (enlisted(&s->n))
+ rem_node(&s->n);
+
+ /* Reinitialize the socket's embedded event to carry the socket itself */
+ s->reloop = (event) {
+ .hook = sk_reloop_hook,
+ .data = s,
+ };
+
+ ev_send_loop(loop, &s->reloop);
+}
+
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
@@ -2037,34 +2066,21 @@ struct event_log_entry
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
static struct event_log_entry *event_open;
static int event_log_pos, event_log_num, watchdog_active;
-static btime last_time;
+static btime last_io_time;
static btime loop_time;
static void
io_update_time(void)
{
- struct timespec ts;
- int rv;
-
- /*
- * This is third time-tracking procedure (after update_times() above and
- * times_update() in BFD), dedicated to internal event log and latency
- * tracking. Hopefully, we consolidate these sometimes.
- */
-
- rv = clock_gettime(CLOCK_MONOTONIC, &ts);
- if (rv < 0)
- die("clock_gettime: %m");
-
- last_time = ts.tv_sec S + ts.tv_nsec NS;
+ last_io_time = current_time();
if (event_open)
{
- event_open->duration = last_time - event_open->timestamp;
+ event_open->duration = last_io_time - event_open->timestamp;
if (event_open->duration > config->latency_limit)
- log(L_WARN "Event 0x%p 0x%p took %d ms",
- event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
+ log(L_WARN "Event 0x%p 0x%p took %u.%03u ms",
+ event_open->hook, event_open->data, (uint) (event_open->duration TO_MS), (uint) (event_open->duration % 1000));
event_open = NULL;
}
@@ -2089,7 +2105,7 @@ io_log_event(void *hook, void *data)
en->hook = hook;
en->data = data;
- en->timestamp = last_time;
+ en->timestamp = last_io_time;
en->duration = 0;
event_log_num++;
@@ -2117,14 +2133,14 @@ io_log_dump(void)
struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
if (en->hook)
log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
- (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
+ (int) ((last_io_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
}
}
void
watchdog_sigalrm(int sig UNUSED)
{
- /* Update last_time and duration, but skip latency check */
+ /* Update last_io_time and duration, but skip latency check */
config->latency_limit = 0xffffffff;
io_update_time();
@@ -2137,7 +2153,7 @@ watchdog_start1(void)
{
io_update_time();
- loop_time = last_time;
+ loop_time = last_io_time;
}
static inline void
@@ -2145,7 +2161,7 @@ watchdog_start(void)
{
io_update_time();
- loop_time = last_time;
+ loop_time = last_io_time;
event_log_num = 0;
if (config->watchdog_timeout)
@@ -2166,10 +2182,10 @@ watchdog_stop(void)
watchdog_active = 0;
}
- btime duration = last_time - loop_time;
+ btime duration = last_io_time - loop_time;
if (duration > config->watchdog_warning)
- log(L_WARN "I/O loop cycle took %d ms for %d events",
- (int) (duration TO_MS), event_log_num);
+ log(L_WARN "I/O loop cycle took %u.%03u ms for %d events",
+ (uint) (duration TO_MS), (uint) (duration % 1000), event_log_num);
}
@@ -2181,8 +2197,9 @@ void
io_init(void)
{
init_list(&sock_list);
- init_list(&global_event_list);
- init_list(&global_work_list);
+ ev_init_list(&global_event_list, &main_birdloop, "Global event list");
+ ev_init_list(&global_work_list, &main_birdloop, "Global work list");
+ ev_init_list(&main_birdloop.event_list, &main_birdloop, "Global fast event list");
krt_io_init();
// XXX init_times();
// XXX update_times();
@@ -2210,22 +2227,27 @@ io_loop(void)
watchdog_start1();
for(;;)
{
- times_update(&main_timeloop);
+ times_update();
events = ev_run_list(&global_event_list);
events = ev_run_list_limited(&global_work_list, WORK_EVENTS_MAX) || events;
- timers_fire(&main_timeloop);
+ events = ev_run_list(&main_birdloop.event_list) || events;
+ timers_fire(&main_birdloop.time, 1);
io_close_event();
// FIXME
poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
- if (t = timers_first(&main_timeloop))
+ if (t = timers_first(&main_birdloop.time))
{
- times_update(&main_timeloop);
+ times_update();
timeout = (tm_remains(t) TO_MS) + 1;
poll_tout = MIN(poll_tout, timeout);
}
- nfds = 0;
+ /* A hack to reload main io_loop() when something has changed asynchronously. */
+ pipe_pollin(&main_birdloop.wakeup, &pfd[0]);
+
+ nfds = 1;
+
WALK_LIST(n, sock_list)
{
pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
@@ -2284,7 +2306,9 @@ io_loop(void)
/* And finally enter poll() to find active sockets */
watchdog_stop();
+ birdloop_leave(&main_birdloop);
pout = poll(pfd, nfds, poll_tout);
+ birdloop_enter(&main_birdloop);
watchdog_start();
if (pout < 0)
@@ -2295,7 +2319,15 @@ io_loop(void)
}
if (pout)
{
- times_update(&main_timeloop);
+ if (pfd[0].revents & POLLIN)
+ {
+ /* IO loop reload requested */
+ pipe_drain(&main_birdloop.wakeup);
+ atomic_exchange_explicit(&main_birdloop.ping_sent, 0, memory_order_acq_rel);
+ continue;
+ }
+
+ times_update();
/* guaranteed to be non-empty */
current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 93d19614..080c3a03 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -314,6 +314,7 @@ krt_learn_scan(struct krt_proto *p, rte *e)
ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference);
rte_update(p->p.main_channel, e->net, &e0, e0.src);
+ rt_unlock_source(e0.src);
}
static void
@@ -322,9 +323,9 @@ krt_learn_async(struct krt_proto *p, rte *e, int new)
if (new)
return krt_learn_scan(p, e);
- struct rte_src *src = rt_find_source(&p->p, krt_metric(e));
- if (src)
- rte_update(p->p.main_channel, e->net, NULL, src);
+ struct rte_src *src = rt_get_source(&p->p, krt_metric(e));
+ rte_update(p->p.main_channel, e->net, NULL, src);
+ rt_unlock_source(src);
}
#endif
@@ -365,6 +366,13 @@ rte_feed_obtain(net *n, rte **feed, uint count)
static struct rte *
krt_export_net(struct krt_proto *p, net *net)
{
+ /* FIXME: Filters are called here in a table-locked context when exporting
+ * to the kernel. BIRD can crash if the user requests a ROA check in the kernel
+ * export filter. It doesn't make much sense to write filters like this,
+ * so we may leave this unfinished piece of work here for later, as it
+ * won't really affect anybody. */
+ ASSERT_DIE(RT_IS_LOCKED(p->p.main_channel->table));
+
struct channel *c = p->p.main_channel;
const struct filter *filter = c->out_filter;
@@ -445,6 +453,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
#endif
/* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
+ RT_LOCKED(p->p.main_channel->table, tab)
+ {
+
/* Deleting all routes if flush is requested */
if (p->flush_routes)
goto delete;
@@ -453,7 +464,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
if (!p->ready)
goto ignore;
- net *net = net_find(p->p.main_channel->table, e->net);
+ net *net = net_find(tab, e->net);
if (!net || !krt_is_installed(p, net))
goto delete;
@@ -498,7 +509,9 @@ delete:
krt_replace_rte(p, e->net, NULL, e);
goto done;
-done:
+done:;
+ }
+
lp_flush(krt_filter_lp);
}
@@ -511,7 +524,8 @@ krt_init_scan(struct krt_proto *p)
static void
krt_prune(struct krt_proto *p)
{
- struct rtable *t = p->p.main_channel->table;
+ RT_LOCKED(p->p.main_channel->table, t)
+ {
KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name);
FIB_WALK(&t->fib, net, n)
@@ -533,6 +547,8 @@ krt_prune(struct krt_proto *p)
if (p->ready)
p->initialized = 1;
+
+ }
}
static void
@@ -577,18 +593,17 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src)
}
}
+
/*
* Periodic scanning
*/
-
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
-
-static timer *krt_scan_timer;
-static int krt_scan_count;
+static timer *krt_scan_all_timer;
+static int krt_scan_all_count;
+static _Bool krt_scan_all_tables;
static void
-krt_scan(timer *t UNUSED)
+krt_scan_all(timer *t UNUSED)
{
struct krt_proto *p;
node *n;
@@ -609,35 +624,42 @@ krt_scan(timer *t UNUSED)
}
static void
-krt_scan_timer_start(struct krt_proto *p)
+krt_scan_all_timer_start(struct krt_proto *p)
{
- if (!krt_scan_count)
- krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0);
+ if (!krt_scan_all_count)
+ krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0);
- krt_scan_count++;
+ krt_scan_all_count++;
- tm_start(krt_scan_timer, 1 S);
+ tm_start(krt_scan_all_timer, 1 S);
}
static void
-krt_scan_timer_stop(struct krt_proto *p UNUSED)
+krt_scan_all_timer_stop(void)
{
- krt_scan_count--;
+ ASSERT(krt_scan_all_count > 0);
+
+ krt_scan_all_count--;
- if (!krt_scan_count)
+ if (!krt_scan_all_count)
{
- rfree(krt_scan_timer);
- krt_scan_timer = NULL;
+ rfree(krt_scan_all_timer);
+ krt_scan_all_timer = NULL;
}
}
static void
-krt_scan_timer_kick(struct krt_proto *p UNUSED)
+krt_scan_all_timer_kick(void)
{
- tm_start(krt_scan_timer, 0);
+ tm_start(krt_scan_all_timer, 0);
+}
+
+void
+krt_use_shared_scan(void)
+{
+ krt_scan_all_tables = 1;
}
-#else
static void
krt_scan(timer *t)
@@ -655,35 +677,42 @@ krt_scan(timer *t)
static void
krt_scan_timer_start(struct krt_proto *p)
{
- p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
- tm_start(p->scan_timer, 1 S);
+ if (krt_scan_all_tables)
+ krt_scan_all_timer_start(p);
+ else
+ {
+ p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
+ tm_start(p->scan_timer, 1 S);
+ }
}
static void
krt_scan_timer_stop(struct krt_proto *p)
{
- tm_stop(p->scan_timer);
+ if (krt_scan_all_tables)
+ krt_scan_all_timer_stop();
+ else
+ tm_stop(p->scan_timer);
}
static void
krt_scan_timer_kick(struct krt_proto *p)
{
- tm_start(p->scan_timer, 0);
+ if (krt_scan_all_tables)
+ krt_scan_all_timer_kick();
+ else
+ tm_start(p->scan_timer, 0);
}
-#endif
-
-
-
/*
* Updates
*/
static int
-krt_preexport(struct channel *c, rte *e)
+krt_preexport(struct channel *C, rte *e)
{
- if (e->src->proto == c->proto)
+ if (e->src->owner == &C->proto->sources)
return -1;
if (!krt_capable(e))
@@ -791,11 +820,6 @@ krt_postconfig(struct proto_config *CF)
if (! proto_cf_main_channel(CF))
cf_error("Channel not specified");
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
- if (krt_cf->scan_time != cf->scan_time)
- cf_error("All kernel syncers must use the same table scan interval");
-#endif
-
struct channel_config *cc = proto_cf_main_channel(CF);
struct rtable_config *tab = cc->table;
if (tab->krt_attached)
diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h
index e0d60cbd..9f7ebb4f 100644
--- a/sysdep/unix/krt.h
+++ b/sysdep/unix/krt.h
@@ -21,11 +21,6 @@ struct kif_proto;
#define KRT_DEFAULT_ECMP_LIMIT 16
-#if 0
-#define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0)
-#define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1)
-#endif
-
extern struct ea_class ea_krt_source, ea_krt_metric;
#define KRT_REF_SEEN 0x1 /* Seen in table */
@@ -55,10 +50,7 @@ struct krt_proto {
struct proto p;
struct krt_state sys; /* Sysdep state */
-#ifndef CONFIG_ALL_TABLES_AT_ONCE
timer *scan_timer;
-#endif
-
struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */
struct bmap seen_map; /* Routes seen during last periodic scan */
node krt_node; /* Node in krt_proto_list */
@@ -80,6 +72,7 @@ extern pool *krt_pool;
struct proto_config * kif_init_config(int class);
void kif_request_scan(void);
+void krt_use_shared_scan(void);
void krt_got_route(struct krt_proto *p, struct rte *e, s8 src);
void krt_got_route_async(struct krt_proto *p, struct rte *e, int new, s8 src);
diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c
index 4e9df069..185231e8 100644
--- a/sysdep/unix/log.c
+++ b/sysdep/unix/log.c
@@ -15,6 +15,7 @@
* user's manual.
*/
+#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
@@ -35,8 +36,10 @@ static FILE *dbgf;
static list *current_log_list;
static char *current_syslog_name; /* NULL -> syslog closed */
+static _Atomic uint max_thread_id = ATOMIC_VAR_INIT(1);
+static _Thread_local uint this_thread_id;
-#ifdef USE_PTHREADS
+#define THIS_THREAD_ID (this_thread_id ?: (this_thread_id = atomic_fetch_add_explicit(&max_thread_id, 1, memory_order_acq_rel)))
#include <pthread.h>
@@ -48,15 +51,6 @@ static pthread_t main_thread;
void main_thread_init(void) { main_thread = pthread_self(); }
static int main_thread_self(void) { return pthread_equal(pthread_self(), main_thread); }
-#else
-
-static inline void log_lock(void) { }
-static inline void log_unlock(void) { }
-void main_thread_init(void) { }
-static int main_thread_self(void) { return 1; }
-
-#endif
-
#ifdef HAVE_SYSLOG_H
#include <sys/syslog.h>
@@ -189,7 +183,7 @@ log_commit(int class, buffer *buf)
l->pos += msg_len;
}
- fprintf(l->fh, "%s <%s> ", tbuf, class_names[class]);
+ fprintf(l->fh, "%s [%04x] <%s> ", tbuf, THIS_THREAD_ID, class_names[class]);
}
fputs(buf->start, l->fh);
fputc('\n', l->fh);
@@ -299,6 +293,8 @@ die(const char *msg, ...)
exit(1);
}
+static struct timespec dbg_time_start;
+
/**
* debug - write to debug output
* @msg: a printf-like message
@@ -311,12 +307,33 @@ debug(const char *msg, ...)
{
#define MAX_DEBUG_BUFSIZE 16384
va_list args;
- char buf[MAX_DEBUG_BUFSIZE];
+ char buf[MAX_DEBUG_BUFSIZE], *pos = buf;
+ int max = MAX_DEBUG_BUFSIZE;
va_start(args, msg);
if (dbgf)
{
- if (bvsnprintf(buf, MAX_DEBUG_BUFSIZE, msg, args) < 0)
+ struct timespec dbg_time;
+ clock_gettime(CLOCK_MONOTONIC, &dbg_time);
+ uint nsec;
+ uint sec;
+
+ if (dbg_time.tv_nsec > dbg_time_start.tv_nsec)
+ {
+ nsec = dbg_time.tv_nsec - dbg_time_start.tv_nsec;
+ sec = dbg_time.tv_sec - dbg_time_start.tv_sec;
+ }
+ else
+ {
+ nsec = 1000000000 + dbg_time.tv_nsec - dbg_time_start.tv_nsec;
+ sec = dbg_time.tv_sec - dbg_time_start.tv_sec - 1;
+ }
+
+ int n = bsnprintf(pos, max, "%u.%09u: [%04x] ", sec, nsec, THIS_THREAD_ID);
+ pos += n;
+ max -= n;
+
+ if (bvsnprintf(pos, max, msg, args) < 0)
bug("Extremely long debug output, split it.");
fputs(buf, dbgf);
@@ -422,6 +439,8 @@ done:
void
log_init_debug(char *f)
{
+ clock_gettime(CLOCK_MONOTONIC, &dbg_time_start);
+
if (dbgf && dbgf != stderr)
fclose(dbgf);
if (!f)
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index fd4934d9..bf9f2be0 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -28,6 +28,7 @@
#include "lib/resource.h"
#include "lib/socket.h"
#include "lib/event.h"
+#include "lib/locking.h"
#include "lib/timer.h"
#include "lib/string.h"
#include "nest/rt.h"
@@ -116,7 +117,7 @@ add_num_const(char *name, int val, const char *file, const uint line)
struct f_val *v = cfg_alloc(sizeof(struct f_val));
*v = (struct f_val) { .type = T_INT, .val.i = val };
struct symbol *sym = cf_get_symbol(name);
- if (sym->class && (sym->scope == conf_this_scope))
+ if (sym->class && cf_symbol_is_local(sym))
cf_error("Error reading value for %s from %s:%d: already defined", name, file, line);
cf_define_symbol(sym, SYM_CONSTANT | T_INT, val, v);
@@ -873,13 +874,16 @@ main(int argc, char **argv)
dmalloc_debug(0x2f03d00);
#endif
+ times_update();
parse_args(argc, argv);
log_switch(1, NULL, NULL);
+ the_bird_lock();
+
random_init();
net_init();
resource_init();
- timer_init();
+ birdloop_init();
olock_init();
rt_init();
io_init();
@@ -927,6 +931,7 @@ main(int argc, char **argv)
dup2(0, 2);
}
+
main_thread_init();
write_pid_file();