summaryrefslogtreecommitdiff
path: root/sysdep/unix
diff options
context:
space:
mode:
Diffstat (limited to 'sysdep/unix')
-rw-r--r--sysdep/unix/Makefile2
-rw-r--r--sysdep/unix/alloc.c226
-rw-r--r--sysdep/unix/domain.c (renamed from sysdep/unix/coroutine.c)78
-rw-r--r--sysdep/unix/io-loop.c97
-rw-r--r--sysdep/unix/io-loop.h11
-rw-r--r--sysdep/unix/io.c37
-rw-r--r--sysdep/unix/krt.Y5
-rw-r--r--sysdep/unix/krt.c222
-rw-r--r--sysdep/unix/krt.h7
-rw-r--r--sysdep/unix/log.c10
-rw-r--r--sysdep/unix/main.c42
11 files changed, 399 insertions, 338 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile
index 07f454ab..6f6b0d26 100644
--- a/sysdep/unix/Makefile
+++ b/sysdep/unix/Makefile
@@ -1,4 +1,4 @@
-src := alloc.c io.c io-loop.c krt.c log.c main.c random.c coroutine.c
+src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c
index 4ae1a9db..47cd4624 100644
--- a/sysdep/unix/alloc.c
+++ b/sysdep/unix/alloc.c
@@ -8,130 +8,180 @@
#include "nest/bird.h"
#include "lib/resource.h"
+#include "lib/lists.h"
+#include "lib/event.h"
+#include "lib/rcu.h"
+#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
-#include <stdatomic.h>
-#include <errno.h>
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif
long page_size = 0;
-_Bool alloc_multipage = 0;
-static _Atomic int global_page_list_not_empty;
-static list global_page_list;
-static _Atomic int global_page_spinlock;
+#ifdef HAVE_MMAP
+#define KEEP_PAGES_MAX 256
+#define KEEP_PAGES_MIN 8
-#define GLOBAL_PAGE_SPIN_LOCK for (int v = 0; !atomic_compare_exchange_weak_explicit(&global_page_spinlock, &v, 1, memory_order_acq_rel, memory_order_acquire); v = 0)
-#define GLOBAL_PAGE_SPIN_UNLOCK do { int v = 1; ASSERT_DIE(atomic_compare_exchange_strong_explicit(&global_page_spinlock, &v, 0, memory_order_acq_rel, memory_order_acquire)); } while (0)
+STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX);
-#ifdef HAVE_MMAP
static _Bool use_fake = 0;
+static _Bool initialized = 0;
+
+#if DEBUGGING
+struct free_page {
+ node unused[42];
+ struct free_page * _Atomic next;
+};
#else
-static _Bool use_fake = 1;
+struct free_page {
+ struct free_page * _Atomic next;
+};
#endif
-void resource_sys_init(void)
+static struct free_page * _Atomic page_stack = NULL;
+
+static void page_cleanup(void *);
+static event page_cleanup_event = { .hook = page_cleanup, };
+#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0)
+
+_Atomic int pages_kept = 0;
+
+static void *
+alloc_sys_page(void)
{
-#ifdef HAVE_MMAP
- init_list(&global_page_list);
+ void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (!(page_size = sysconf(_SC_PAGESIZE)))
- die("System page size must be non-zero");
+ if (ptr == MAP_FAILED)
+ bug("mmap(%lu) failed: %m", page_size);
- if ((u64_popcount(page_size) > 1) || (page_size > 16384))
+ return ptr;
+}
+
+extern int shutting_down; /* Shutdown requested. */
+
+#else // ! HAVE_MMAP
+#define use_fake 1
#endif
+
+void *
+alloc_page(void)
+{
+ if (use_fake)
{
- /* Too big or strange page, use the aligned allocator instead */
- page_size = 4096;
- use_fake = 1;
+ void *ptr = NULL;
+ int err = posix_memalign(&ptr, page_size, page_size);
+
+ if (err || !ptr)
+ bug("posix_memalign(%lu) failed", (long unsigned int) page_size);
+
+ return ptr;
}
+
+#ifdef HAVE_MMAP
+ rcu_read_lock();
+ struct free_page *fp = atomic_load_explicit(&page_stack, memory_order_acquire);
+ while (fp && !atomic_compare_exchange_strong_explicit(
+ &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire),
+ memory_order_acq_rel, memory_order_acquire))
+ ;
+ rcu_read_unlock();
+
+ if (!fp)
+ return alloc_sys_page();
+
+ if (atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) <= KEEP_PAGES_MIN)
+ SCHEDULE_CLEANUP;
+
+ return fp;
+#endif
}
-void *
-alloc_sys_page(void)
+void
+free_page(void *ptr)
{
-#ifdef HAVE_MMAP
- if (!use_fake)
+ if (use_fake)
{
- if (atomic_load_explicit(&global_page_list_not_empty, memory_order_relaxed))
- {
- GLOBAL_PAGE_SPIN_LOCK;
- if (!EMPTY_LIST(global_page_list))
- {
- node *ret = HEAD(global_page_list);
- rem_node(ret);
- if (EMPTY_LIST(global_page_list))
- atomic_store_explicit(&global_page_list_not_empty, 0, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- memset(ret, 0, sizeof(node));
- return (void *) ret;
- }
- GLOBAL_PAGE_SPIN_UNLOCK;
- }
-
- if (alloc_multipage)
- {
- void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (big == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- uintptr_t offset = ((uintptr_t) big) % page_size;
- if (offset)
- {
- void *ret = big + page_size - offset;
- munmap(big, page_size - offset);
- munmap(ret + page_size, offset);
- return ret;
- }
- else
- {
- munmap(big + page_size, page_size);
- return big;
- }
- }
-
- void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (ret == MAP_FAILED)
- bug("mmap(%lu) failed: %m", page_size);
-
- return ret;
+ free(ptr);
+ return;
}
- else
+
+#ifdef HAVE_MMAP
+ rcu_read_lock();
+ struct free_page *fp = ptr;
+ struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire);
+
+ do atomic_store_explicit(&fp->next, next, memory_order_release);
+ while (!atomic_compare_exchange_strong_explicit(
+ &page_stack, &next, fp,
+ memory_order_acq_rel, memory_order_acquire));
+ rcu_read_unlock();
+
+ if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX)
+ SCHEDULE_CLEANUP;
#endif
+}
+
+#ifdef HAVE_MMAP
+static void
+page_cleanup(void *_ UNUSED)
+{
+ struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel);
+ if (!stack)
+ return;
+
+ synchronize_rcu();
+
+ do {
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+
+ if (munmap(f, page_size) == 0)
+ continue;
+ else if (errno != ENOMEM)
+ bug("munmap(%p) failed: %m", f);
+ else
+ free_page(f);
+ }
+ while (stack && (atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2));
+
+ while (stack)
{
- void *ret = aligned_alloc(page_size, page_size);
- if (!ret)
- bug("aligned_alloc(%lu) failed", page_size);
- return ret;
+ atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed);
+
+ struct free_page *f = stack;
+ stack = atomic_load_explicit(&f->next, memory_order_acquire);
+ free_page(f);
}
}
+#endif
void
-free_sys_page(void *ptr)
+resource_sys_init(void)
{
#ifdef HAVE_MMAP
- if (!use_fake)
+ if (!(page_size = sysconf(_SC_PAGESIZE)))
+ die("System page size must be non-zero");
+
+ if (u64_popcount(page_size) == 1)
{
- if (munmap(ptr, page_size) < 0)
-#ifdef ENOMEM
- if (errno == ENOMEM)
- {
- memset(ptr, 0, page_size);
-
- GLOBAL_PAGE_SPIN_LOCK;
- add_tail(&global_page_list, (node *) ptr);
- atomic_store_explicit(&global_page_list_not_empty, 1, memory_order_relaxed);
- GLOBAL_PAGE_SPIN_UNLOCK;
- }
- else
-#endif
- bug("munmap(%p) failed: %m", ptr);
+
+ for (int i = 0; i < (KEEP_PAGES_MIN * 2); i++)
+ free_page(alloc_page());
+
+ page_cleanup(NULL);
+ initialized = 1;
+ return;
}
- else
+
+ /* Too big or strange page, use the aligned allocator instead */
+ log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size);
+ use_fake = 1;
#endif
- free(ptr);
+
+ page_size = 4096;
+ initialized = 1;
}
diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/domain.c
index 4758c056..0a5858a6 100644
--- a/sysdep/unix/coroutine.c
+++ b/sysdep/unix/domain.c
@@ -1,7 +1,6 @@
/*
- * BIRD Coroutines
+ * BIRD Locking
*
- * (c) 2017 Martin Mares <mj@ucw.cz>
* (c) 2020 Maria Matejka <mq@jmq.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
@@ -17,18 +16,11 @@
#include "lib/birdlib.h"
#include "lib/locking.h"
-#include "lib/coro.h"
#include "lib/resource.h"
#include "lib/timer.h"
#include "conf/conf.h"
-#define CORO_STACK_SIZE 65536
-
-/*
- * Implementation of coroutines based on POSIX threads
- */
-
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
@@ -122,71 +114,3 @@ void do_unlock(struct domain_generic *dg, struct domain_generic **lsp)
dg->prev = NULL;
pthread_mutex_unlock(&dg->mutex);
}
-
-/* Coroutines */
-struct coroutine {
- resource r;
- pthread_t id;
- pthread_attr_t attr;
- void (*entry)(void *);
- void *data;
-};
-
-static _Thread_local _Bool coro_cleaned_up = 0;
-
-static void coro_free(resource *r)
-{
- struct coroutine *c = (void *) r;
- ASSERT_DIE(pthread_equal(pthread_self(), c->id));
- pthread_attr_destroy(&c->attr);
- coro_cleaned_up = 1;
-}
-
-static struct resclass coro_class = {
- .name = "Coroutine",
- .size = sizeof(struct coroutine),
- .free = coro_free,
-};
-
-_Thread_local struct coroutine *this_coro = NULL;
-
-static void *coro_entry(void *p)
-{
- struct coroutine *c = p;
-
- ASSERT_DIE(c->entry);
-
- this_coro = c;
-
- c->entry(c->data);
- ASSERT_DIE(coro_cleaned_up);
-
- return NULL;
-}
-
-struct coroutine *coro_run(pool *p, void (*entry)(void *), void *data)
-{
- ASSERT_DIE(entry);
- ASSERT_DIE(p);
-
- struct coroutine *c = ralloc(p, &coro_class);
-
- c->entry = entry;
- c->data = data;
-
- int e = 0;
-
- if (e = pthread_attr_init(&c->attr))
- die("pthread_attr_init() failed: %M", e);
-
- if (e = pthread_attr_setstacksize(&c->attr, CORO_STACK_SIZE))
- die("pthread_attr_setstacksize(%u) failed: %M", CORO_STACK_SIZE, e);
-
- if (e = pthread_attr_setdetachstate(&c->attr, PTHREAD_CREATE_DETACHED))
- die("pthread_attr_setdetachstate(PTHREAD_CREATE_DETACHED) failed: %M", e);
-
- if (e = pthread_create(&c->id, &c->attr, coro_entry, c))
- die("pthread_create() failed: %M", e);
-
- return c;
-}
diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c
index c7cf4ad2..575e5403 100644
--- a/sysdep/unix/io-loop.c
+++ b/sysdep/unix/io-loop.c
@@ -17,7 +17,6 @@
#include "nest/bird.h"
#include "lib/buffer.h"
-#include "lib/coro.h"
#include "lib/lists.h"
#include "lib/resource.h"
#include "lib/event.h"
@@ -28,6 +27,8 @@
#include "sysdep/unix/io-loop.h"
#include "conf/conf.h"
+#define THREAD_STACK_SIZE 65536 /* To be lowered in near future */
+
/*
* Current thread context
*/
@@ -58,6 +59,31 @@ birdloop_inside(struct birdloop *loop)
return 0;
}
+void
+birdloop_flag(struct birdloop *loop, u32 flag)
+{
+ atomic_fetch_or_explicit(&loop->flags, flag, memory_order_acq_rel);
+ birdloop_ping(loop);
+}
+
+void
+birdloop_flag_set_handler(struct birdloop *loop, struct birdloop_flag_handler *fh)
+{
+ ASSERT_DIE(birdloop_inside(loop));
+ loop->flag_handler = fh;
+}
+
+static int
+birdloop_process_flags(struct birdloop *loop)
+{
+ if (!loop->flag_handler)
+ return 0;
+
+ u32 flags = atomic_exchange_explicit(&loop->flags, 0, memory_order_acq_rel);
+ loop->flag_handler->hook(loop->flag_handler, flags);
+ return !!flags;
+}
+
/*
* Wakeup code for birdloop
*/
@@ -132,11 +158,10 @@ wakeup_do_kick(struct birdloop *loop)
pipe_kick(loop->wakeup_fds[1]);
}
-void
-birdloop_ping(struct birdloop *loop)
+static inline void
+birdloop_do_ping(struct birdloop *loop)
{
- u32 ping_sent = atomic_fetch_add_explicit(&loop->ping_sent, 1, memory_order_acq_rel);
- if (ping_sent)
+ if (atomic_fetch_add_explicit(&loop->ping_sent, 1, memory_order_acq_rel))
return;
if (loop == birdloop_wakeup_masked)
@@ -145,6 +170,15 @@ birdloop_ping(struct birdloop *loop)
wakeup_do_kick(loop);
}
+void
+birdloop_ping(struct birdloop *loop)
+{
+ if (birdloop_inside(loop) && !loop->ping_pending)
+ loop->ping_pending++;
+ else
+ birdloop_do_ping(loop);
+}
+
/*
* Sockets
@@ -205,7 +239,7 @@ sk_stop(sock *s)
}
static inline uint sk_want_events(sock *s)
-{ return ((s->rx_hook && !ev_corked(s->cork)) ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); }
+{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); }
/*
FIXME: this should be called from sock code
@@ -336,7 +370,7 @@ birdloop_init(void)
birdloop_enter_locked(&main_birdloop);
}
-static void birdloop_main(void *arg);
+static void *birdloop_main(void *arg);
struct birdloop *
birdloop_new(pool *pp, uint order, const char *name)
@@ -357,7 +391,19 @@ birdloop_new(pool *pp, uint order, const char *name)
timers_init(&loop->time, p);
sockets_init(loop);
- loop->time.coro = coro_run(p, birdloop_main, loop);
+ int e = 0;
+
+ if (e = pthread_attr_init(&loop->thread_attr))
+ die("pthread_attr_init() failed: %M", e);
+
+ if (e = pthread_attr_setstacksize(&loop->thread_attr, THREAD_STACK_SIZE))
+ die("pthread_attr_setstacksize(%u) failed: %M", THREAD_STACK_SIZE, e);
+
+ if (e = pthread_attr_setdetachstate(&loop->thread_attr, PTHREAD_CREATE_DETACHED))
+ die("pthread_attr_setdetachstate(PTHREAD_CREATE_DETACHED) failed: %M", e);
+
+ if (e = pthread_create(&loop->thread_id, &loop->thread_attr, birdloop_main, loop))
+ die("pthread_create() failed: %M", e);
birdloop_leave(loop);
@@ -393,6 +439,11 @@ void
birdloop_free(struct birdloop *loop)
{
ASSERT_DIE(loop->links == 0);
+ ASSERT_DIE(pthread_equal(pthread_self(), loop->thread_id));
+
+ rcu_birdloop_stop(&loop->rcu);
+ pthread_attr_destroy(&loop->thread_attr);
+
domain_free(loop->time.domain);
rfree(loop->pool);
}
@@ -423,6 +474,13 @@ birdloop_leave_locked(struct birdloop *loop)
/* Check the current context */
ASSERT_DIE(birdloop_current == loop);
+ /* Send pending pings */
+ if (loop->ping_pending)
+ {
+ loop->ping_pending = 0;
+ birdloop_do_ping(loop);
+ }
+
/* Restore the old context */
birdloop_current = loop->prev_loop;
}
@@ -466,20 +524,24 @@ birdloop_unlink(struct birdloop *loop)
loop->links--;
}
-static void
+static void *
birdloop_main(void *arg)
{
struct birdloop *loop = arg;
timer *t;
int rv, timeout;
+ rcu_birdloop_start(&loop->rcu);
+
btime loop_begin = current_time();
+ tmp_init(loop->pool);
+
birdloop_enter(loop);
while (1)
{
timers_fire(&loop->time, 0);
- if (ev_run_list(&loop->event_list))
+ if (birdloop_process_flags(loop) + ev_run_list(&loop->event_list))
timeout = 0;
else if (t = timers_first(&loop->time))
timeout = (tm_remains(t) TO_MS) + 1;
@@ -523,13 +585,22 @@ birdloop_main(void *arg)
/* Flush remaining events */
ASSERT_DIE(!ev_run_list(&loop->event_list));
- /* No timers allowed */
- ASSERT_DIE(timers_count(&loop->time) == 0);
+ /* Drop timers */
+ while (t = timers_first(&loop->time))
+ tm_stop(t);
+
+ /* No sockets allowed */
ASSERT_DIE(EMPTY_LIST(loop->sock_list));
ASSERT_DIE(loop->sock_num == 0);
birdloop_leave(loop);
loop->stopped(loop->stop_data);
-}
+ return NULL;
+}
+void
+birdloop_yield(void)
+{
+ usleep(100);
+}
diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h
index 4024b6c5..aec7a409 100644
--- a/sysdep/unix/io-loop.h
+++ b/sysdep/unix/io-loop.h
@@ -7,6 +7,8 @@
#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_
#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_
+#include "lib/rcu.h"
+
struct birdloop
{
pool *pool;
@@ -21,11 +23,20 @@ struct birdloop
u8 poll_changed;
u8 close_scheduled;
+ uint ping_pending;
_Atomic u32 ping_sent;
int wakeup_fds[2];
+ pthread_t thread_id;
+ pthread_attr_t thread_attr;
+
+ struct rcu_birdloop rcu;
+
uint links;
+ _Atomic u32 flags;
+ struct birdloop_flag_handler *flag_handler;
+
void (*stopped)(void *data);
void *stop_data;
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index dd385c80..23baffb2 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -1435,6 +1435,10 @@ sk_open(sock *s)
if (sk_set_high_port(s) < 0)
log(L_WARN "Socket error: %s%#m", s->err);
+ if (s->flags & SKF_FREEBIND)
+ if (sk_set_freebind(s) < 0)
+ log(L_WARN "Socket error: %s%#m", s->err);
+
sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
ERR2("bind");
@@ -1879,8 +1883,8 @@ sk_read_ssh(sock *s)
/* sk_read() and sk_write() are called from BFD's event loop */
-int
-sk_read(sock *s, int revents)
+static inline int
+sk_read_noflush(sock *s, int revents)
{
switch (s->type)
{
@@ -1943,7 +1947,15 @@ sk_read(sock *s, int revents)
}
int
-sk_write(sock *s)
+sk_read(sock *s, int revents)
+{
+ int e = sk_read_noflush(s, revents);
+ tmp_flush();
+ return e;
+}
+
+static inline int
+sk_write_noflush(sock *s)
{
switch (s->type)
{
@@ -1991,6 +2003,14 @@ sk_write(sock *s)
}
}
+int
+sk_write(sock *s)
+{
+ int e = sk_write_noflush(s);
+ tmp_flush();
+ return e;
+}
+
int sk_is_ipv4(sock *s)
{ return s->af == AF_INET; }
@@ -2009,6 +2029,7 @@ sk_err(sock *s, int revents)
}
s->err_hook(s, se);
+ tmp_flush();
}
void
@@ -2058,8 +2079,8 @@ io_update_time(void)
event_open->duration = last_io_time - event_open->timestamp;
if (event_open->duration > config->latency_limit)
- log(L_WARN "Event 0x%p 0x%p took %d ms",
- event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
+ log(L_WARN "Event 0x%p 0x%p took %u.%03u ms",
+ event_open->hook, event_open->data, (uint) (event_open->duration TO_MS), (uint) (event_open->duration % 1000));
event_open = NULL;
}
@@ -2163,8 +2184,8 @@ watchdog_stop(void)
btime duration = last_io_time - loop_time;
if (duration > config->watchdog_warning)
- log(L_WARN "I/O loop cycle took %d ms for %d events",
- (int) (duration TO_MS), event_log_num);
+ log(L_WARN "I/O loop cycle took %u.%03u ms for %d events",
+ (uint) (duration TO_MS), (uint) (duration % 1000), event_log_num);
}
@@ -2234,7 +2255,7 @@ io_loop(void)
{
pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
s = SKIP_BACK(sock, n, n);
- if (s->rx_hook && !ev_corked(s->cork))
+ if (s->rx_hook)
{
pfd[nfds].fd = s->fd;
pfd[nfds].events |= POLLIN;
diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y
index 95b54d65..4ce9a328 100644
--- a/sysdep/unix/krt.Y
+++ b/sysdep/unix/krt.Y
@@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip)
CF_DECLS
-CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS)
+CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS)
CF_KEYWORDS(INTERFACE, PREFERRED)
%type <i> kern_mp_limit
@@ -122,9 +122,6 @@ kif_iface:
kif_iface_start iface_patt_list_nopx kif_iface_opt_list;
-dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ;
-dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ;
-
CF_CODE
CF_END
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 98c56391..d507c133 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -53,7 +53,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "filter/filter.h"
#include "conf/conf.h"
@@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src)
struct protocol proto_unix_iface = {
.name = "Device",
.template = "device%d",
- .class = PROTOCOL_DEVICE,
.proto_size = sizeof(struct kif_proto),
.config_size = sizeof(struct kif_config),
.preconfig = kif_preconfig,
@@ -243,6 +242,13 @@ struct protocol proto_unix_iface = {
.copy_config = kif_copy_config
};
+void
+kif_build(void)
+{
+ proto_build(&proto_unix_iface);
+}
+
+
/*
* Tracing of routes
*/
@@ -280,30 +286,46 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS;
static inline u32
krt_metric(rte *a)
{
- eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC);
+ eattr *ea = ea_find(a->attrs, &ea_krt_metric);
return ea ? ea->u.data : 0;
}
static inline int
-krt_rte_better(rte *a, rte *b)
+krt_same_key(rte *a, rte *b)
+{
+ return (krt_metric(a) == krt_metric(b));
+}
+
+static inline int
+krt_uptodate(rte *a, rte *b)
{
- return (krt_metric(a) > krt_metric(b));
+ return (a->attrs == b->attrs);
}
/* Called when alien route is discovered during scan */
static void
-krt_learn_rte(struct krt_proto *p, rte *e)
+krt_learn_scan(struct krt_proto *p, rte *e)
{
- struct rte_src *src = e->src = rt_get_source(&p->p, krt_metric(e));
- rte_update(p->p.main_channel, e->net, e, e->src);
- rt_unlock_source(src);
+ rte e0 = {
+ .attrs = e->attrs,
+ .src = rt_get_source(&p->p, krt_metric(e)),
+ };
+
+ ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference);
+
+ rte_update(p->p.main_channel, e->net, &e0, e0.src);
+ rt_unlock_source(e0.src);
}
static void
-krt_learn_init(struct krt_proto *p)
+krt_learn_async(struct krt_proto *p, rte *e, int new)
{
- if (KRT_CF->learn)
- channel_setup_in_table(p->p.main_channel, 1);
+ if (new)
+ return krt_learn_scan(p, e);
+
+ struct rte_src *src = rt_get_source(&p->p, krt_metric(e));
+ rte_update(p->p.main_channel, e->net, NULL, src);
+ rt_unlock_source(src);
}
#endif
@@ -323,7 +345,7 @@ rte_feed_count(net *n)
{
uint count = 0;
for (struct rte_storage *e = n->routes; e; e = e->next)
- if (rte_is_valid(RTES_OR_NULL(e)))
+ if (rte_is_valid(RTE_OR_NULL(e)))
count++;
return count;
}
@@ -333,7 +355,7 @@ rte_feed_obtain(net *n, rte **feed, uint count)
{
uint i = 0;
for (struct rte_storage *e = n->routes; e; e = e->next)
- if (rte_is_valid(RTES_OR_NULL(e)))
+ if (rte_is_valid(RTE_OR_NULL(e)))
{
ASSERT_DIE(i < count);
feed[i++] = &e->rte;
@@ -344,6 +366,13 @@ rte_feed_obtain(net *n, rte **feed, uint count)
static struct rte *
krt_export_net(struct krt_proto *p, net *net)
{
+ /* FIXME: Here we are calling filters in table-locked context when exporting
+ * to kernel. Here BIRD can crash if the user requested ROA check in kernel
+ * export filter. It doesn't make much sense to write the filters like this,
+ * therefore we may keep this unfinished piece of work here for later as it
+ * won't really affect anybody. */
+ ASSERT_DIE(RT_IS_LOCKED(p->p.main_channel->table));
+
struct channel *c = p->p.main_channel;
const struct filter *filter = c->out_filter;
@@ -372,7 +401,7 @@ krt_export_net(struct krt_proto *p, net *net)
if (filter == FILTER_ACCEPT)
goto accept;
- if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT)
+ if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT)
goto reject;
@@ -386,15 +415,12 @@ reject:
static int
krt_same_dest(rte *k, rte *e)
{
- rta *ka = k->attrs, *ea = e->attrs;
-
- if (ka->dest != ea->dest)
- return 0;
+ ea_list *ka = k->attrs, *ea = e->attrs;
- if (ka->dest == RTD_UNICAST)
- return nexthop_same(&(ka->nh), &(ea->nh));
+ eattr *nhea_k = ea_find(ka, &ea_gen_nexthop);
+ eattr *nhea_e = ea_find(ea, &ea_gen_nexthop);
- return 1;
+ return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr);
}
/*
@@ -419,7 +445,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
case KRT_SRC_ALIEN:
if (KRT_CF->learn)
- krt_learn_rte(p, e);
+ krt_learn_scan(p, e);
else
krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored");
return;
@@ -427,7 +453,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
#endif
/* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */
- RT_LOCK(p->p.main_channel->table);
+ RT_LOCKED(p->p.main_channel->table, tab)
+ {
+
/* Deleting all routes if flush is requested */
if (p->flush_routes)
goto delete;
@@ -436,7 +464,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src)
if (!p->ready)
goto ignore;
- net *net = net_find(RT_PRIV(p->p.main_channel->table), e->net);
+ net *net = net_find(tab, e->net);
if (!net || !krt_is_installed(p, net))
goto delete;
@@ -481,8 +509,9 @@ delete:
krt_replace_rte(p, e->net, NULL, e);
goto done;
-done:
- RT_UNLOCK(p->p.main_channel->table);
+done:;
+ }
+
lp_flush(krt_filter_lp);
}
@@ -490,18 +519,13 @@ static void
krt_init_scan(struct krt_proto *p)
{
bmap_reset(&p->seen_map, 1024);
-
-#ifdef KRT_ALLOW_LEARN
- if (KRT_CF->learn)
- channel_refresh_begin(p->p.main_channel);
-#endif
}
static void
krt_prune(struct krt_proto *p)
{
- RT_LOCK(p->p.main_channel->table);
- rtable_private *t = RT_PRIV(p->p.main_channel->table);
+ RT_LOCKED(p->p.main_channel->table, t)
+ {
KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name);
FIB_WALK(&t->fib, net, n)
@@ -521,15 +545,10 @@ krt_prune(struct krt_proto *p)
}
FIB_WALK_END;
- RT_UNLOCK(p->p.main_channel->table);
-
-#ifdef KRT_ALLOW_LEARN
- if (KRT_CF->learn)
- channel_refresh_end(p->p.main_channel);
-#endif
-
if (p->ready)
p->initialized = 1;
+
+ }
}
static void
@@ -567,25 +586,24 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src)
case KRT_SRC_ALIEN:
if (KRT_CF->learn)
{
- krt_learn_rte(p, e);
+ krt_learn_async(p, e, new);
return;
}
#endif
}
}
+
/*
* Periodic scanning
*/
-
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
-
-static timer *krt_scan_timer;
-static int krt_scan_count;
+static timer *krt_scan_all_timer;
+static int krt_scan_all_count;
+static _Bool krt_scan_all_tables;
static void
-krt_scan(timer *t UNUSED)
+krt_scan_all(timer *t UNUSED)
{
struct krt_proto *p;
node *n;
@@ -606,35 +624,42 @@ krt_scan(timer *t UNUSED)
}
static void
-krt_scan_timer_start(struct krt_proto *p)
+krt_scan_all_timer_start(struct krt_proto *p)
{
- if (!krt_scan_count)
- krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0);
+ if (!krt_scan_all_count)
+ krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0);
- krt_scan_count++;
+ krt_scan_all_count++;
- tm_start(krt_scan_timer, 1 S);
+ tm_start(krt_scan_all_timer, 1 S);
}
static void
-krt_scan_timer_stop(struct krt_proto *p UNUSED)
+krt_scan_all_timer_stop(void)
{
- krt_scan_count--;
+ ASSERT(krt_scan_all_count > 0);
+
+ krt_scan_all_count--;
- if (!krt_scan_count)
+ if (!krt_scan_all_count)
{
- rfree(krt_scan_timer);
- krt_scan_timer = NULL;
+ rfree(krt_scan_all_timer);
+ krt_scan_all_timer = NULL;
}
}
static void
-krt_scan_timer_kick(struct krt_proto *p UNUSED)
+krt_scan_all_timer_kick(void)
{
- tm_start(krt_scan_timer, 0);
+ tm_start(krt_scan_all_timer, 0);
+}
+
+void
+krt_use_shared_scan(void)
+{
+ krt_scan_all_tables = 1;
}
-#else
static void
krt_scan(timer *t)
@@ -652,35 +677,42 @@ krt_scan(timer *t)
static void
krt_scan_timer_start(struct krt_proto *p)
{
- p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
- tm_start(p->scan_timer, 1 S);
+ if (krt_scan_all_tables)
+ krt_scan_all_timer_start(p);
+ else
+ {
+ p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
+ tm_start(p->scan_timer, 1 S);
+ }
}
static void
krt_scan_timer_stop(struct krt_proto *p)
{
- tm_stop(p->scan_timer);
+ if (krt_scan_all_tables)
+ krt_scan_all_timer_stop();
+ else
+ tm_stop(p->scan_timer);
}
static void
krt_scan_timer_kick(struct krt_proto *p)
{
- tm_start(p->scan_timer, 0);
+ if (krt_scan_all_tables)
+ krt_scan_all_timer_kick();
+ else
+ tm_start(p->scan_timer, 0);
}
-#endif
-
-
-
/*
* Updates
*/
static int
-krt_preexport(struct channel *c, rte *e)
+krt_preexport(struct channel *C, rte *e)
{
- if (e->src->owner == &c->proto->sources)
+ if (e->src->owner == &C->proto->sources)
return -1;
if (!krt_capable(e))
@@ -780,11 +812,6 @@ krt_postconfig(struct proto_config *CF)
if (! proto_cf_main_channel(CF))
cf_error("Channel not specified");
-#ifdef CONFIG_ALL_TABLES_AT_ONCE
- if (krt_cf->scan_time != cf->scan_time)
- cf_error("All kernel syncers must use the same table scan interval");
-#endif
-
struct channel_config *cc = proto_cf_main_channel(CF);
struct rtable_config *tab = cc->table;
if (tab->krt_attached)
@@ -813,7 +840,6 @@ krt_init(struct proto_config *CF)
p->p.if_notify = krt_if_notify;
p->p.reload_routes = krt_reload_routes;
p->p.feed_end = krt_feed_end;
- p->p.rte_better = krt_rte_better;
krt_sys_init(p);
return &p->p;
@@ -839,10 +865,6 @@ krt_start(struct proto *P)
bmap_init(&p->seen_map, p->p.pool, 1024);
add_tail(&krt_proto_list, &p->krt_node);
-#ifdef KRT_ALLOW_LEARN
- krt_learn_init(p);
-#endif
-
if (!krt_sys_start(p))
{
rem_node(&p->krt_node);
@@ -922,24 +944,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src)
krt_sys_copy_config(d, s);
}
-static int
-krt_get_attr(const eattr *a, byte *buf, int buflen)
-{
- switch (a->id)
- {
- case EA_KRT_SOURCE:
- bsprintf(buf, "source");
- return GA_NAME;
-
- case EA_KRT_METRIC:
- bsprintf(buf, "metric");
- return GA_NAME;
-
- default:
- return krt_sys_get_attr(a, buf, buflen);
- }
-}
+struct ea_class ea_krt_source = {
+ .name = "krt_source",
+ .type = T_INT,
+};
+struct ea_class ea_krt_metric = {
+ .name = "krt_metric",
+ .type = T_INT,
+};
#ifdef CONFIG_IP6_SADR_KERNEL
#define MAYBE_IP6_SADR NB_IP6_SADR
@@ -956,7 +969,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen)
struct protocol proto_unix_kernel = {
.name = "Kernel",
.template = "kernel%d",
- .class = PROTOCOL_KERNEL,
.preference = DEF_PREF_INHERITED,
.channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS,
.proto_size = sizeof(struct krt_proto),
@@ -968,5 +980,15 @@ struct protocol proto_unix_kernel = {
.shutdown = krt_shutdown,
.reconfigure = krt_reconfigure,
.copy_config = krt_copy_config,
- .get_attr = krt_get_attr,
};
+
+void
+krt_build(void)
+{
+ proto_build(&proto_unix_kernel);
+
+ EA_REGISTER_ALL(
+ &ea_krt_source,
+ &ea_krt_metric,
+ );
+}
diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h
index 968c5b16..9f7ebb4f 100644
--- a/sysdep/unix/krt.h
+++ b/sysdep/unix/krt.h
@@ -21,8 +21,7 @@ struct kif_proto;
#define KRT_DEFAULT_ECMP_LIMIT 16
-#define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0)
-#define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1)
+extern struct ea_class ea_krt_source, ea_krt_metric;
#define KRT_REF_SEEN 0x1 /* Seen in table */
#define KRT_REF_BEST 0x2 /* Best in table */
@@ -51,10 +50,7 @@ struct krt_proto {
struct proto p;
struct krt_state sys; /* Sysdep state */
-#ifndef CONFIG_ALL_TABLES_AT_ONCE
timer *scan_timer;
-#endif
-
struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */
struct bmap seen_map; /* Routes seen during last periodic scan */
node krt_node; /* Node in krt_proto_list */
@@ -76,6 +72,7 @@ extern pool *krt_pool;
struct proto_config * kif_init_config(int class);
void kif_request_scan(void);
+void krt_use_shared_scan(void);
void krt_got_route(struct krt_proto *p, struct rte *e, s8 src);
void krt_got_route_async(struct krt_proto *p, struct rte *e, int new, s8 src);
diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c
index f48588b6..185231e8 100644
--- a/sysdep/unix/log.c
+++ b/sysdep/unix/log.c
@@ -36,10 +36,10 @@ static FILE *dbgf;
static list *current_log_list;
static char *current_syslog_name; /* NULL -> syslog closed */
-static _Atomic uint max_coro_id = ATOMIC_VAR_INIT(1);
-static _Thread_local uint this_coro_id;
+static _Atomic uint max_thread_id = ATOMIC_VAR_INIT(1);
+static _Thread_local uint this_thread_id;
-#define THIS_CORO_ID (this_coro_id ?: (this_coro_id = atomic_fetch_add_explicit(&max_coro_id, 1, memory_order_acq_rel)))
+#define THIS_THREAD_ID (this_thread_id ?: (this_thread_id = atomic_fetch_add_explicit(&max_thread_id, 1, memory_order_acq_rel)))
#include <pthread.h>
@@ -183,7 +183,7 @@ log_commit(int class, buffer *buf)
l->pos += msg_len;
}
- fprintf(l->fh, "%s [%04x] <%s> ", tbuf, THIS_CORO_ID, class_names[class]);
+ fprintf(l->fh, "%s [%04x] <%s> ", tbuf, THIS_THREAD_ID, class_names[class]);
}
fputs(buf->start, l->fh);
fputc('\n', l->fh);
@@ -329,7 +329,7 @@ debug(const char *msg, ...)
sec = dbg_time.tv_sec - dbg_time_start.tv_sec - 1;
}
- int n = bsnprintf(pos, max, "%u.%09u: [%04x] ", sec, nsec, THIS_CORO_ID);
+ int n = bsnprintf(pos, max, "%u.%09u: [%04x] ", sec, nsec, THIS_THREAD_ID);
pos += n;
max -= n;
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 5da27cb6..bf9f2be0 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -31,7 +31,7 @@
#include "lib/locking.h"
#include "lib/timer.h"
#include "lib/string.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/iface.h"
#include "nest/cli.h"
@@ -57,7 +57,7 @@ async_dump(void)
// XXXX tm_dump_all();
if_dump_all();
neigh_dump_all();
- rta_dump_all();
+ ea_dump_all();
rt_dump_all();
protos_dump_all();
@@ -117,7 +117,7 @@ add_num_const(char *name, int val, const char *file, const uint line)
struct f_val *v = cfg_alloc(sizeof(struct f_val));
*v = (struct f_val) { .type = T_INT, .val.i = val };
struct symbol *sym = cf_get_symbol(name);
- if (sym->class && (sym->scope == conf_this_scope))
+ if (sym->class && cf_symbol_is_local(sym))
cf_error("Error reading value for %s from %s:%d: already defined", name, file, line);
cf_define_symbol(sym, SYM_CONSTANT | T_INT, val, v);
@@ -683,7 +683,7 @@ signal_init(void)
* Parsing of command-line arguments
*/
-static char *opt_list = "B:c:dD:ps:P:u:g:flRh";
+static char *opt_list = "bc:dD:ps:P:u:g:flRh";
int parse_and_exit;
char *bird_name;
static char *use_user;
@@ -704,7 +704,6 @@ display_help(void)
fprintf(stderr,
"\n"
"Options: \n"
- " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n"
" -c <config-file> Use given configuration file instead of\n"
" " PATH_CONFIG_FILE "\n"
" -d Enable debug messages and run bird in foreground\n"
@@ -791,15 +790,12 @@ get_gid(const char *s)
return gr->gr_gid;
}
-extern _Bool alloc_multipage;
-
static void
parse_args(int argc, char **argv)
{
int config_changed = 0;
int socket_changed = 0;
int c;
- int bp;
bird_name = get_bird_name(argv[0], "bird");
if (argc == 2)
@@ -812,29 +808,6 @@ parse_args(int argc, char **argv)
while ((c = getopt(argc, argv, opt_list)) >= 0)
switch (c)
{
- case 'B':
- bp = atoi(optarg);
- if (bp < 1)
- {
- fprintf(stderr, "Strange block size power %d\n\n", bp);
- display_usage();
- exit(1);
- }
-
- if ((1 << bp) < page_size)
- {
- fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size);
- display_usage();
- exit(1);
- }
-
- if ((1L << bp) > page_size)
- {
- alloc_multipage = 1;
- page_size = (1L << bp);
- }
-
- break;
case 'c':
config_name = optarg;
config_changed = 1;
@@ -889,8 +862,6 @@ parse_args(int argc, char **argv)
}
}
-void resource_sys_init(void);
-
/*
* Hic Est main()
*/
@@ -904,7 +875,6 @@ main(int argc, char **argv)
#endif
times_update();
- resource_sys_init();
parse_args(argc, argv);
log_switch(1, NULL, NULL);
@@ -915,8 +885,8 @@ main(int argc, char **argv)
resource_init();
birdloop_init();
olock_init();
- io_init();
rt_init();
+ io_init();
if_init();
// roa_init();
config_init();
@@ -940,8 +910,6 @@ main(int argc, char **argv)
open_pid_file();
protos_build();
- proto_build(&proto_unix_kernel);
- proto_build(&proto_unix_iface);
struct config *conf = read_config();