diff options
Diffstat (limited to 'sysdep/unix')
-rw-r--r-- | sysdep/unix/Makefile | 2 | ||||
-rw-r--r-- | sysdep/unix/alloc.c | 193 | ||||
-rw-r--r-- | sysdep/unix/domain.c (renamed from sysdep/unix/coroutine.c) | 78 | ||||
-rw-r--r-- | sysdep/unix/io-loop.c | 97 | ||||
-rw-r--r-- | sysdep/unix/io-loop.h | 11 | ||||
-rw-r--r-- | sysdep/unix/io.c | 37 | ||||
-rw-r--r-- | sysdep/unix/krt.Y | 5 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 222 | ||||
-rw-r--r-- | sysdep/unix/krt.h | 7 | ||||
-rw-r--r-- | sysdep/unix/log.c | 10 | ||||
-rw-r--r-- | sysdep/unix/main.c | 42 |
11 files changed, 402 insertions, 302 deletions
diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index 07f454ab..6f6b0d26 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -1,4 +1,4 @@ -src := alloc.c io.c io-loop.c krt.c log.c main.c random.c coroutine.c +src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c obj := $(src-o-files) $(all-daemon) $(cf-local) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 4c9d5eb5..47cd4624 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -8,7 +8,11 @@ #include "nest/bird.h" #include "lib/resource.h" +#include "lib/lists.h" +#include "lib/event.h" +#include "lib/rcu.h" +#include <errno.h> #include <stdlib.h> #include <unistd.h> @@ -17,82 +21,167 @@ #endif long page_size = 0; -_Bool alloc_multipage = 0; #ifdef HAVE_MMAP +#define KEEP_PAGES_MAX 256 +#define KEEP_PAGES_MIN 8 + +STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX); + static _Bool use_fake = 0; +static _Bool initialized = 0; + +#if DEBUGGING +struct free_page { + node unused[42]; + struct free_page * _Atomic next; +}; #else -static _Bool use_fake = 1; +struct free_page { + struct free_page * _Atomic next; +}; #endif -void resource_sys_init(void) +static struct free_page * _Atomic page_stack = NULL; + +static void page_cleanup(void *); +static event page_cleanup_event = { .hook = page_cleanup, }; +#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0) + +_Atomic int pages_kept = 0; + +static void * +alloc_sys_page(void) { -#ifdef HAVE_MMAP - if (!(page_size = sysconf(_SC_PAGESIZE))) - die("System page size must be non-zero"); + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) - { -#endif - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; - } + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); + + return ptr; } +extern int shutting_down; /* Shutdown requested. */ + +#else // ! HAVE_MMAP +#define use_fake 1 +#endif + void * -alloc_sys_page(void) +alloc_page(void) { + if (use_fake) + { + void *ptr = NULL; + int err = posix_memalign(&ptr, page_size, page_size); + + if (err || !ptr) + bug("posix_memalign(%lu) failed", (long unsigned int) page_size); + + return ptr; + } + #ifdef HAVE_MMAP - if (!use_fake) + rcu_read_lock(); + struct free_page *fp = atomic_load_explicit(&page_stack, memory_order_acquire); + while (fp && !atomic_compare_exchange_strong_explicit( + &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire), + memory_order_acq_rel, memory_order_acquire)) + ; + rcu_read_unlock(); + + if (!fp) + return alloc_sys_page(); + + if (atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) <= KEEP_PAGES_MIN) + SCHEDULE_CLEANUP; + + return fp; +#endif +} + +void +free_page(void *ptr) +{ + if (use_fake) { - if (alloc_multipage) - { - void *big = mmap(NULL, page_size * 2, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (big == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - uintptr_t offset = ((uintptr_t) big) % page_size; - if (offset) - { - void *ret = big + page_size - offset; - munmap(big, page_size - offset); - munmap(ret + page_size, offset); - return ret; - } - else - { - munmap(big + page_size, page_size); - return big; - } - } - - void *ret = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", page_size); - - return ret; + free(ptr); + return; } - else + +#ifdef HAVE_MMAP + rcu_read_lock(); + struct free_page *fp = ptr; + struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire); + + do atomic_store_explicit(&fp->next, next, memory_order_release); + while (!atomic_compare_exchange_strong_explicit( + &page_stack, &next, fp, + memory_order_acq_rel, memory_order_acquire)); + rcu_read_unlock(); + + if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX) + SCHEDULE_CLEANUP; #endif +} + +#ifdef HAVE_MMAP +static void +page_cleanup(void *_ UNUSED) +{ + struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel); + if (!stack) + return; + + synchronize_rcu(); + + do { + struct free_page *f = stack; + stack = atomic_load_explicit(&f->next, memory_order_acquire); + + if (munmap(f, page_size) == 0) + continue; + else if (errno != ENOMEM) + bug("munmap(%p) failed: %m", f); + else + free_page(f); + } + while (stack && (atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2)); + + while (stack) { - void *ret = aligned_alloc(page_size, page_size); - if (!ret) - bug("aligned_alloc(%lu) failed", page_size); - return ret; + atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); + + struct free_page *f = stack; + stack = atomic_load_explicit(&f->next, memory_order_acquire); + free_page(f); } } +#endif void -free_sys_page(void *ptr) +resource_sys_init(void) { #ifdef HAVE_MMAP - if (!use_fake) + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) { - if (munmap(ptr, page_size) < 0) - bug("munmap(%p) failed: %m", ptr); + + for (int i = 0; i < (KEEP_PAGES_MIN * 2); i++) + free_page(alloc_page()); + + page_cleanup(NULL); + initialized = 1; + return; } - else + + /* Too big or strange page, use the aligned allocator instead */ + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + use_fake = 1; #endif - free(ptr); + + page_size = 4096; + initialized = 1; } diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/domain.c index 4758c056..0a5858a6 100644 --- a/sysdep/unix/coroutine.c +++ b/sysdep/unix/domain.c @@ -1,7 +1,6 @@ /* - * BIRD Coroutines + * BIRD Locking * - * (c) 2017 Martin Mares <mj@ucw.cz> * (c) 2020 Maria Matejka <mq@jmq.cz> * * Can be freely distributed and used under the terms of the GNU GPL. @@ -17,18 +16,11 @@ #include "lib/birdlib.h" #include "lib/locking.h" -#include "lib/coro.h" #include "lib/resource.h" #include "lib/timer.h" #include "conf/conf.h" -#define CORO_STACK_SIZE 65536 - -/* - * Implementation of coroutines based on POSIX threads - */ - #include <errno.h> #include <fcntl.h> #include <poll.h> @@ -122,71 +114,3 @@ void do_unlock(struct domain_generic *dg, struct domain_generic **lsp) dg->prev = NULL; pthread_mutex_unlock(&dg->mutex); } - -/* Coroutines */ -struct coroutine { - resource r; - pthread_t id; - pthread_attr_t attr; - void (*entry)(void *); - void *data; -}; - -static _Thread_local _Bool coro_cleaned_up = 0; - -static void coro_free(resource *r) -{ - struct coroutine *c = (void *) r; - ASSERT_DIE(pthread_equal(pthread_self(), c->id)); - pthread_attr_destroy(&c->attr); - coro_cleaned_up = 1; -} - -static struct resclass coro_class = { - .name = "Coroutine", - .size = sizeof(struct coroutine), - .free = coro_free, -}; - -_Thread_local struct coroutine *this_coro = NULL; - -static void *coro_entry(void *p) -{ - struct coroutine *c = p; - - ASSERT_DIE(c->entry); - - this_coro = c; - - c->entry(c->data); - ASSERT_DIE(coro_cleaned_up); - - return NULL; -} - -struct coroutine *coro_run(pool *p, void (*entry)(void *), void *data) -{ - ASSERT_DIE(entry); - ASSERT_DIE(p); - - struct coroutine *c = ralloc(p, &coro_class); - - c->entry = entry; - c->data = data; - - int e = 0; - - if (e = pthread_attr_init(&c->attr)) - die("pthread_attr_init() failed: %M", e); - - if (e = pthread_attr_setstacksize(&c->attr, CORO_STACK_SIZE)) - die("pthread_attr_setstacksize(%u) failed: %M", CORO_STACK_SIZE, e); - - if (e = pthread_attr_setdetachstate(&c->attr, PTHREAD_CREATE_DETACHED)) - die("pthread_attr_setdetachstate(PTHREAD_CREATE_DETACHED) failed: %M", e); - - if (e = pthread_create(&c->id, &c->attr, coro_entry, c)) - die("pthread_create() failed: %M", e); - - return c; -} diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c index c7cf4ad2..575e5403 100644 --- a/sysdep/unix/io-loop.c +++ b/sysdep/unix/io-loop.c @@ -17,7 +17,6 @@ #include "nest/bird.h" #include "lib/buffer.h" -#include "lib/coro.h" #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" @@ -28,6 +27,8 @@ #include "sysdep/unix/io-loop.h" #include "conf/conf.h" +#define THREAD_STACK_SIZE 65536 /* To be lowered in near future */ + /* * Current thread context */ @@ -58,6 +59,31 @@ birdloop_inside(struct birdloop *loop) return 0; } +void +birdloop_flag(struct birdloop *loop, u32 flag) +{ + atomic_fetch_or_explicit(&loop->flags, flag, memory_order_acq_rel); + birdloop_ping(loop); +} + +void +birdloop_flag_set_handler(struct birdloop *loop, struct birdloop_flag_handler *fh) +{ + ASSERT_DIE(birdloop_inside(loop)); + loop->flag_handler = fh; +} + +static int +birdloop_process_flags(struct birdloop *loop) +{ + if (!loop->flag_handler) + return 0; + + u32 flags = atomic_exchange_explicit(&loop->flags, 0, memory_order_acq_rel); + loop->flag_handler->hook(loop->flag_handler, flags); + return !!flags; +} + /* * Wakeup code for birdloop */ @@ -132,11 +158,10 @@ wakeup_do_kick(struct birdloop *loop) pipe_kick(loop->wakeup_fds[1]); } -void -birdloop_ping(struct birdloop *loop) +static inline void +birdloop_do_ping(struct birdloop *loop) { - u32 ping_sent = atomic_fetch_add_explicit(&loop->ping_sent, 1, memory_order_acq_rel); - if (ping_sent) + if (atomic_fetch_add_explicit(&loop->ping_sent, 1, memory_order_acq_rel)) return; if (loop == birdloop_wakeup_masked) @@ -145,6 +170,15 @@ birdloop_ping(struct birdloop *loop) wakeup_do_kick(loop); } +void +birdloop_ping(struct birdloop *loop) +{ + if (birdloop_inside(loop) && !loop->ping_pending) + loop->ping_pending++; + else + birdloop_do_ping(loop); +} + /* * Sockets @@ -205,7 +239,7 @@ sk_stop(sock *s) } static inline uint sk_want_events(sock *s) -{ return ((s->rx_hook && !ev_corked(s->cork)) ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); } +{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); } /* FIXME: this should be called from sock code @@ -336,7 +370,7 @@ birdloop_init(void) birdloop_enter_locked(&main_birdloop); } -static void birdloop_main(void *arg); +static void *birdloop_main(void *arg); struct birdloop * birdloop_new(pool *pp, uint order, const char *name) @@ -357,7 +391,19 @@ birdloop_new(pool *pp, uint order, const char *name) timers_init(&loop->time, p); sockets_init(loop); - loop->time.coro = coro_run(p, birdloop_main, loop); + int e = 0; + + if (e = pthread_attr_init(&loop->thread_attr)) + die("pthread_attr_init() failed: %M", e); + + if (e = pthread_attr_setstacksize(&loop->thread_attr, THREAD_STACK_SIZE)) + die("pthread_attr_setstacksize(%u) failed: %M", THREAD_STACK_SIZE, e); + + if (e = pthread_attr_setdetachstate(&loop->thread_attr, PTHREAD_CREATE_DETACHED)) + die("pthread_attr_setdetachstate(PTHREAD_CREATE_DETACHED) failed: %M", e); + + if (e = pthread_create(&loop->thread_id, &loop->thread_attr, birdloop_main, loop)) + die("pthread_create() failed: %M", e); birdloop_leave(loop); @@ -393,6 +439,11 @@ void birdloop_free(struct birdloop *loop) { ASSERT_DIE(loop->links == 0); + ASSERT_DIE(pthread_equal(pthread_self(), loop->thread_id)); + + rcu_birdloop_stop(&loop->rcu); + pthread_attr_destroy(&loop->thread_attr); + domain_free(loop->time.domain); rfree(loop->pool); } @@ -423,6 +474,13 @@ birdloop_leave_locked(struct birdloop *loop) /* Check the current context */ ASSERT_DIE(birdloop_current == loop); + /* Send pending pings */ + if (loop->ping_pending) + { + loop->ping_pending = 0; + birdloop_do_ping(loop); + } + /* Restore the old context */ birdloop_current = loop->prev_loop; } @@ -466,20 +524,24 @@ birdloop_unlink(struct birdloop *loop) loop->links--; } -static void +static void * birdloop_main(void *arg) { struct birdloop *loop = arg; timer *t; int rv, timeout; + rcu_birdloop_start(&loop->rcu); + btime loop_begin = current_time(); + tmp_init(loop->pool); + birdloop_enter(loop); while (1) { timers_fire(&loop->time, 0); - if (ev_run_list(&loop->event_list)) + if (birdloop_process_flags(loop) + ev_run_list(&loop->event_list)) timeout = 0; else if (t = timers_first(&loop->time)) timeout = (tm_remains(t) TO_MS) + 1; @@ -523,13 +585,22 @@ birdloop_main(void *arg) /* Flush remaining events */ ASSERT_DIE(!ev_run_list(&loop->event_list)); - /* No timers allowed */ - ASSERT_DIE(timers_count(&loop->time) == 0); + /* Drop timers */ + while (t = timers_first(&loop->time)) + tm_stop(t); + + /* No sockets allowed */ ASSERT_DIE(EMPTY_LIST(loop->sock_list)); ASSERT_DIE(loop->sock_num == 0); birdloop_leave(loop); loop->stopped(loop->stop_data); -} + return NULL; +} +void +birdloop_yield(void) +{ + usleep(100); +} diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h index 4024b6c5..aec7a409 100644 --- a/sysdep/unix/io-loop.h +++ b/sysdep/unix/io-loop.h @@ -7,6 +7,8 @@ #ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_ #define _BIRD_SYSDEP_UNIX_IO_LOOP_H_ +#include "lib/rcu.h" + struct birdloop { pool *pool; @@ -21,11 +23,20 @@ struct birdloop u8 poll_changed; u8 close_scheduled; + uint ping_pending; _Atomic u32 ping_sent; int wakeup_fds[2]; + pthread_t thread_id; + pthread_attr_t thread_attr; + + struct rcu_birdloop rcu; + uint links; + _Atomic u32 flags; + struct birdloop_flag_handler *flag_handler; + void (*stopped)(void *data); void *stop_data; diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index dd385c80..23baffb2 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -1435,6 +1435,10 @@ sk_open(sock *s) if (sk_set_high_port(s) < 0) log(L_WARN "Socket error: %s%#m", s->err); + if (s->flags & SKF_FREEBIND) + if (sk_set_freebind(s) < 0) + log(L_WARN "Socket error: %s%#m", s->err); + sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port); if (bind(fd, &sa.sa, SA_LEN(sa)) < 0) ERR2("bind"); @@ -1879,8 +1883,8 @@ sk_read_ssh(sock *s) /* sk_read() and sk_write() are called from BFD's event loop */ -int -sk_read(sock *s, int revents) +static inline int +sk_read_noflush(sock *s, int revents) { switch (s->type) { @@ -1943,7 +1947,15 @@ sk_read(sock *s, int revents) } int -sk_write(sock *s) +sk_read(sock *s, int revents) +{ + int e = sk_read_noflush(s, revents); + tmp_flush(); + return e; +} + +static inline int +sk_write_noflush(sock *s) { switch (s->type) { @@ -1991,6 +2003,14 @@ sk_write(sock *s) } } +int +sk_write(sock *s) +{ + int e = sk_write_noflush(s); + tmp_flush(); + return e; +} + int sk_is_ipv4(sock *s) { return s->af == AF_INET; } @@ -2009,6 +2029,7 @@ sk_err(sock *s, int revents) } s->err_hook(s, se); + tmp_flush(); } void @@ -2058,8 +2079,8 @@ io_update_time(void) event_open->duration = last_io_time - event_open->timestamp; if (event_open->duration > config->latency_limit) - log(L_WARN "Event 0x%p 0x%p took %d ms", - event_open->hook, event_open->data, (int) (event_open->duration TO_MS)); + log(L_WARN "Event 0x%p 0x%p took %u.%03u ms", + event_open->hook, event_open->data, (uint) (event_open->duration TO_MS), (uint) (event_open->duration % 1000)); event_open = NULL; } @@ -2163,8 +2184,8 @@ watchdog_stop(void) btime duration = last_io_time - loop_time; if (duration > config->watchdog_warning) - log(L_WARN "I/O loop cycle took %d ms for %d events", - (int) (duration TO_MS), event_log_num); + log(L_WARN "I/O loop cycle took %u.%03u ms for %d events", + (uint) (duration TO_MS), (uint) (duration % 1000), event_log_num); } @@ -2234,7 +2255,7 @@ io_loop(void) { pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */ s = SKIP_BACK(sock, n, n); - if (s->rx_hook && !ev_corked(s->cork)) + if (s->rx_hook) { pfd[nfds].fd = s->fd; pfd[nfds].events |= POLLIN; diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 95b54d65..4ce9a328 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip) CF_DECLS -CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS) CF_KEYWORDS(INTERFACE, PREFERRED) %type <i> kern_mp_limit @@ -122,9 +122,6 @@ kif_iface: kif_iface_start iface_patt_list_nopx kif_iface_opt_list; -dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ; -dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ; - CF_CODE CF_END diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 98c56391..d507c133 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -53,7 +53,7 @@ #include "nest/bird.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "conf/conf.h" @@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", - .class = PROTOCOL_DEVICE, .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .preconfig = kif_preconfig, @@ -243,6 +242,13 @@ struct protocol proto_unix_iface = { .copy_config = kif_copy_config }; +void +kif_build(void) +{ + proto_build(&proto_unix_iface); +} + + /* * Tracing of routes */ @@ -280,30 +286,46 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; static inline u32 krt_metric(rte *a) { - eattr *ea = ea_find(a->attrs->eattrs, EA_KRT_METRIC); + eattr *ea = ea_find(a->attrs, &ea_krt_metric); return ea ? ea->u.data : 0; } static inline int -krt_rte_better(rte *a, rte *b) +krt_same_key(rte *a, rte *b) +{ + return (krt_metric(a) == krt_metric(b)); +} + +static inline int +krt_uptodate(rte *a, rte *b) { - return (krt_metric(a) > krt_metric(b)); + return (a->attrs == b->attrs); } /* Called when alien route is discovered during scan */ static void -krt_learn_rte(struct krt_proto *p, rte *e) +krt_learn_scan(struct krt_proto *p, rte *e) { - struct rte_src *src = e->src = rt_get_source(&p->p, krt_metric(e)); - rte_update(p->p.main_channel, e->net, e, e->src); - rt_unlock_source(src); + rte e0 = { + .attrs = e->attrs, + .src = rt_get_source(&p->p, krt_metric(e)), + }; + + ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference); + + rte_update(p->p.main_channel, e->net, &e0, e0.src); + rt_unlock_source(e0.src); } static void -krt_learn_init(struct krt_proto *p) +krt_learn_async(struct krt_proto *p, rte *e, int new) { - if (KRT_CF->learn) - channel_setup_in_table(p->p.main_channel, 1); + if (new) + return krt_learn_scan(p, e); + + struct rte_src *src = rt_get_source(&p->p, krt_metric(e)); + rte_update(p->p.main_channel, e->net, NULL, src); + rt_unlock_source(src); } #endif @@ -323,7 +345,7 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) + if (rte_is_valid(RTE_OR_NULL(e))) count++; return count; } @@ -333,7 +355,7 @@ rte_feed_obtain(net *n, rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTES_OR_NULL(e))) + if (rte_is_valid(RTE_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; @@ -344,6 +366,13 @@ rte_feed_obtain(net *n, rte **feed, uint count) static struct rte * krt_export_net(struct krt_proto *p, net *net) { + /* FIXME: Here we are calling filters in table-locked context when exporting + * to kernel. Here BIRD can crash if the user requested ROA check in kernel + * export filter. It doesn't make much sense to write the filters like this, + * therefore we may keep this unfinished piece of work here for later as it + * won't really affect anybody. */ + ASSERT_DIE(RT_IS_LOCKED(p->p.main_channel->table)); + struct channel *c = p->p.main_channel; const struct filter *filter = c->out_filter; @@ -372,7 +401,7 @@ krt_export_net(struct krt_proto *p, net *net) if (filter == FILTER_ACCEPT) goto accept; - if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT) + if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT) goto reject; @@ -386,15 +415,12 @@ reject: static int krt_same_dest(rte *k, rte *e) { - rta *ka = k->attrs, *ea = e->attrs; - - if (ka->dest != ea->dest) - return 0; + ea_list *ka = k->attrs, *ea = e->attrs; - if (ka->dest == RTD_UNICAST) - return nexthop_same(&(ka->nh), &(ea->nh)); + eattr *nhea_k = ea_find(ka, &ea_gen_nexthop); + eattr *nhea_e = ea_find(ea, &ea_gen_nexthop); - return 1; + return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /* @@ -419,7 +445,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) case KRT_SRC_ALIEN: if (KRT_CF->learn) - krt_learn_rte(p, e); + krt_learn_scan(p, e); else krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); return; @@ -427,7 +453,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ - RT_LOCK(p->p.main_channel->table); + RT_LOCKED(p->p.main_channel->table, tab) + { + /* Deleting all routes if flush is requested */ if (p->flush_routes) goto delete; @@ -436,7 +464,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) if (!p->ready) goto ignore; - net *net = net_find(RT_PRIV(p->p.main_channel->table), e->net); + net *net = net_find(tab, e->net); if (!net || !krt_is_installed(p, net)) goto delete; @@ -481,8 +509,9 @@ delete: krt_replace_rte(p, e->net, NULL, e); goto done; -done: - RT_UNLOCK(p->p.main_channel->table); +done:; + } + lp_flush(krt_filter_lp); } @@ -490,18 +519,13 @@ static void krt_init_scan(struct krt_proto *p) { bmap_reset(&p->seen_map, 1024); - -#ifdef KRT_ALLOW_LEARN - if (KRT_CF->learn) - channel_refresh_begin(p->p.main_channel); -#endif } static void krt_prune(struct krt_proto *p) { - RT_LOCK(p->p.main_channel->table); - rtable_private *t = RT_PRIV(p->p.main_channel->table); + RT_LOCKED(p->p.main_channel->table, t) + { KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); FIB_WALK(&t->fib, net, n) @@ -521,15 +545,10 @@ krt_prune(struct krt_proto *p) } FIB_WALK_END; - RT_UNLOCK(p->p.main_channel->table); - -#ifdef KRT_ALLOW_LEARN - if (KRT_CF->learn) - channel_refresh_end(p->p.main_channel); -#endif - if (p->ready) p->initialized = 1; + + } } static void @@ -567,25 +586,24 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) case KRT_SRC_ALIEN: if (KRT_CF->learn) { - krt_learn_rte(p, e); + krt_learn_async(p, e, new); return; } #endif } } + /* * Periodic scanning */ - -#ifdef CONFIG_ALL_TABLES_AT_ONCE - -static timer *krt_scan_timer; -static int krt_scan_count; +static timer *krt_scan_all_timer; +static int krt_scan_all_count; +static _Bool krt_scan_all_tables; static void -krt_scan(timer *t UNUSED) +krt_scan_all(timer *t UNUSED) { struct krt_proto *p; node *n; @@ -606,35 +624,42 @@ krt_scan(timer *t UNUSED) } static void -krt_scan_timer_start(struct krt_proto *p) +krt_scan_all_timer_start(struct krt_proto *p) { - if (!krt_scan_count) - krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0); + if (!krt_scan_all_count) + krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0); - krt_scan_count++; + krt_scan_all_count++; - tm_start(krt_scan_timer, 1 S); + tm_start(krt_scan_all_timer, 1 S); } static void -krt_scan_timer_stop(struct krt_proto *p UNUSED) +krt_scan_all_timer_stop(void) { - krt_scan_count--; + ASSERT(krt_scan_all_count > 0); + + krt_scan_all_count--; - if (!krt_scan_count) + if (!krt_scan_all_count) { - rfree(krt_scan_timer); - krt_scan_timer = NULL; + rfree(krt_scan_all_timer); + krt_scan_all_timer = NULL; } } static void -krt_scan_timer_kick(struct krt_proto *p UNUSED) +krt_scan_all_timer_kick(void) { - tm_start(krt_scan_timer, 0); + tm_start(krt_scan_all_timer, 0); +} + +void +krt_use_shared_scan(void) +{ + krt_scan_all_tables = 1; } -#else static void krt_scan(timer *t) @@ -652,35 +677,42 @@ krt_scan(timer *t) static void krt_scan_timer_start(struct krt_proto *p) { - p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0); - tm_start(p->scan_timer, 1 S); + if (krt_scan_all_tables) + krt_scan_all_timer_start(p); + else + { + p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0); + tm_start(p->scan_timer, 1 S); + } } static void krt_scan_timer_stop(struct krt_proto *p) { - tm_stop(p->scan_timer); + if (krt_scan_all_tables) + krt_scan_all_timer_stop(); + else + tm_stop(p->scan_timer); } static void krt_scan_timer_kick(struct krt_proto *p) { - tm_start(p->scan_timer, 0); + if (krt_scan_all_tables) + krt_scan_all_timer_kick(); + else + tm_start(p->scan_timer, 0); } -#endif - - - /* * Updates */ static int -krt_preexport(struct channel *c, rte *e) +krt_preexport(struct channel *C, rte *e) { - if (e->src->owner == &c->proto->sources) + if (e->src->owner == &C->proto->sources) return -1; if (!krt_capable(e)) @@ -780,11 +812,6 @@ krt_postconfig(struct proto_config *CF) if (! proto_cf_main_channel(CF)) cf_error("Channel not specified"); -#ifdef CONFIG_ALL_TABLES_AT_ONCE - if (krt_cf->scan_time != cf->scan_time) - cf_error("All kernel syncers must use the same table scan interval"); -#endif - struct channel_config *cc = proto_cf_main_channel(CF); struct rtable_config *tab = cc->table; if (tab->krt_attached) @@ -813,7 +840,6 @@ krt_init(struct proto_config *CF) p->p.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; - p->p.rte_better = krt_rte_better; krt_sys_init(p); return &p->p; @@ -839,10 +865,6 @@ krt_start(struct proto *P) bmap_init(&p->seen_map, p->p.pool, 1024); add_tail(&krt_proto_list, &p->krt_node); -#ifdef KRT_ALLOW_LEARN - krt_learn_init(p); -#endif - if (!krt_sys_start(p)) { rem_node(&p->krt_node); @@ -922,24 +944,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src) krt_sys_copy_config(d, s); } -static int -krt_get_attr(const eattr *a, byte *buf, int buflen) -{ - switch (a->id) - { - case EA_KRT_SOURCE: - bsprintf(buf, "source"); - return GA_NAME; - - case EA_KRT_METRIC: - bsprintf(buf, "metric"); - return GA_NAME; - - default: - return krt_sys_get_attr(a, buf, buflen); - } -} +struct ea_class ea_krt_source = { + .name = "krt_source", + .type = T_INT, +}; +struct ea_class ea_krt_metric = { + .name = "krt_metric", + .type = T_INT, +}; #ifdef CONFIG_IP6_SADR_KERNEL #define MAYBE_IP6_SADR NB_IP6_SADR @@ -956,7 +969,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen) struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", - .class = PROTOCOL_KERNEL, .preference = DEF_PREF_INHERITED, .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, .proto_size = sizeof(struct krt_proto), @@ -968,5 +980,15 @@ struct protocol proto_unix_kernel = { .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, - .get_attr = krt_get_attr, }; + +void +krt_build(void) +{ + proto_build(&proto_unix_kernel); + + EA_REGISTER_ALL( + &ea_krt_source, + &ea_krt_metric, + ); +} diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 968c5b16..9f7ebb4f 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -21,8 +21,7 @@ struct kif_proto; #define KRT_DEFAULT_ECMP_LIMIT 16 -#define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0) -#define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1) +extern struct ea_class ea_krt_source, ea_krt_metric; #define KRT_REF_SEEN 0x1 /* Seen in table */ #define KRT_REF_BEST 0x2 /* Best in table */ @@ -51,10 +50,7 @@ struct krt_proto { struct proto p; struct krt_state sys; /* Sysdep state */ -#ifndef CONFIG_ALL_TABLES_AT_ONCE timer *scan_timer; -#endif - struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */ struct bmap seen_map; /* Routes seen during last periodic scan */ node krt_node; /* Node in krt_proto_list */ @@ -76,6 +72,7 @@ extern pool *krt_pool; struct proto_config * kif_init_config(int class); void kif_request_scan(void); +void krt_use_shared_scan(void); void krt_got_route(struct krt_proto *p, struct rte *e, s8 src); void krt_got_route_async(struct krt_proto *p, struct rte *e, int new, s8 src); diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index f48588b6..185231e8 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -36,10 +36,10 @@ static FILE *dbgf; static list *current_log_list; static char *current_syslog_name; /* NULL -> syslog closed */ -static _Atomic uint max_coro_id = ATOMIC_VAR_INIT(1); -static _Thread_local uint this_coro_id; +static _Atomic uint max_thread_id = ATOMIC_VAR_INIT(1); +static _Thread_local uint this_thread_id; -#define THIS_CORO_ID (this_coro_id ?: (this_coro_id = atomic_fetch_add_explicit(&max_coro_id, 1, memory_order_acq_rel))) +#define THIS_THREAD_ID (this_thread_id ?: (this_thread_id = atomic_fetch_add_explicit(&max_thread_id, 1, memory_order_acq_rel))) #include <pthread.h> @@ -183,7 +183,7 @@ log_commit(int class, buffer *buf) l->pos += msg_len; } - fprintf(l->fh, "%s [%04x] <%s> ", tbuf, THIS_CORO_ID, class_names[class]); + fprintf(l->fh, "%s [%04x] <%s> ", tbuf, THIS_THREAD_ID, class_names[class]); } fputs(buf->start, l->fh); fputc('\n', l->fh); @@ -329,7 +329,7 @@ debug(const char *msg, ...) sec = dbg_time.tv_sec - dbg_time_start.tv_sec - 1; } - int n = bsnprintf(pos, max, "%u.%09u: [%04x] ", sec, nsec, THIS_CORO_ID); + int n = bsnprintf(pos, max, "%u.%09u: [%04x] ", sec, nsec, THIS_THREAD_ID); pos += n; max -= n; diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 5da27cb6..bf9f2be0 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -31,7 +31,7 @@ #include "lib/locking.h" #include "lib/timer.h" #include "lib/string.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" @@ -57,7 +57,7 @@ async_dump(void) // XXXX tm_dump_all(); if_dump_all(); neigh_dump_all(); - rta_dump_all(); + ea_dump_all(); rt_dump_all(); protos_dump_all(); @@ -117,7 +117,7 @@ add_num_const(char *name, int val, const char *file, const uint line) struct f_val *v = cfg_alloc(sizeof(struct f_val)); *v = (struct f_val) { .type = T_INT, .val.i = val }; struct symbol *sym = cf_get_symbol(name); - if (sym->class && (sym->scope == conf_this_scope)) + if (sym->class && cf_symbol_is_local(sym)) cf_error("Error reading value for %s from %s:%d: already defined", name, file, line); cf_define_symbol(sym, SYM_CONSTANT | T_INT, val, v); @@ -683,7 +683,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "B:c:dD:ps:P:u:g:flRh"; +static char *opt_list = "bc:dD:ps:P:u:g:flRh"; int parse_and_exit; char *bird_name; static char *use_user; @@ -704,7 +704,6 @@ display_help(void) fprintf(stderr, "\n" "Options: \n" - " -B <block-size> Use 2^this number as memory allocation block size (default: 12)\n" " -c <config-file> Use given configuration file instead of\n" " " PATH_CONFIG_FILE "\n" " -d Enable debug messages and run bird in foreground\n" @@ -791,15 +790,12 @@ get_gid(const char *s) return gr->gr_gid; } -extern _Bool alloc_multipage; - static void parse_args(int argc, char **argv) { int config_changed = 0; int socket_changed = 0; int c; - int bp; bird_name = get_bird_name(argv[0], "bird"); if (argc == 2) @@ -812,29 +808,6 @@ parse_args(int argc, char **argv) while ((c = getopt(argc, argv, opt_list)) >= 0) switch (c) { - case 'B': - bp = atoi(optarg); - if (bp < 1) - { - fprintf(stderr, "Strange block size power %d\n\n", bp); - display_usage(); - exit(1); - } - - if ((1 << bp) < page_size) - { - fprintf(stderr, "Requested block size %ld is lesser than page size %ld\n\n", (1L<<bp), page_size); - display_usage(); - exit(1); - } - - if ((1L << bp) > page_size) - { - alloc_multipage = 1; - page_size = (1L << bp); - } - - break; case 'c': config_name = optarg; config_changed = 1; @@ -889,8 +862,6 @@ parse_args(int argc, char **argv) } } -void resource_sys_init(void); - /* * Hic Est main() */ @@ -904,7 +875,6 @@ main(int argc, char **argv) #endif times_update(); - resource_sys_init(); parse_args(argc, argv); log_switch(1, NULL, NULL); @@ -915,8 +885,8 @@ main(int argc, char **argv) resource_init(); birdloop_init(); olock_init(); - io_init(); rt_init(); + io_init(); if_init(); // roa_init(); config_init(); @@ -940,8 +910,6 @@ main(int argc, char **argv) open_pid_file(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); struct config *conf = read_config(); |