diff options
author | Maria Matejka <mq@ucw.cz> | 2022-10-04 16:15:36 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-10-04 16:15:36 +0200 |
commit | dc9351d326b9d2d8bcb7e9a0e5126878c2b02762 (patch) | |
tree | 842b9d21cc1f4b16869cac58711902f5b1f78b91 | |
parent | 00679a688a5feff7a919cbeae71dd050ccc90b22 (diff) | |
parent | 67256d50359d42aca4e64bb1cb5dcb3c63669578 (diff) |
Merge commit '67256d50' into HEAD
53 files changed, 923 insertions, 490 deletions
diff --git a/Makefile.in b/Makefile.in index 0d55807b..fa534872 100644 --- a/Makefile.in +++ b/Makefile.in @@ -26,6 +26,7 @@ INSTALL_DATA=@INSTALL_DATA@ client=$(addprefix $(exedir)/,@CLIENT@) daemon=$(exedir)/bird protocols=@protocols@ +PROTO_BUILD := $(protocols) dev kif krt prefix=@prefix@ exec_prefix=@exec_prefix@ @@ -82,9 +83,6 @@ conf-lex-targets := $(addprefix $(objdir)/conf/,cf-lex.o) conf-y-targets := $(addprefix $(objdir)/conf/,cf-parse.y keywords.h commands.h) cf-local = $(conf-y-targets): $(s)config.Y -# nest/Makefile declarations needed for all other modules -proto-build-c := $(addprefix $(objdir)/nest/,proto-build.c) - src-o-files = $(patsubst %.c,$(o)%.o,$(src)) tests-target-files = $(patsubst %.c,$(o)%,$(tests_src)) @@ -98,13 +96,6 @@ else o = $(patsubst $(srcdir)%,$(objdir)%,$(s)) endif -define proto-build_in = -PROTO_BUILD += $(1) -$(proto-build-c): $(lastword $(MAKEFILE_LIST)) -endef - -proto-build = $(eval $(call proto-build_in,$(1))) - define clean_in = clean:: rm -f $(addprefix $(o),$(1)) diff --git a/conf/confbase.Y b/conf/confbase.Y index 241c332d..8e5da9e3 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -14,6 +14,7 @@ CF_HDR #include "conf/conf.h" #include "lib/resource.h" #include "lib/socket.h" +#include "lib/settle.h" #include "lib/timer.h" #include "lib/string.h" #include "nest/protocol.h" @@ -93,6 +94,7 @@ CF_DECLS struct proto_spec ps; struct channel_limit cl; struct timeformat *tf; + struct settle_config settle; struct adata *ad; struct bytestring *bs; } @@ -111,6 +113,7 @@ CF_DECLS %type <i> expr bool pxlen4 %type <time> expr_us time +%type <settle> settle %type <a> ipa %type <net> net_ip4_ net_ip4 net_ip6_ net_ip6 net_ip_ net_ip net_or_ipa %type <net_ptr> net_ net_any net_vpn4_ net_vpn6_ net_vpn_ net_roa4_ net_roa6_ net_roa_ net_ip6_sadr_ net_mpls_ @@ -386,6 +389,13 @@ time: } ; +/* Settle timer configuration */ +settle: expr_us expr_us { + if ($1 > $2) cf_error("Minimum settle time %t is bigger than maximum settle time %t", $1, $2); + $$.min = $1; + $$.max = $2; +}; + text: TEXT | CF_SYM_KNOWN { diff --git a/doc/bird.sgml b/doc/bird.sgml index 2bce3d57..0cfe19c4 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -699,6 +699,21 @@ to set options. threshold, the more memory can get used. In most cases, the defaults should work for you. Default: 128, 512. + <tag><label id="rtable-export-settle-time">export settle time <m/time/ <m/time/</tag> + Minimum and maximum settle times, respectively, for export announcements. + When multiple routes are changing, this mechanism waits for the changes + to settle before waking up sleeping export threads but if the changes are coming + steadily, BIRD isn't waiting forever; at most the maximum time. + Default values: <cf/1 ms 100 ms/. You have to always provide both values. + + <tag><label id="rtable-route-refresh-export-settle-time">route refresh export settle time <m/time/ <m/time/</tag> + Minimum and maximum settle times, respectively, for export announcements + (the same as above), valid when any channel is currently doing a route refresh. + This serves a purpose of even more aggresive change bundling, knowing that there + is some active process generating changes in a fast pace. If you don't want + this feature, set this to the same values as <ref id="rtable-export-settle-time" name="export settle time">. + Default values: <cf/100 ms 3 s/. + <tag><label id="rtable-debug">debug all|off|{ states|routes|events [, <m/.../] }</tag> Set table debugging options. Each table can write some trace messages into log with category <cf/trace/. You can request <cf/all/ trace messages @@ -965,14 +980,15 @@ inherited from templates can be updated by new definitions. <ref id="bgp-export-table" name="export table"> (for respective direction). Default: on. - <tag><label id="rtable-min-settle-time">min settle time <m/time/</tag> - Minimum settle time is a delay from the last ROA table change to wait - for more updates before triggering automatic reload. Default: 1 s. - - <tag><label id="rtable-min-settle-time">min settle time <m/time/</tag> - Maximum settle time is an upper limit to the settle time from the - initial ROA table change even if there are consecutive updates gradually - renewing the settle time. Default: 20 s. + <tag><label id="rtable-min-settle-time">roa settle time <m/time/ <m/time/</tag> + Minimum and maximum settle times, respectively, for ROA table changes. + The automatic reload is triggered after the minimum time after the last + ROA table change has been received but not later than the maximum time after + first unprocessed ROA table change. Therefore with default values, the + automatic reload happens 1 second after the ROA table stops updating, yet if it + were to be later than 20 seconds after the ROA table starts updating, + the automatic reload is triggered anyway. Default values: <cf/1 s 20 s/. + You have to always provide both values. <tag><label id="proto-import-limit">import limit [<m/number/ | off ] [action warn | block | restart | disable]</tag> Specify an import route limit (a maximum number of routes imported from diff --git a/filter/f-inst.c b/filter/f-inst.c index 426b598f..801eceec 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -70,7 +70,6 @@ * m4_dnl DYNAMIC_ATTR; dynamic attribute definition * m4_dnl RTC; route table config * m4_dnl ACCESS_RTE; this instruction needs route - * m4_dnl ACCESS_EATTRS; this instruction needs extended attributes * * m4_dnl FID_MEMBER( custom instruction member * m4_dnl C type, for storage in structs @@ -237,7 +236,6 @@ * m4_dnl fpool -> the current linpool * m4_dnl NEVER_CONSTANT-> don't generate pre-interpretation code at all * m4_dnl ACCESS_RTE -> check that route is available, also NEVER_CONSTANT - * m4_dnl ACCESS_EATTRS -> pre-cache the eattrs; use only with ACCESS_RTE * * m4_dnl If you are stymied, see FI_CALL or FI_CONSTANT or just search for * m4_dnl the mentioned macros in this file to see what is happening there in wild. @@ -687,7 +685,6 @@ { STATIC_ATTR; ACCESS_RTE; - ACCESS_EATTRS; switch (sa.sa_code) { @@ -695,7 +692,7 @@ case SA_PROTO: RESULT(sa.type, s, fs->rte->src->owner->name); break; default: { - struct eattr *nhea = ea_find(*fs->eattrs, &ea_gen_nexthop); + struct eattr *nhea = ea_find(fs->rte->attrs, &ea_gen_nexthop); struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; struct nexthop *nh = nhad ? &nhad->nh : NULL; @@ -731,7 +728,6 @@ INST(FI_RTA_SET, 1, 0) { ACCESS_RTE; - ACCESS_EATTRS; ARG_ANY(1); STATIC_ATTR; ARG_TYPE(1, sa.type); @@ -765,7 +761,7 @@ } case SA_GW: { - struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + struct eattr *nh_ea = ea_find(fs->rte->attrs, &ea_gen_nexthop); ip_addr ip = v1.val.ip; struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ? @@ -800,7 +796,7 @@ if (v1.val.i >= 0x100000) runtime( "Invalid MPLS label" ); - struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + struct eattr *nh_ea = ea_find(fs->rte->attrs, &ea_gen_nexthop); if (!nh_ea) runtime( "No nexthop to add a MPLS label to" ); @@ -823,7 +819,7 @@ if (i < 1 || i > 256) runtime( "Setting weight value out of bounds" ); - struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + struct eattr *nh_ea = ea_find(fs->rte->attrs, &ea_gen_nexthop); if (!nh_ea) runtime( "No nexthop to set weight on" ); @@ -837,7 +833,7 @@ NEXTHOP_WALK(nh, nhax) nh->weight = i - 1; - a = ea_set_attr(fs->eattrs, + a = ea_set_attr(&fs->rte->attrs, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhax->ad)); } break; @@ -847,7 +843,7 @@ } if (!a) - a = ea_set_attr(fs->eattrs, + a = ea_set_attr(&fs->rte->attrs, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nha.ad))); a->originated = 1; @@ -858,11 +854,10 @@ INST(FI_EA_GET, 0, 1) { /* Access to extended attributes */ DYNAMIC_ATTR; ACCESS_RTE; - ACCESS_EATTRS; RESULT_TYPE(da->type); { const struct f_val *empty; - const eattr *e = ea_find(*fs->eattrs, da->id); + const eattr *e = ea_find(fs->rte->attrs, da->id); if (e) { @@ -888,7 +883,6 @@ INST(FI_EA_SET, 1, 0) { ACCESS_RTE; - ACCESS_EATTRS; ARG_ANY(1); DYNAMIC_ATTR; ARG_TYPE(1, da->type); @@ -905,12 +899,12 @@ break; case T_IP: - a = ea_set_attr(fs->eattrs, + a = ea_set_attr(&fs->rte->attrs, EA_LITERAL_STORE_ADATA(da, 0, &v1.val.ip, sizeof(ip_addr))); break; default: - a = ea_set_attr(fs->eattrs, + a = ea_set_attr(&fs->rte->attrs, EA_LITERAL_GENERIC(da->id, da->type, 0, .u = v1.val.bval)); break; } @@ -923,9 +917,8 @@ INST(FI_EA_UNSET, 0, 0) { DYNAMIC_ATTR; ACCESS_RTE; - ACCESS_EATTRS; - ea_unset_attr(fs->eattrs, 1, da); + ea_unset_attr(&fs->rte->attrs, 1, da); } INST(FI_DEFAULT, 2, 1) { diff --git a/filter/filter.c b/filter/filter.c index 9a94545c..0aff4d30 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -76,9 +76,6 @@ struct filter_state { /* The route we are processing. This may be NULL to indicate no route available. */ struct rte *rte; - /* Cached pointer to ea_list */ - struct ea_list **eattrs; - /* Buffer for log output */ struct buffer buf; @@ -94,11 +91,6 @@ void (*bt_assert_hook)(int result, const struct f_line_item *assert); #define f_stack_init(fs) ( _f_stack_init(fs, v, 128), _f_stack_init(fs, e, 128) ) -static inline void f_cache_eattrs(struct filter_state *fs) -{ - fs->eattrs = &(fs->rte->attrs); -} - static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS; /** @@ -164,8 +156,6 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) #define falloc(size) tmp_alloc(size) #define fpool tmp_linpool -#define ACCESS_EATTRS do { if (!fs->eattrs) f_cache_eattrs(fs); } while (0) - #include "filter/inst-interpret.c" #undef res #undef v1 @@ -174,7 +164,6 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) #undef runtime #undef falloc #undef fpool -#undef ACCESS_EATTRS } } diff --git a/filter/filter_test.c b/filter/filter_test.c index 5b24a765..671dba94 100644 --- a/filter/filter_test.c +++ b/filter/filter_test.c @@ -79,11 +79,11 @@ main(int argc, char *argv[]) if (!bt_config_file_parse(BT_CONFIG_FILE)) abort(); - bt_test_suite(t_reconfig, "Testing reconfiguration"); + bt_test_suite_extra(t_reconfig, 0, BT_TIMEOUT, "Testing reconfiguration"); struct f_bt_test_suite *t; WALK_LIST(t, config->tests) - bt_test_suite_base(run_function, t->fn_name, t, BT_FORKING, BT_TIMEOUT, "%s", t->dsc); + bt_test_suite_base(run_function, t->fn_name, t, 0, BT_TIMEOUT, "%s", t->dsc); bt_bird_cleanup(); return bt_exit_value(); diff --git a/filter/tree_test.c b/filter/tree_test.c index 05702f81..d180efbc 100644 --- a/filter/tree_test.c +++ b/filter/tree_test.c @@ -170,6 +170,8 @@ t_balancing(void) show_tree(balanced_tree_from_simple); bt_assert(same_tree(balanced_tree_from_simple, expected_balanced_tree)); + + tmp_flush(); } return 1; @@ -191,6 +193,9 @@ t_balancing_random(void) uint i; for(i = 0; i < 10; i++) { + struct lp_state lps; + lp_save(tmp_linpool, &lps); + struct f_tree *random_degenerated_tree = get_random_degenerated_left_tree(nodes_count); show_tree(random_degenerated_tree); @@ -200,7 +205,11 @@ t_balancing_random(void) show_tree(balanced_tree_from_random); bt_assert(same_tree(balanced_tree_from_random, expected_balanced_tree)); + + lp_restore(tmp_linpool, &lps); } + + tmp_flush(); } return 1; @@ -227,6 +236,8 @@ t_find(void) const struct f_tree *found_tree = find_tree(tree, &looking_up_value); bt_assert((val_compare(&looking_up_value, &(found_tree->from)) == 0) && (val_compare(&looking_up_value, &(found_tree->to)) == 0)); } + + tmp_flush(); } return 1; @@ -283,6 +294,8 @@ t_find_ranges(void) ((val_compare(&needle, &(found_tree->from)) == 1) && (val_compare(&needle, &(found_tree->to)) == -1)) ); } + + tmp_flush(); } return 1; diff --git a/filter/trie_test.c b/filter/trie_test.c index dc791280..ddce2daa 100644 --- a/filter/trie_test.c +++ b/filter/trie_test.c @@ -251,12 +251,12 @@ get_outer_net(net_addr *net, const struct f_prefix *src) static list * make_random_prefix_list(int num, int v6, int tight) { - list *prefixes = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + list *prefixes = tmp_allocz(sizeof(struct f_prefix_node)); init_list(prefixes); for (int i = 0; i < num; i++) { - struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + struct f_prefix_node *px = tmp_allocz(sizeof(struct f_prefix_node)); get_random_prefix(&px->prefix, v6, tight); add_tail(prefixes, &px->n); @@ -294,7 +294,7 @@ read_prefix_list(FILE *f, int v6, int plus) char s[32]; int n; - list *pxlist = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + list *pxlist = tmp_allocz(sizeof(struct f_prefix_node)); init_list(pxlist); errno = 0; @@ -308,7 +308,7 @@ read_prefix_list(FILE *f, int v6, int plus) if (n != 5) bt_abort_msg("Invalid content of trie_data"); - struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); + struct f_prefix_node *px = tmp_allocz(sizeof(struct f_prefix_node)); net_fill_ip4(&px->prefix.net, ip4_build(a0, a1, a2, a3), pl); px->prefix.lo = pl; px->prefix.hi = plus ? IP4_MAX_PREFIX_LENGTH : pl; diff --git a/lib/event.c b/lib/event.c index 07d7dc53..68ee4c06 100644 --- a/lib/event.c +++ b/lib/event.c @@ -28,7 +28,50 @@ event_list global_event_list; event_list global_work_list; -STATIC_ASSERT(OFFSETOF(event_list, _sentinel.next) >= OFFSETOF(event_list, _end[0])); +//#ifdef DEBUGGING +#if 0 +#define EDL_MAX 16384 +enum edl_caller { + EDL_REMOVE_FROM = 1, + EDL_POSTPONE = 2, + EDL_RUN = 3, + EDL_SEND = 4, + EDL_RUN_LIST = 5, +} caller; +static struct event_debug_log { + event_list *target_list; + event *event; + event *receiver; + uint pos; + uint prev_edl_pos; + uint thread; + enum edl_caller caller; +} edl[EDL_MAX]; +static _Atomic uint edl_cnt; +_Thread_local static uint edl_thread; +_Thread_local static uint prev_edl_pos = ~0; +static inline void edlog(event_list *list, event *e, event *receiver, uint pos, enum edl_caller caller) +{ + uint edl_pos = atomic_fetch_add_explicit(&edl_cnt, 1, memory_order_acq_rel); + if (!edl_thread) + edl_thread = edl_pos; + + edl[edl_pos % EDL_MAX] = (struct event_debug_log) { + .target_list = list, + .event = e, + .receiver = receiver, + .pos = pos, + .prev_edl_pos = prev_edl_pos, + .thread = edl_thread, + .caller = caller, + }; + + prev_edl_pos = edl_pos; +} +#else +#define edlog(...) +#endif + void ev_init_list(event_list *el, struct birdloop *loop, const char *name) @@ -36,9 +79,8 @@ ev_init_list(event_list *el, struct birdloop *loop, const char *name) el->name = name; el->loop = loop; - atomic_store_explicit(&el->receiver, &el->_sentinel, memory_order_relaxed); - atomic_store_explicit(&el->_executor, &el->_sentinel, memory_order_relaxed); - atomic_store_explicit(&el->_sentinel.next, NULL, memory_order_relaxed); + atomic_store_explicit(&el->receiver, NULL, memory_order_release); + atomic_store_explicit(&el->_executor, NULL, memory_order_release); } /* @@ -61,13 +103,20 @@ ev_remove_from(event *e, event * _Atomic * head) /* The current event in queue to check */ event *cur = atomic_load_explicit(prev, memory_order_acquire); - /* Pre-loaded next pointer; if NULL, this is sentinel */ - event *next = atomic_load_explicit(&cur->next, memory_order_acquire); + /* This part of queue is empty! */ + if (!cur) + return 0; - while (next) + edlog(NULL, e, cur, 1, EDL_REMOVE_FROM); + while (cur) { + /* Pre-loaded next pointer */ + event *next = atomic_load_explicit(&cur->next, memory_order_acquire); + if (e == cur) { + edlog(NULL, e, next, 3, EDL_REMOVE_FROM); + /* Check whether we have collided with somebody else * adding an item to the queue. */ if (!atomic_compare_exchange_strong_explicit( @@ -86,12 +135,15 @@ ev_remove_from(event *e, event * _Atomic * head) return 1; } + edlog(NULL, e, next, 2, EDL_REMOVE_FROM); + /* Go to the next event. */ prev = &cur->next; cur = next; - next = atomic_load_explicit(&cur->next, memory_order_acquire); } + edlog(NULL, e, cur, 4, EDL_REMOVE_FROM); + return 0; } @@ -100,6 +152,7 @@ ev_postpone(event *e) { /* Find the list to remove the event from */ event_list *sl = ev_get_list(e); + edlog(sl, e, NULL, 1, EDL_POSTPONE); if (!sl) return; @@ -108,6 +161,10 @@ ev_postpone(event *e) /* Remove from one of these lists. */ ASSERT(ev_remove_from(e, &sl->_executor) || ev_remove_from(e, &sl->receiver)); + + /* Mark as inactive */ + ASSERT_DIE(sl == atomic_exchange_explicit(&e->list, NULL, memory_order_acq_rel)); + edlog(sl, e, NULL, 2, EDL_POSTPONE); } static void @@ -157,8 +214,10 @@ ev_new(pool *p) inline void ev_run(event *e) { + edlog(NULL, e, NULL, 1, EDL_RUN); ev_postpone(e); e->hook(e->data); + edlog(NULL, e, NULL, 2, EDL_RUN); } /** @@ -172,18 +231,36 @@ ev_run(event *e) inline void ev_send(event_list *l, event *e) { - event_list *sl = ev_get_list(e); - if (sl == l) - return; - if (sl) - bug("Queuing an already queued event to another queue is not supported."); - + edlog(l, e, NULL, 1, EDL_SEND); + /* Set the target list */ + event_list *ol = NULL; + if (!atomic_compare_exchange_strong_explicit( + &e->list, &ol, l, + memory_order_acq_rel, memory_order_acquire)) + if (ol == l) + return; + else + bug("Queuing an already queued event to another queue is not supported."); + + /* Here should be no concurrent senders */ event *next = atomic_load_explicit(&l->receiver, memory_order_acquire); - do atomic_store_explicit(&e->next, next, memory_order_relaxed); + edlog(l, e, next, 2, EDL_SEND); + event *old_next = NULL; + do + if (!atomic_compare_exchange_strong_explicit( + &e->next, &old_next, next, + memory_order_acq_rel, memory_order_acquire)) + bug("Event %p in inconsistent state"); + else + { + old_next = next; + edlog(l, old_next, next, 3, EDL_SEND); + } while (!atomic_compare_exchange_strong_explicit( &l->receiver, &next, e, memory_order_acq_rel, memory_order_acquire)); + edlog(l, e, next, 4, EDL_SEND); birdloop_ping(l->loop); } @@ -199,37 +276,41 @@ int ev_run_list_limited(event_list *l, uint limit) { event * _Atomic *ep = &l->_executor; + edlog(l, NULL, NULL, 1, EDL_RUN_LIST); /* No pending events, refill the queue. */ - if (atomic_load_explicit(ep, memory_order_relaxed) == &l->_sentinel) + if (!atomic_load_explicit(ep, memory_order_acquire)) { /* Move the current event list aside and create a new one. */ - event *received = atomic_exchange_explicit( - &l->receiver, &l->_sentinel, memory_order_acq_rel); + event *received = atomic_exchange_explicit(&l->receiver, NULL, memory_order_acq_rel); + edlog(l, NULL, received, 2, EDL_RUN_LIST); /* No event to run. */ - if (received == &l->_sentinel) + if (!received) return 0; /* Setup the executor queue */ - event *head = &l->_sentinel; + event *head = NULL; /* Flip the order of the events by relinking them one by one (push-pop) */ - while (received != &l->_sentinel) + while (received) { event *cur = received; - received = atomic_exchange_explicit(&cur->next, head, memory_order_relaxed); + received = atomic_exchange_explicit(&cur->next, head, memory_order_acq_rel); + edlog(l, head, received, 3, EDL_RUN_LIST); head = cur; } /* Store the executor queue to its designated place */ - atomic_store_explicit(ep, head, memory_order_relaxed); + ASSERT_DIE(atomic_exchange_explicit(ep, head, memory_order_acq_rel) == NULL); + edlog(l, NULL, head, 4, EDL_RUN_LIST); } /* Run the events in order. */ event *e; - while ((e = atomic_load_explicit(ep, memory_order_relaxed)) != &l->_sentinel) + while (e = atomic_load_explicit(ep, memory_order_acquire)) { + edlog(l, e, NULL, 5, EDL_RUN_LIST); /* Check limit */ if (!--limit) return 1; @@ -238,14 +319,20 @@ ev_run_list_limited(event_list *l, uint limit) if ((l == &global_event_list) || (l == &global_work_list)) io_log_event(e->hook, e->data); + edlog(l, e, NULL, 6, EDL_RUN_LIST); /* Inactivate the event */ - atomic_store_explicit(ep, atomic_load_explicit(&e->next, memory_order_relaxed), memory_order_relaxed); - atomic_store_explicit(&e->next, NULL, memory_order_relaxed); + event *next = atomic_load_explicit(&e->next, memory_order_relaxed); + ASSERT_DIE(e == atomic_exchange_explicit(ep, next, memory_order_acq_rel)); + ASSERT_DIE(next == atomic_exchange_explicit(&e->next, NULL, memory_order_acq_rel)); + ASSERT_DIE(l == atomic_exchange_explicit(&e->list, NULL, memory_order_acq_rel)); + edlog(l, e, next, 7, EDL_RUN_LIST); /* Run the event */ e->hook(e->data); tmp_flush(); + + edlog(l, e, next, 8, EDL_RUN_LIST); } - return atomic_load_explicit(&l->receiver, memory_order_relaxed) != &l->_sentinel; + return !!atomic_load_explicit(&l->receiver, memory_order_acquire); } diff --git a/lib/event.h b/lib/event.h index 9773c3a9..0bef737a 100644 --- a/lib/event.h +++ b/lib/event.h @@ -11,6 +11,7 @@ #include "lib/resource.h" #include "lib/locking.h" +#include "lib/rcu.h" #include <stdatomic.h> @@ -21,17 +22,14 @@ typedef struct event { void (*hook)(void *); void *data; struct event * _Atomic next; + struct event_list * _Atomic list; } event; -typedef union event_list { - struct { - event * _Atomic receiver; /* Event receive list */ - event * _Atomic _executor; /* Event execute list */ - const char *name; - struct birdloop *loop; /* The executor loop */ - char _end[0]; - }; - event _sentinel; /* Sentinel node to actively detect list end */ +typedef struct event_list { + event * _Atomic receiver; /* Event receive list */ + event * _Atomic _executor; /* Event execute list */ + const char *name; + struct birdloop *loop; /* The executor loop */ } event_list; extern event_list global_event_list; @@ -56,23 +54,13 @@ int ev_run_list_limited(event_list *, uint); static inline int ev_active(event *e) { - return atomic_load_explicit(&e->next, memory_order_relaxed) != NULL; + return atomic_load_explicit(&e->list, memory_order_acquire) != NULL; } static inline event_list * ev_get_list(event *e) { - /* We are looking for the sentinel node at the list end. - * After this, we have s->next == NULL */ - event *s = e; - for (event *sn; sn = atomic_load_explicit(&s->next, memory_order_acquire); s = sn) - ; - - /* No sentinel, no list. */ - if (s == e) - return NULL; - else - return SKIP_BACK(event_list, _sentinel, s); + return atomic_load_explicit(&e->list, memory_order_acquire); } static inline event* diff --git a/lib/io-loop.h b/lib/io-loop.h index 2450a609..ae58bbee 100644 --- a/lib/io-loop.h +++ b/lib/io-loop.h @@ -50,6 +50,14 @@ void birdloop_unlink(struct birdloop *loop); void birdloop_ping(struct birdloop *loop); +struct birdloop_flag_handler { + void (*hook)(struct birdloop_flag_handler *, u32 flags); + void *data; +}; + +void birdloop_flag(struct birdloop *loop, u32 flag); +void birdloop_flag_set_handler(struct birdloop *, struct birdloop_flag_handler *); + void birdloop_init(void); /* Yield for a little while. Use only in special cases. */ diff --git a/lib/locking.h b/lib/locking.h index 1df30063..498afdc8 100644 --- a/lib/locking.h +++ b/lib/locking.h @@ -15,6 +15,7 @@ struct domain_generic; struct lock_order { struct domain_generic *the_bird; struct domain_generic *proto; + struct domain_generic *service; struct domain_generic *rtable; struct domain_generic *attrs; struct domain_generic *resource; diff --git a/lib/resource.c b/lib/resource.c index 898fb533..2e367132 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -279,8 +279,8 @@ rlookup(unsigned long a) void resource_init(void) { - resource_sys_init(); rcu_init(); + resource_sys_init(); root_pool.r.class = &pool_class; root_pool.name = "Root"; diff --git a/lib/resource.h b/lib/resource.h index 5ad011ec..5d9e2165 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -122,8 +122,11 @@ void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_siz /* Allocator of whole pages; for use in slabs and other high-level allocators. */ #define PAGE_HEAD(x) ((void *) (((uintptr_t) (x)) & ~(page_size-1))) extern long page_size; +extern _Atomic int pages_kept; +extern _Atomic int pages_kept_locally; void *alloc_page(void); void free_page(void *); +void flush_local_pages(void); void resource_sys_init(void); diff --git a/lib/settle.h b/lib/settle.h new file mode 100644 index 00000000..d274599d --- /dev/null +++ b/lib/settle.h @@ -0,0 +1,64 @@ +/* + * BIRD -- Settle timer + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * (c) 2022 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_SETTLE_H_ +#define _BIRD_SETTLE_H_ + +#include "lib/birdlib.h" +#include "lib/timer.h" + +struct settle_config { + btime min, max; +}; + +struct settle { + union { + /* Timer hook polymorphism. */ + struct { + resource _r; + void (*hook)(struct settle *); + }; + timer tm; + }; + struct settle_config cf; + btime started; +}; + +STATIC_ASSERT(OFFSETOF(struct settle, hook) == OFFSETOF(struct settle, tm) + OFFSETOF(timer, hook)); + +#define SETTLE_INIT(_cfp, _hook, _data) (struct settle) { .tm = { .data = (_data), }, .hook = (_hook), .cf = ({ASSERT_DIE((_cfp)->min <= (_cfp)->max); *(_cfp); }), } + + +static inline void settle_init(struct settle *s, struct settle_config *cf, void (*hook)(struct settle *), void *data) +{ + *s = SETTLE_INIT(cf, hook, data); +} + +#define settle_active(s) tm_active(&(s)->tm) + +static inline void settle_kick(struct settle *s, struct birdloop *loop) +{ + if (!tm_active(&s->tm)) + { + s->started = current_time(); + tm_set_in(&s->tm, s->started + s->cf.min, loop); + } + else + { + btime now = current_time(); + tm_set_in(&s->tm, MIN_(now + s->cf.min, s->started + s->cf.max), loop); + } +} + +static inline void settle_cancel(struct settle *s) +{ + tm_stop(&s->tm); +} + +#endif diff --git a/nest/Makefile b/nest/Makefile index 39617350..5b27da0c 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -2,14 +2,13 @@ src := cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c proto-build.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,dev_build) -$(proto-build-c): $(lastword $(MAKEFILE_LIST)) +$(objdir)/nest/proto-build.c: $(lastword $(MAKEFILE_LIST)) $(E)echo GEN $@ $(Q)echo "#include \"lib/birdlib.h\"" > $@ - $(Q)$(patsubst %,echo 'void %(void);' >> $@;,$(PROTO_BUILD)) + $(Q)$(patsubst %,echo 'void %_build(void);' >> $@;,$(PROTO_BUILD)) $(Q)echo "void protos_build_gen(void) {" >> $@ - $(Q)$(patsubst %,echo ' %();'>>$@;,$(PROTO_BUILD)) + $(Q)$(patsubst %,echo ' %_build();'>>$@;,$(PROTO_BUILD)) $(Q)echo "}" >> $@ tests_src := diff --git a/nest/cmds.c b/nest/cmds.c index 092be48a..8a5bbdd4 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -109,7 +109,6 @@ print_size(char *dsc, struct resmem vals) extern pool *rt_table_pool; extern pool *rta_pool; -extern uint *pages_kept; void cmd_show_memory(void) @@ -121,8 +120,10 @@ cmd_show_memory(void) print_size("Protocols:", rmemsize(proto_pool)); struct resmem total = rmemsize(&root_pool); #ifdef HAVE_MMAP - print_size("Standby memory:", (struct resmem) { .overhead = page_size * *pages_kept }); - total.overhead += page_size * *pages_kept; + int pk = atomic_load_explicit(&pages_kept, memory_order_relaxed) + + atomic_load_explicit(&pages_kept_locally, memory_order_relaxed); + print_size("Standby memory:", (struct resmem) { .overhead = page_size * pk }); + total.overhead += page_size * pk; #endif print_size("Total:", total); cli_msg(0, ""); diff --git a/nest/config.Y b/nest/config.Y index 84c76ae9..f2904882 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -125,7 +125,7 @@ CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) -CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) +CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, ROA, ROUTE, REFRESH, SETTLE, TIME, GC, THRESHOLD, PERIOD) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -233,6 +233,8 @@ table_opt: if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); this_table->cork_threshold.low = $3; this_table->cork_threshold.high = $4; } + | EXPORT SETTLE TIME settle { this_table->export_settle = $4; } + | ROUTE REFRESH EXPORT SETTLE TIME settle { this_table->export_rr_settle = $6; } | DEBUG bool { this_table->debug = $2; } ; @@ -294,8 +296,8 @@ proto_item: | MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; } | ROUTER ID idval { this_proto->router_id = $3; } | DESCRIPTION text { this_proto->dsc = $2; } - | VRF text { this_proto->vrf = if_get_by_name($2); this_proto->vrf_set = 1; } - | VRF DEFAULT { this_proto->vrf = NULL; this_proto->vrf_set = 1; } + | VRF text { this_proto->vrf = if_get_by_name($2); } + | VRF DEFAULT { this_proto->vrf = &default_vrf; } ; @@ -321,8 +323,7 @@ channel_item_: | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } - | MIN SETTLE TIME expr_us { this_channel->min_settle_time = $4; } - | MAX SETTLE TIME expr_us { this_channel->max_settle_time = $4; } + | ROA SETTLE TIME settle { this_channel->roa_settle = $4; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } | IMPORT KEEP FILTERED bool { if ($4) @@ -424,7 +425,6 @@ timeformat_base: TIMEFORMAT timeformat_spec ';' ; - /* Interface patterns */ iface_patt_node_init: diff --git a/nest/iface.c b/nest/iface.c index 682340c5..fc896e26 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -37,6 +37,7 @@ static pool *if_pool; list iface_list; +struct iface default_vrf; static void if_recalc_preferred(struct iface *i); @@ -147,7 +148,7 @@ ifa_send_notify(struct proto *p, unsigned c, struct ifa *a) { if (p->ifa_notify && (p->proto_state != PS_DOWN) && - (!p->vrf_set || p->vrf == a->iface->master)) + (!p->vrf || p->vrf == a->iface->master)) { if (p->debug & D_IFACES) log(L_TRACE "%s < address %N on interface %s %s", @@ -185,7 +186,7 @@ if_send_notify(struct proto *p, unsigned c, struct iface *i) { if (p->if_notify && (p->proto_state != PS_DOWN) && - (!p->vrf_set || p->vrf == i->master)) + (!p->vrf || p->vrf == i->master)) { if (p->debug & D_IFACES) log(L_TRACE "%s < interface %s %s", p->name, i->name, @@ -243,7 +244,7 @@ if_recalc_flags(struct iface *i UNUSED, uint flags) { if ((flags & IF_ADMIN_UP) && !(flags & (IF_SHUTDOWN | IF_TMP_DOWN)) && - !(i->master_index && !i->master)) + !(i->master_index && i->master == &default_vrf)) flags |= IF_UP; else flags &= ~IF_UP; @@ -301,6 +302,9 @@ if_update(struct iface *new) struct iface *i; unsigned c; + if (!new->master) + new->master = &default_vrf; + WALK_LIST(i, iface_list) if (!strcmp(new->name, i->name)) { @@ -711,6 +715,7 @@ if_init(void) { if_pool = rp_new(&root_pool, "Interfaces"); init_list(&iface_list); + strcpy(default_vrf.name, "default"); neigh_init(if_pool); } @@ -843,7 +848,7 @@ if_show(void) continue; char mbuf[16 + sizeof(i->name)] = {}; - if (i->master) + if (i->master != &default_vrf) bsprintf(mbuf, " master=%s", i->master->name); else if (i->master_index) bsprintf(mbuf, " master=#%u", i->master_index); diff --git a/nest/iface.h b/nest/iface.h index 1189cdd4..13f3bd12 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -28,6 +28,8 @@ struct ifa { /* Interface address */ unsigned flags; /* Analogous to iface->flags */ }; +extern struct iface default_vrf; + struct iface { node n; char name[16]; diff --git a/nest/neighbor.c b/nest/neighbor.c index 7cf9c85d..81da24d5 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -142,7 +142,7 @@ if_connected(ip_addr a, struct iface *i, struct ifa **ap, uint flags) } static inline int -if_connected_any(ip_addr a, struct iface *vrf, uint vrf_set, struct iface **iface, struct ifa **addr, uint flags) +if_connected_any(ip_addr a, struct iface *vrf, struct iface **iface, struct ifa **addr, uint flags) { struct iface *i; struct ifa *b; @@ -153,7 +153,7 @@ if_connected_any(ip_addr a, struct iface *vrf, uint vrf_set, struct iface **ifac /* Prefer SCOPE_HOST or longer prefix */ WALK_LIST(i, iface_list) - if ((!vrf_set || vrf == i->master) && ((s = if_connected(a, i, &b, flags)) >= 0)) + if ((!vrf || vrf == i->master) && ((s = if_connected(a, i, &b, flags)) >= 0)) if (scope_better(s, scope) || (scope_remote(s, scope) && ifa_better(b, *addr))) { *iface = i; @@ -245,7 +245,7 @@ neigh_find(struct proto *p, ip_addr a, struct iface *iface, uint flags) iface = (scope < 0) ? NULL : iface; } else - scope = if_connected_any(a, p->vrf, p->vrf_set, &iface, &addr, flags); + scope = if_connected_any(a, p->vrf, &iface, &addr, flags); /* scope < 0 means i don't know neighbor */ /* scope >= 0 <=> iface != NULL */ @@ -369,7 +369,7 @@ neigh_update(neighbor *n, struct iface *iface) return; /* VRF-bound neighbors ignore changes in other VRFs */ - if (p->vrf_set && (p->vrf != iface->master)) + if (p->vrf && (p->vrf != iface->master)) return; scope = if_connected(n->addr, iface, &ifa, n->flags); @@ -379,7 +379,7 @@ neigh_update(neighbor *n, struct iface *iface) { /* When neighbor is going down, try to respawn it on other ifaces */ if ((scope < 0) && (n->scope >= 0) && !n->ifreq && (n->flags & NEF_STICKY)) - scope = if_connected_any(n->addr, p->vrf, p->vrf_set, &iface, &ifa, n->flags); + scope = if_connected_any(n->addr, p->vrf, &iface, &ifa, n->flags); } else { diff --git a/nest/proto.c b/nest/proto.c index 783a936c..319b35dd 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -315,16 +315,15 @@ proto_remove_channels(struct proto *p) struct roa_subscription { node roa_node; - timer t; - btime base_settle_time; /* Start of settling interval */ + struct settle settle; struct channel *c; struct rt_export_request req; }; static void -channel_roa_in_changed(struct timer *t) +channel_roa_in_changed(struct settle *se) { - struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t); + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, settle, se); struct channel *c = s->c; int active = !!c->reload_req.hook; @@ -337,9 +336,9 @@ channel_roa_in_changed(struct timer *t) } static void -channel_roa_out_changed(struct timer *t) +channel_roa_out_changed(struct settle *se) { - struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t); + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, settle, se); struct channel *c = s->c; CD(c, "Feeding triggered by RPKI change"); @@ -356,17 +355,7 @@ channel_export_one_roa(struct rt_export_request *req, const net_addr *net UNUSED struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req); /* TODO: use the information about what roa has changed */ - - if (!tm_active(&s->t)) - { - s->base_settle_time = current_time(); - tm_start(&s->t, s->base_settle_time + s->c->min_settle_time); - } - else - tm_set(&s->t, - MIN(s->base_settle_time + s->c->max_settle_time, - current_time() + s->c->min_settle_time)); - + settle_kick(&s->settle, &main_birdloop); rpe_mark_seen_all(req->hook, first, NULL); } @@ -380,14 +369,14 @@ channel_dump_roa_req(struct rt_export_request *req) debug(" Channel %s.%s ROA %s change notifier from table %s request %p\n", c->proto->name, c->name, - (s->t.hook == channel_roa_in_changed) ? "import" : "export", + (s->settle.hook == channel_roa_in_changed) ? "import" : "export", tab->name, req); } static int channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir) { - void (*hook)(struct timer *) = + void (*hook)(struct settle *) = dir ? channel_roa_in_changed : channel_roa_out_changed; struct roa_subscription *s; @@ -395,7 +384,7 @@ channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir) WALK_LIST2(s, n, c->roa_subscriptions, roa_node) if ((tab == SKIP_BACK(rtable, priv.exporter.e, s->req.hook->table)) - && (s->t.hook == hook)) + && (s->settle.hook == hook)) return 1; return 0; @@ -410,7 +399,7 @@ channel_roa_subscribe(struct channel *c, rtable *tab, int dir) struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription)); *s = (struct roa_subscription) { - .t = { .hook = dir ? channel_roa_in_changed : channel_roa_out_changed, }, + .settle = SETTLE_INIT(&c->roa_settle, dir ? channel_roa_in_changed : channel_roa_out_changed, NULL), .c = c, .req = { .name = mb_sprintf(c->proto->pool, "%s.%s.roa-%s.%s", @@ -934,8 +923,10 @@ channel_config_new(const struct channel_class *cc, const char *name, uint net_ty cf->debug = new_config->channel_default_debug; cf->rpki_reload = 1; - cf->min_settle_time = 1 S; - cf->max_settle_time = 20 S; + cf->roa_settle = (struct settle_config) { + .min = 1 S, + .max = 20 S, + }; add_tail(&proto->channels, &cf->n); @@ -1017,20 +1008,20 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->rpki_reload = cf->rpki_reload; - if ( (c->min_settle_time != cf->min_settle_time) - || (c->max_settle_time != cf->max_settle_time)) + if ( (c->roa_settle.min != cf->roa_settle.min) + || (c->roa_settle.max != cf->roa_settle.max)) { - c->min_settle_time = cf->min_settle_time; - c->max_settle_time = cf->max_settle_time; + c->roa_settle = cf->roa_settle; struct roa_subscription *s; node *n; WALK_LIST2(s, n, c->roa_subscriptions, roa_node) - if (tm_active(&s->t)) - tm_set(&s->t, - MIN(s->base_settle_time + c->max_settle_time, - current_time() + c->min_settle_time)); + { + s->settle.cf = cf->roa_settle; + if (settle_active(&s->settle)) + settle_kick(&s->settle, &main_birdloop); + } } /* Execute channel-specific reconfigure hook */ @@ -1156,6 +1147,7 @@ proto_event(void *ptr) { if (p->proto == &proto_unix_iface) if_flush_ifaces(p); + p->do_stop = 0; } @@ -1208,7 +1200,6 @@ proto_init(struct proto_config *c, node *n) p->proto_state = PS_DOWN; p->last_state_change = current_time(); p->vrf = c->vrf; - p->vrf_set = c->vrf_set; insert_node(&p->n, n); p->event = ev_new_init(proto_pool, proto_event, p); @@ -1385,8 +1376,7 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config if ((nc->protocol != oc->protocol) || (nc->net_type != oc->net_type) || (nc->disabled != p->disabled) || - (nc->vrf != oc->vrf) || - (nc->vrf_set != oc->vrf_set)) + (nc->vrf != oc->vrf)) return 0; p->name = nc->name; @@ -2305,8 +2295,8 @@ proto_cmd_show(struct proto *p, uintptr_t verbose, int cnt) cli_msg(-1006, " Message: %s", p->message); if (p->cf->router_id) cli_msg(-1006, " Router ID: %R", p->cf->router_id); - if (p->vrf_set) - cli_msg(-1006, " VRF: %s", p->vrf ? p->vrf->name : "default"); + if (p->vrf) + cli_msg(-1006, " VRF: %s", p->vrf->name); if (p->proto->show_proto_info) p->proto->show_proto_info(p); diff --git a/nest/protocol.h b/nest/protocol.h index c88598cc..892d1890 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -12,6 +12,7 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" +#include "lib/settle.h" #include "nest/rt.h" #include "nest/limit.h" #include "conf/conf.h" @@ -99,7 +100,6 @@ struct proto_config { int class; /* SYM_PROTO or SYM_TEMPLATE */ u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ u8 disabled; /* Protocol enabled/disabled by default */ - u8 vrf_set; /* Related VRF instance (below) is defined */ u8 late_if_feed; /* Delay interface feed after channels are up */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ u32 router_id; /* Protocol specific router ID */ @@ -136,7 +136,6 @@ struct proto { uint active_loops; /* Number of active IO loops */ byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ - byte vrf_set; /* Related VRF instance (above) is defined */ byte proto_state; /* Protocol state machine (PS_*, see below) */ byte active; /* From PS_START to cleanup after PS_STOP */ byte do_stop; /* Stop actions are scheduled */ @@ -459,8 +458,7 @@ struct channel_config { struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ - btime min_settle_time; /* Minimum settle time for ROA-induced reload */ - btime max_settle_time; /* Maximum settle time for ROA-induced reload */ + struct settle_config roa_settle; /* Settle times for ROA-induced reload */ u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ u8 ra_mode; /* Mode of received route advertisements (RA_*) */ @@ -489,8 +487,7 @@ struct channel { struct limit in_limit; /* Input limit */ struct limit out_limit; /* Output limit */ - btime min_settle_time; /* Minimum settle time for ROA-induced reload */ - btime max_settle_time; /* Maximum settle time for ROA-induced reload */ + struct settle_config roa_settle; /* Settle times for ROA-induced reload */ u8 limit_actions[PLD_MAX]; /* Limit actions enum */ u8 limit_active; /* Flags for active limits */ diff --git a/nest/rt-table.c b/nest/rt-table.c index 95248635..36d69d92 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -130,7 +130,8 @@ struct rt_export_block { static void rt_free_hostcache(struct rtable_private *tab); static void rt_update_hostcache(void *tab); -static void rt_next_hop_update(void *tab); +static void rt_next_hop_update(struct rtable_private *tab); +static void rt_nhu_uncork(void *_tab); static inline void rt_next_hop_resolve_rte(rte *r); static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(struct rtable_private *tab); @@ -142,9 +143,10 @@ static void rt_feed_for(void *); static void rt_check_cork_low(struct rtable_private *tab); static void rt_check_cork_high(struct rtable_private *tab); static void rt_cork_release_hook(void *); +static void rt_shutdown(void *); static void rt_delete(void *); -static void rt_export_used(struct rt_table_exporter *); +static void rt_export_used(struct rt_table_exporter *, const char *, const char *); static void rt_export_cleanup(struct rtable_private *tab); static int rte_same(rte *x, rte *y); @@ -160,6 +162,7 @@ const char *rt_import_state_name_array[TIS_MAX] = { const char *rt_export_state_name_array[TES_MAX] = { [TES_DOWN] = "DOWN", + [TES_HUNGRY] = "HUNGRY", [TES_FEEDING] = "FEEDING", [TES_READY] = "READY", [TES_STOP] = "STOP" @@ -183,8 +186,12 @@ const char *rt_export_state_name(u8 state) static struct hostentry *rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep); +static inline rtable *rt_priv_to_pub(struct rtable_private *tab) { return RT_PUB(tab); } +static inline rtable *rt_pub_to_pub(rtable *tab) { return tab; } +#define RT_ANY_TO_PUB(tab) _Generic((tab),rtable*:rt_pub_to_pub,struct rtable_private*:rt_priv_to_pub)((tab)) + #define rt_trace(tab, level, fmt, args...) do {\ - struct rtable_private *t = (tab); \ + rtable *t = RT_ANY_TO_PUB((tab)); \ if (t->config->debug & (level)) \ log(L_TRACE "%s: " fmt, t->name, ##args); \ } while (0) @@ -1416,9 +1423,9 @@ rt_send_export_event(struct rt_export_hook *hook) } static void -rt_announce_exports(timer *tm) +rt_announce_exports(struct settle *s) { - RT_LOCKED((rtable *) tm->data, tab) + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, export_settle, s)), tab) if (!EMPTY_LIST(tab->exporter.pending)) { struct rt_export_hook *c; node *n; @@ -1433,32 +1440,37 @@ rt_announce_exports(timer *tm) } static void +rt_kick_export_settle(struct rtable_private *tab) +{ + tab->export_settle.cf = tab->rr_counter ? tab->config->export_rr_settle : tab->config->export_settle; + settle_kick(&tab->export_settle, tab->loop); +} + +static void rt_import_announce_exports(void *_hook) { struct rt_import_hook *hook = _hook; - RT_LOCKED(hook->table, tab) + if (hook->import_state == TIS_CLEARED) { - if (hook->import_state == TIS_CLEARED) + void (*stopped)(struct rt_import_request *) = hook->stopped; + struct rt_import_request *req = hook->req; + + RT_LOCKED(hook->table, tab) { - void (*stopped)(struct rt_import_request *) = hook->stopped; - struct rt_import_request *req = hook->req; req->hook = NULL; rt_trace(tab, D_EVENTS, "Hook %s stopped", req->name); rem_node(&hook->n); mb_free(hook); rt_unlock_table(tab); - RT_UNLOCK(tab); - - stopped(req); - return; } - rt_trace(tab, D_EVENTS, "Announcing exports after imports from %s", hook->req->name); - - if (!tm_active(tab->exporter.export_timer)) - tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + stopped(req); + return; } + + rt_trace(hook->table, D_EVENTS, "Announcing exports after imports from %s", hook->req->name); + birdloop_flag(hook->table->loop, RTF_EXPORT); } static struct rt_pending_export * @@ -1494,7 +1506,7 @@ rt_export_hook(void *_data) if (!c->rpe_next) { - rt_export_used(c->table); + rt_export_used(c->table, c->h.req->name, "done exporting"); RT_UNLOCK(tab); return; } @@ -1515,7 +1527,7 @@ rt_export_hook(void *_data) if (used) RT_LOCKED(tab, _) - rt_export_used(c->table); + rt_export_used(c->table, c->h.req->name, "finished export bulk"); rt_send_export_event(&c->h); } @@ -1957,7 +1969,7 @@ rt_table_export_done(void *hh) DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); /* Drop pending exports */ - rt_export_used(&tab->exporter); + rt_export_used(&tab->exporter, hook->h.req->name, "stopped"); /* Do the common code; this frees the hook */ rt_export_stopped(&hook->h); @@ -1996,9 +2008,10 @@ void rt_set_export_state(struct rt_export_hook *hook, u8 state) { hook->last_state_change = current_time(); - atomic_store_explicit(&hook->export_state, state, memory_order_release); + u8 old = atomic_exchange_explicit(&hook->export_state, state, memory_order_release); - CALL(hook->req->log_state_change, hook->req, state); + if (old != state) + CALL(hook->req->log_state_change, hook->req, state); } void @@ -2033,9 +2046,46 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r rt_schedule_prune(tab); rt_set_import_state(hook, TIS_STOP); hook->stopped = stopped; + + if (hook->stale_set != hook->stale_pruned) + tab->rr_counter -= (hook->stale_set - hook->stale_pruned - 1); + else + tab->rr_counter++; + + hook->stale_set = hook->stale_pruned = hook->stale_pruning = hook->stale_valid = 0; } } +static void rt_table_export_start_feed(struct rtable_private *tab, struct rt_table_export_hook *hook); +static void +rt_table_export_uncork(void *_hook) +{ + ASSERT_DIE(birdloop_inside(&main_birdloop)); + + struct rt_table_export_hook *hook = _hook; + struct birdloop *loop = hook->h.req->list->loop; + + if (loop != &main_birdloop) + birdloop_enter(loop); + + u8 state; + switch (state = atomic_load_explicit(&hook->h.export_state, memory_order_relaxed)) + { + case TES_HUNGRY: + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, hook->table)), tab) + rt_table_export_start_feed(tab, hook); + break; + case TES_STOP: + rt_stop_export_common(&hook->h); + break; + default: + bug("Uncorking a table export in a strange state: %u", state); + } + + if (loop != &main_birdloop) + birdloop_leave(loop); +} + static void rt_table_export_start_locked(struct rtable_private *tab, struct rt_export_request *req) { @@ -2046,6 +2096,22 @@ rt_table_export_start_locked(struct rtable_private *tab, struct rt_export_reques req->hook->req = req; struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, req->hook); + hook->h.event = (event) { + .hook = rt_table_export_uncork, + .data = hook, + }; + + if (rt_cork_check(&hook->h.event)) + rt_set_export_state(&hook->h, TES_HUNGRY); + else + rt_table_export_start_feed(tab, hook); +} + +static void +rt_table_export_start_feed(struct rtable_private *tab, struct rt_table_export_hook *hook) +{ + struct rt_exporter *re = &tab->exporter.e; + struct rt_export_request *req = hook->h.req; /* stats zeroed by mb_allocz */ switch (req->addr_mode) @@ -2133,41 +2199,54 @@ rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook) rt_send_export_event(hook); } -static void +static int rt_table_export_stop_locked(struct rt_export_hook *hh) { struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh); struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, hook->table); - if (atomic_load_explicit(&hh->export_state, memory_order_relaxed) == TES_FEEDING) - switch (hh->req->addr_mode) - { - case TE_ADDR_IN: - if (hook->walk_lock) - { - rt_unlock_trie(tab, hook->walk_lock); - hook->walk_lock = NULL; - mb_free(hook->walk_state); - hook->walk_state = NULL; + switch (atomic_load_explicit(&hh->export_state, memory_order_relaxed)) + { + case TES_HUNGRY: + return 0; + case TES_FEEDING: + switch (hh->req->addr_mode) + { + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); break; - } - /* fall through */ - case TE_ADDR_NONE: - fit_get(&tab->fib, &hook->feed_fit); - break; - } + } + + } + return 1; } static void rt_table_export_stop(struct rt_export_hook *hh) { struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh); + int ok = 0; rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table); if (RT_IS_LOCKED(t)) - rt_table_export_stop_locked(hh); + ok = rt_table_export_stop_locked(hh); else RT_LOCKED(t, tab) - rt_table_export_stop_locked(hh); + ok = rt_table_export_stop_locked(hh); + + if (ok) + rt_stop_export_common(hh); + else + rt_set_export_state(&hook->h, TES_STOP); } void @@ -2177,19 +2256,25 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r ASSERT_DIE(req->hook); struct rt_export_hook *hook = req->hook; - /* Cancel the feeder event */ - ev_postpone(&hook->event); - - /* Stop feeding from the exporter */ - CALL(hook->table->class->stop, hook); - - /* Reset the event as the stopped event */ - hook->event.hook = hook->table->class->done; + /* Set the stopped callback */ hook->stopped = stopped; + /* Run the stop code */ + if (hook->table->class->stop) + hook->table->class->stop(hook); + else + rt_stop_export_common(hook); +} + +void +rt_stop_export_common(struct rt_export_hook *hook) +{ /* Update export state */ rt_set_export_state(hook, TES_STOP); + /* Reset the event as the stopped event */ + hook->event.hook = hook->table->class->done; + /* Run the stopped event */ rt_send_export_event(hook); } @@ -2230,6 +2315,7 @@ rt_refresh_begin(struct rt_import_request *req) e->rte.stale_cycle = 0; } FIB_WALK_END; + tab->rr_counter -= (hook->stale_set - hook->stale_pruned - 1); hook->stale_set = 1; hook->stale_valid = 0; hook->stale_pruned = 0; @@ -2240,6 +2326,7 @@ rt_refresh_begin(struct rt_import_request *req) /* Let's reserve the stale_cycle zero value for always-invalid routes */ hook->stale_set = 1; hook->stale_valid = 0; + tab->rr_counter++; } if (req->trace_routes & D_STATES) @@ -2391,22 +2478,18 @@ rt_schedule_nhu(struct rtable_private *tab) if (tab->nhu_corked) { if (!(tab->nhu_corked & NHU_SCHEDULED)) - { tab->nhu_corked |= NHU_SCHEDULED; - rt_lock_table(tab); - } } else if (!(tab->nhu_state & NHU_SCHEDULED)) { rt_trace(tab, D_EVENTS, "Scheduling NHU"); - rt_lock_table(tab); /* state change: * NHU_CLEAN -> NHU_SCHEDULED * NHU_RUNNING -> NHU_DIRTY */ if ((tab->nhu_state |= NHU_SCHEDULED) == NHU_SCHEDULED) - ev_schedule(tab->nhu_event); + birdloop_flag(tab->loop, RTF_NHU); } } @@ -2414,42 +2497,51 @@ void rt_schedule_prune(struct rtable_private *tab) { if (tab->prune_state == 0) - ev_schedule(tab->rt_event); + birdloop_flag(tab->loop, RTF_CLEANUP); /* state change 0->1, 2->3 */ tab->prune_state |= 1; } static void -rt_export_used(struct rt_table_exporter *e) +rt_export_used(struct rt_table_exporter *e, const char *who, const char *why) { struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, e); ASSERT_DIE(RT_IS_LOCKED(tab)); - rt_trace(tab, D_EVENTS, "Export cleanup requested"); + rt_trace(tab, D_EVENTS, "Export cleanup requested by %s %s", who, why); if (tab->export_used) return; tab->export_used = 1; - ev_schedule(tab->rt_event); + birdloop_flag(tab->loop, RTF_CLEANUP); } static void -rt_event(void *ptr) +rt_flag_handler(struct birdloop_flag_handler *fh, u32 flags) { - RT_LOCKED((rtable *) ptr, tab) + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, fh, fh)), tab) { + ASSERT_DIE(birdloop_inside(tab->loop)); + rt_lock_table(tab); - rt_lock_table(tab); + if (flags & RTF_NHU) + rt_next_hop_update(tab); - if (tab->export_used) - rt_export_cleanup(tab); + if (flags & RTF_EXPORT) + rt_kick_export_settle(tab); - if (tab->prune_state) - rt_prune_table(tab); + if (flags & RTF_CLEANUP) + { + if (tab->export_used) + rt_export_cleanup(tab); + + if (tab->prune_state) + rt_prune_table(tab); + } - rt_unlock_table(tab); + rt_unlock_table(tab); } } @@ -2471,7 +2563,7 @@ rt_kick_prune_timer(struct rtable_private *tab) /* Randomize GC period to +/- 50% */ btime gc_period = tab->config->gc_period; gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period); - tm_start(tab->prune_timer, gc_period); + tm_start_in(tab->prune_timer, gc_period, tab->loop); } @@ -2516,6 +2608,14 @@ rt_flowspec_dump_req(struct rt_export_request *req) debug(" Flowspec link for table %s (%p)\n", ln->dst->name, req); } +static void +rt_flowspec_log_state_change(struct rt_export_request *req, u8 state) +{ + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + rt_trace(ln->dst, D_STATES, "Flowspec link from %s export state changed to %s", + ln->src->name, rt_export_state_name(state)); +} + static struct rt_flowspec_link * rt_flowspec_find_link(struct rtable_private *src, rtable *dst) { @@ -2523,6 +2623,7 @@ rt_flowspec_find_link(struct rtable_private *src, rtable *dst) WALK_LIST2(hook, n, src->exporter.e.hooks, h.n) switch (atomic_load_explicit(&hook->h.export_state, memory_order_acquire)) { + case TES_HUNGRY: case TES_FEEDING: case TES_READY: if (hook->h.req->export_one == rt_flowspec_export_one) @@ -2559,6 +2660,7 @@ rt_flowspec_link(rtable *src_pub, rtable *dst_pub) .list = &global_work_list, .trace_routes = src->config->debug, .dump_req = rt_flowspec_dump_req, + .log_state_change = rt_flowspec_log_state_change, .export_one = rt_flowspec_export_one, }; @@ -2670,6 +2772,8 @@ uint rtable_max_id = 0; rtable * rt_setup(pool *pp, struct rtable_config *cf) { + ASSERT_DIE(birdloop_inside(&main_birdloop)); + pool *p = rp_newf(pp, "Routing table %s", cf->name); struct rtable_private *t = ralloc(p, &rt_class); @@ -2701,18 +2805,19 @@ rt_setup(pool *pp, struct rtable_config *cf) hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); - t->rt_event = ev_new_init(p, rt_event, t); - t->nhu_event = ev_new_init(p, rt_next_hop_update, t); + t->fh = (struct birdloop_flag_handler) { .hook = rt_flag_handler, }; + t->nhu_uncork_event = ev_new_init(p, rt_nhu_uncork, t); t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); t->last_rt_change = t->gc_time = current_time(); + t->export_settle = SETTLE_INIT(&cf->export_settle, rt_announce_exports, NULL); + t->exporter = (struct rt_table_exporter) { .e = { .class = &rt_table_exporter_class, .addr_type = t->addr_type, .rp = t->rp, }, - .export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0), .next_seq = 1, }; @@ -2730,6 +2835,12 @@ rt_setup(pool *pp, struct rtable_config *cf) t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } + /* Start the service thread */ + t->loop = birdloop_new(p, DOMAIN_ORDER(service), mb_sprintf(p, "Routing tahle %s", t->name)); + birdloop_enter(t->loop); + birdloop_flag_set_handler(t->loop, &t->fh); + birdloop_leave(t->loop); + return RT_PUB(t); } @@ -2818,7 +2929,7 @@ again: if (limit <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); + birdloop_flag(tab->loop, RTF_CLEANUP); return; } @@ -2852,15 +2963,15 @@ again: FIB_ITERATE_END; rt_trace(tab, D_EVENTS, "Prune done, scheduling export timer"); - if (!tm_active(tab->exporter.export_timer)) - tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + rt_kick_export_settle(tab); #ifdef DEBUGGING fib_check(&tab->fib); #endif /* state change 2->0, 3->1 */ - tab->prune_state &= 1; + if (tab->prune_state &= 1) + birdloop_flag(tab->loop, RTF_CLEANUP); if (tab->trie_new) { @@ -2902,9 +3013,11 @@ again: ih->flush_seq = tab->exporter.next_seq; rt_set_import_state(ih, TIS_WAITING); flushed_channels++; + tab->rr_counter--; } else if (ih->stale_pruning != ih->stale_pruned) { + tab->rr_counter -= (ih->stale_pruned - ih->stale_pruning); ih->stale_pruned = ih->stale_pruning; if (ih->req->trace_routes & D_STATES) log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); @@ -2932,6 +3045,7 @@ rt_export_cleanup(struct rtable_private *tab) switch (atomic_load_explicit(&eh->h.export_state, memory_order_acquire)) { case TES_DOWN: + case TES_HUNGRY: continue; case TES_READY: @@ -3075,11 +3189,10 @@ done:; rt_kick_prune_timer(tab); if (tab->export_used) - ev_schedule(tab->rt_event); + birdloop_flag(tab->loop, RTF_CLEANUP); - - if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) - tm_stop(tab->exporter.export_timer); + if (EMPTY_LIST(tab->exporter.pending)) + settle_cancel(&tab->export_settle); } static void @@ -3681,35 +3794,44 @@ rt_next_hop_update_net(struct rtable_private *tab, net *n) } static void -rt_next_hop_update(void *_tab) +rt_nhu_uncork(void *_tab) { RT_LOCKED((rtable *) _tab, tab) { - - /* If called from an uncork hook, reset the state */ - if (tab->nhu_corked) - { + ASSERT_DIE(tab->nhu_corked); ASSERT_DIE(tab->nhu_state == 0); + + /* Reset the state */ tab->nhu_state = tab->nhu_corked; tab->nhu_corked = 0; rt_trace(tab, D_STATES, "Next hop updater uncorked"); + + birdloop_flag(tab->loop, RTF_NHU); } +} + +static void +rt_next_hop_update(struct rtable_private *tab) +{ + ASSERT_DIE(birdloop_inside(tab->loop)); + + if (tab->nhu_corked) + return; if (!tab->nhu_state) - bug("Called NHU event for no reason in table %s", tab->name); + return; /* Check corkedness */ - if (rt_cork_check(tab->nhu_event)) + if (rt_cork_check(tab->nhu_uncork_event)) { rt_trace(tab, D_STATES, "Next hop updater corked"); if ((tab->nhu_state & NHU_RUNNING) - && !EMPTY_LIST(tab->exporter.pending) - && !tm_active(tab->exporter.export_timer)) - tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + && !EMPTY_LIST(tab->exporter.pending)) + rt_kick_export_settle(tab); tab->nhu_corked = tab->nhu_state; tab->nhu_state = 0; - RT_RETURN(tab); + return; } struct fib_iterator *fit = &tab->nhu_fit; @@ -3731,8 +3853,8 @@ rt_next_hop_update(void *_tab) if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->nhu_event); - RT_RETURN(tab); + birdloop_flag(tab->loop, RTF_NHU); + return; } lp_state lps; lp_save(tmp_linpool, &lps); @@ -3743,20 +3865,14 @@ rt_next_hop_update(void *_tab) /* Finished NHU, cleanup */ rt_trace(tab, D_EVENTS, "NHU done, scheduling export timer"); - - if (!tm_active(tab->exporter.export_timer)) - tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + rt_kick_export_settle(tab); /* State change: * NHU_DIRTY -> NHU_SCHEDULED * NHU_RUNNING -> NHU_CLEAN */ if ((tab->nhu_state &= NHU_SCHEDULED) == NHU_SCHEDULED) - ev_schedule(tab->nhu_event); - - rt_unlock_table(tab); - - } + birdloop_flag(tab->loop, RTF_NHU); } void @@ -3801,6 +3917,14 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->cork_threshold.low = 128; c->cork_threshold.high = 512; + c->export_settle = (struct settle_config) { + .min = 1 MS, + .max = 100 MS, + }; + c->export_rr_settle = (struct settle_config) { + .min = 100 MS, + .max = 3 S, + }; c->debug = new_config->table_debug; add_tail(&new_config->tables, &c->n); @@ -3840,13 +3964,22 @@ rt_unlock_table_priv(struct rtable_private *r, const char *file, uint line) { rt_trace(r, D_STATES, "Unlocked at %s:%d", file, line); if (!--r->use_count && r->deleted) - /* Schedule the delete event to finish this up */ - ev_send(&global_event_list, ev_new_init(r->rp, rt_delete, r)); + /* Stop the service thread to finish this up */ + ev_send(&global_event_list, ev_new_init(r->rp, rt_shutdown, r)); +} + +static void +rt_shutdown(void *tab_) +{ + struct rtable_private *r = tab_; + birdloop_stop(r->loop, rt_delete, r); } static void rt_delete(void *tab_) { + birdloop_enter(&main_birdloop); + /* We assume that nobody holds the table reference now as use_count is zero. * Anyway the last holder may still hold the lock. Therefore we lock and * unlock it the last time to be sure that nobody is there. */ @@ -3857,6 +3990,8 @@ rt_delete(void *tab_) rfree(tab->rp); config_del_obstacle(conf); + + birdloop_leave(&main_birdloop); } @@ -3970,7 +4105,11 @@ rt_commit(struct config *new, struct config *old) rt_lock_table(tab); if (tab->hostcache) + { rt_stop_export(&tab->hostcache->req, NULL); + if (ev_get_list(&tab->hostcache->update) == &rt_cork.queue) + ev_postpone(&tab->hostcache->update); + } rt_unlock_table(tab); @@ -4091,9 +4230,6 @@ rt_feed_by_fib(void *data) { if ((c->h.req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->h.req->addr)) { - if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING) - return; - if (!rt_prepare_feed(c, n, &block)) { FIB_ITERATE_PUT(fit); @@ -4133,9 +4269,6 @@ rt_feed_by_trie(void *data) if (!n) continue; - if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING) - RT_RETURN(tab); - if (!rt_prepare_feed(c, n, &block)) { RT_UNLOCK(tab); @@ -4336,16 +4469,28 @@ hc_notify_dump_req(struct rt_export_request *req) } static void +hc_notify_log_state_change(struct rt_export_request *req, u8 state) +{ + struct hostcache *hc = SKIP_BACK(struct hostcache, req, req); + rt_trace((rtable *) hc->update.data, D_STATES, "HCU Export state changed to %s", rt_export_state_name(state)); +} + +static void hc_notify_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) { struct hostcache *hc = SKIP_BACK(struct hostcache, req, req); /* No interest in this update, mark seen only */ - if (ev_active(&hc->update) || !trie_match_net(hc->trie, net)) - { - rpe_mark_seen_all(req->hook, first, NULL); + int interested = 1; + RT_LOCKED((rtable *) hc->update.data, tab) + if (ev_active(&hc->update) || !trie_match_net(hc->trie, net)) + { + rpe_mark_seen_all(req->hook, first, NULL); + interested = 0; + } + + if (!interested) return; - } /* This net may affect some hostentries, check the actual change */ rte *o = RTE_VALID_OR_NULL(first->old_best); @@ -4359,7 +4504,10 @@ hc_notify_export_one(struct rt_export_request *req, const net_addr *net, struct /* Yes, something has actually changed. Do the hostcache update. */ if (o != RTE_VALID_OR_NULL(new_best)) - ev_schedule_work(&hc->update); + RT_LOCKED((rtable *) hc->update.data, tab) + if ((atomic_load_explicit(&req->hook->export_state, memory_order_acquire) == TES_READY) + && !ev_active(&hc->update)) + ev_send_loop(tab->loop, &hc->update); } @@ -4386,6 +4534,7 @@ rt_init_hostcache(struct rtable_private *tab) .list = &global_work_list, .trace_routes = tab->config->debug, .dump_req = hc_notify_dump_req, + .log_state_change = hc_notify_log_state_change, .export_one = hc_notify_export_one, }; @@ -4522,6 +4671,10 @@ rt_update_hostcache(void *data) struct hostcache *hc = tab->hostcache; + /* Shutdown shortcut */ + if (!hc->req.hook) + RT_RETURN(tab); + if (rt_cork_check(&hc->update)) { rt_trace(tab, D_STATES, "Hostcache update corked"); @@ -19,6 +19,8 @@ #include "lib/route.h" #include "lib/event.h" #include "lib/rcu.h" +#include "lib/io-loop.h" +#include "lib/settle.h" #include <stdatomic.h> @@ -61,8 +63,10 @@ struct rtable_config { byte sorted; /* Routes of network are sorted according to rte_better() */ byte trie_used; /* Rtable has attached trie */ byte debug; /* Whether to log */ - btime export_settle_time; /* Delay before exports are announced */ struct rt_cork_threshold cork_threshold; /* Cork threshold values */ + struct settle_config export_settle; /* Export announcement settler */ + struct settle_config export_rr_settle;/* Export announcement settler config valid when any + route refresh is running */ }; struct rt_export_hook; @@ -85,7 +89,6 @@ struct rt_exporter { struct rt_table_exporter { struct rt_exporter e; list pending; /* List of packed struct rt_pending_export */ - struct timer *export_timer; struct rt_pending_export *first; /* First export to announce */ u64 next_seq; /* The next export will have this ID */ @@ -104,6 +107,7 @@ DEFINE_DOMAIN(rtable); uint id; /* Integer table ID for fast lookup */ \ DOMAIN(rtable) lock; /* Lock to take to access the private parts */ \ struct rtable_config *config; /* Configuration of this table */ \ + struct birdloop *loop; /* Service thread */ \ /* The complete rtable structure */ struct rtable_private { @@ -127,12 +131,16 @@ struct rtable_private { * delete as soon as use_count becomes 0 and remove * obstacle from this routing table. */ - struct event *rt_event; /* Routing table event */ - struct event *nhu_event; /* Specific event for next hop update */ + struct event *nhu_uncork_event; /* Helper event to schedule NHU on uncork */ + struct settle export_settle; /* Export batching settle timer */ struct timer *prune_timer; /* Timer for periodic pruning / GC */ + struct birdloop_flag_handler fh; /* Handler for simple events */ btime last_rt_change; /* Last time when route changed */ btime gc_time; /* Time of last GC */ uint gc_counter; /* Number of operations since last GC */ + uint rr_counter; /* Number of currently running route refreshes, + in fact sum of (stale_set - stale_pruned) over all importers + + one for each TIS_FLUSHING importer */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte prune_trie; /* Prune prefix trie during next table prune */ byte nhu_state; /* Next Hop Update state */ @@ -171,6 +179,11 @@ typedef union rtable { #define RT_PRIV_SAME(tpriv, tpub) (&(tpub)->priv == (tpriv)) +/* Flags for birdloop_flag() */ +#define RTF_CLEANUP 1 +#define RTF_NHU 2 +#define RTF_EXPORT 4 + extern struct rt_cork { _Atomic uint active; event_list queue; @@ -187,7 +200,7 @@ static inline void rt_cork_release(void) if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1) { synchronize_rcu(); - ev_schedule_work(&rt_cork.run); + ev_send(&global_work_list, &rt_cork.run); } } @@ -356,6 +369,7 @@ struct rt_table_export_hook { #define TIS_MAX 6 #define TES_DOWN 0 +#define TES_HUNGRY 1 #define TES_FEEDING 2 #define TES_READY 3 #define TES_STOP 4 @@ -417,6 +431,7 @@ int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); void rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook); struct rt_export_hook *rt_alloc_export(struct rt_exporter *re, uint size); +void rt_stop_export_common(struct rt_export_hook *hook); void rt_export_stopped(struct rt_export_hook *hook); void rt_exporter_init(struct rt_exporter *re); diff --git a/proto/babel/Makefile b/proto/babel/Makefile index ae6aeaf2..06b58e95 100644 --- a/proto/babel/Makefile +++ b/proto/babel/Makefile @@ -2,6 +2,5 @@ src := babel.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,babel_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile index 11d639d7..267dff98 100644 --- a/proto/bfd/Makefile +++ b/proto/bfd/Makefile @@ -2,6 +2,5 @@ src := bfd.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,bfd_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 331ba730..25ff19ac 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -82,7 +82,7 @@ * BFD thread to the main thread. This is done in an asynchronous way, sesions * with pending notifications are linked (in the BFD thread) to @notify_list in * &bfd_proto, and then bfd_notify_hook() in the main thread is activated using - * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and + * a standard event sending code. The hook then processes scheduled sessions and * calls hooks from associated BFD requests. This @notify_list (and state fields * in structure &bfd_session) is protected by a spinlock in &bfd_proto and * functions bfd_lock_sessions() / bfd_unlock_sessions(). @@ -128,7 +128,6 @@ const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; static void bfd_session_set_min_tx(struct bfd_session *s, u32 val); static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface); static void bfd_free_iface(struct bfd_iface *ifa); -static inline void bfd_notify_kick(struct bfd_proto *p); /* @@ -177,7 +176,7 @@ bfd_session_update_state(struct bfd_session *s, uint state, uint diag) bfd_session_set_min_tx(s, s->cf.idle_tx_int); if (notify) - bfd_notify_kick(p); + ev_send(&global_event_list, &p->notify_event); } static void @@ -666,7 +665,7 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req) { struct bfd_config *cf = (struct bfd_config *) (p->p.cf); - if (p->p.vrf_set && (p->p.vrf != req->vrf)) + if (p->p.vrf && (p->p.vrf != req->vrf)) return 0; if (ipa_is_ip4(req->addr) ? !cf->accept_ipv4 : !cf->accept_ipv6) @@ -952,21 +951,15 @@ bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new) /* This core notify code should be replaced after main loop transition to birdloop */ -int pipe(int pipefd[2]); -void pipe_drain(int fd); -void pipe_kick(int fd); - -static int -bfd_notify_hook(sock *sk, uint len UNUSED) +static void +bfd_notify_hook(void *data) { - struct bfd_proto *p = sk->data; + struct bfd_proto *p = data; struct bfd_session *s; list tmp_list; u8 state, diag; node *n, *nn; - pipe_drain(sk->fd); - bfd_lock_sessions(p); init_list(&tmp_list); add_tail_list(&tmp_list, &p->notify_list); @@ -990,55 +983,8 @@ bfd_notify_hook(sock *sk, uint len UNUSED) if (EMPTY_LIST(s->request_list)) bfd_remove_session(p, s); } - - return 0; -} - -static inline void -bfd_notify_kick(struct bfd_proto *p) -{ - pipe_kick(p->notify_ws->fd); -} - -static void -bfd_noterr_hook(sock *sk, int err) -{ - struct bfd_proto *p = sk->data; - log(L_ERR "%s: Notify socket error: %m", p->p.name, err); } -static void -bfd_notify_init(struct bfd_proto *p) -{ - int pfds[2]; - sock *sk; - - int rv = pipe(pfds); - if (rv < 0) - die("pipe: %m"); - - sk = sk_new(p->p.pool); - sk->type = SK_MAGIC; - sk->rx_hook = bfd_notify_hook; - sk->err_hook = bfd_noterr_hook; - sk->fd = pfds[0]; - sk->data = p; - if (sk_open(sk) < 0) - die("bfd: sk_open failed"); - p->notify_rs = sk; - - /* The write sock is not added to any event loop */ - sk = sk_new(p->p.pool); - sk->type = SK_MAGIC; - sk->fd = pfds[1]; - sk->data = p; - sk->flags = SKF_THREAD; - if (sk_open(sk) < 0) - die("bfd: sk_open failed"); - p->notify_ws = sk; -} - - /* * BFD protocol glue */ @@ -1070,7 +1016,10 @@ bfd_start(struct proto *P) init_list(&p->iface_list); init_list(&p->notify_list); - bfd_notify_init(p); + p->notify_event = (event) { + .hook = bfd_notify_hook, + .data = p, + }; add_tail(&bfd_global.proto_list, &p->bfd_node); diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index b9afaf92..9a8e20c6 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -99,8 +99,7 @@ struct bfd_proto HASH(struct bfd_session) session_hash_id; HASH(struct bfd_session) session_hash_ip; - sock *notify_rs; - sock *notify_ws; + event notify_event; list notify_list; sock *rx4_1; diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile index 2a4cc99c..f6a38678 100644 --- a/proto/bgp/Makefile +++ b/proto/bgp/Makefile @@ -2,6 +2,5 @@ src := attrs.c bgp.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,bgp_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile index 000e1c1c..8cd44ac1 100644 --- a/proto/mrt/Makefile +++ b/proto/mrt/Makefile @@ -2,6 +2,5 @@ src := mrt.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,mrt_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile index 85664543..015f394a 100644 --- a/proto/ospf/Makefile +++ b/proto/ospf/Makefile @@ -2,6 +2,5 @@ src := dbdes.c hello.c iface.c lsack.c lsalib.c lsreq.c lsupd.c neighbor.c ospf. obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,ospf_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/perf/Makefile b/proto/perf/Makefile index 42051f43..7877fb19 100644 --- a/proto/perf/Makefile +++ b/proto/perf/Makefile @@ -2,6 +2,5 @@ src := perf.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,perf_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile index ba66027f..0d68db4c 100644 --- a/proto/pipe/Makefile +++ b/proto/pipe/Makefile @@ -2,6 +2,5 @@ src := pipe.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,pipe_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 0990168e..444de127 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -25,6 +25,7 @@ proto: pipe_proto '}' { this_channel = NULL; } ; pipe_proto_start: proto_start PIPE { this_proto = proto_config_new(&proto_pipe, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); PIPE_CFG->max_generation = 16; } proto_name diff --git a/proto/radv/Makefile b/proto/radv/Makefile index 4780bee3..5c56fbf3 100644 --- a/proto/radv/Makefile +++ b/proto/radv/Makefile @@ -2,6 +2,5 @@ src := packets.c radv.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,radv_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rip/Makefile b/proto/rip/Makefile index b9ff62d6..f4a6fa72 100644 --- a/proto/rip/Makefile +++ b/proto/rip/Makefile @@ -2,6 +2,5 @@ src := packets.c rip.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,rip_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile index 8e3a2761..0f60b2a0 100644 --- a/proto/rpki/Makefile +++ b/proto/rpki/Makefile @@ -2,6 +2,5 @@ src := rpki.c packets.c tcp_transport.c ssh_transport.c transport.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,rpki_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rpki/config.Y b/proto/rpki/config.Y index d6d326b8..743b5b42 100644 --- a/proto/rpki/config.Y +++ b/proto/rpki/config.Y @@ -42,6 +42,7 @@ proto: rpki_proto ; rpki_proto_start: proto_start RPKI { this_proto = proto_config_new(&proto_rpki, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); RPKI_CFG->retry_interval = RPKI_RETRY_INTERVAL; RPKI_CFG->refresh_interval = RPKI_REFRESH_INTERVAL; RPKI_CFG->expire_interval = RPKI_EXPIRE_INTERVAL; diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index 108da61b..d7895a22 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -233,7 +233,12 @@ static const size_t min_pdu_size[] = { [ERROR] = 16, }; -static int rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...); +static int rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...); + +#define rpki_send_error_pdu(cache, error_code, err_pdu_len, erroneous_pdu, fmt...) ({ \ + rpki_send_error_pdu_(cache, error_code, err_pdu_len, erroneous_pdu, #fmt); \ + CACHE_TRACE(D_PACKETS, cache, #fmt); \ + }) static void rpki_pdu_to_network_byte_order(struct pdu_header *pdu) @@ -595,6 +600,7 @@ rpki_handle_error_pdu(struct rpki_cache *cache, const struct pdu_error *pdu) case INTERNAL_ERROR: case INVALID_REQUEST: case UNSUPPORTED_PDU_TYPE: + CACHE_TRACE(D_PACKETS, cache, "Got UNSUPPORTED_PDU_TYPE"); rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL); break; @@ -652,21 +658,7 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_ { if (cache->request_session_id) { - if (cache->last_update) - { - /* - * This isn't the first sync and we already received records. This point - * is after Reset Query and before importing new records from cache - * server. We need to load new ones and kick out missing ones. So start - * a refresh cycle. - */ - if (cache->p->roa4_channel) - rt_refresh_begin(&cache->p->roa4_channel->in_req); - if (cache->p->roa6_channel) - rt_refresh_begin(&cache->p->roa6_channel->in_req); - - cache->p->refresh_channels = 1; - } + rpki_start_refresh(cache->p); cache->session_id = pdu->session_id; cache->request_session_id = 0; } @@ -842,14 +834,7 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da (cf->keep_expire_interval ? "keeps " : ""), cache->expire_interval); } - if (cache->p->refresh_channels) - { - cache->p->refresh_channels = 0; - if (cache->p->roa4_channel) - rt_refresh_end(&cache->p->roa4_channel->in_req); - if (cache->p->roa6_channel) - rt_refresh_end(&cache->p->roa6_channel->in_req); - } + rpki_stop_refresh(cache->p); cache->last_update = current_time(); cache->serial_num = pdu->serial_num; @@ -1040,7 +1025,7 @@ rpki_connected_hook(sock *sk) * This function prepares Error PDU and sends it to a cache server. */ static int -rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...) +rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...) { va_list args; char msg[128]; diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 56615e36..7ec8d72f 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -109,6 +109,7 @@ static void rpki_schedule_next_expire_check(struct rpki_cache *cache); static void rpki_stop_refresh_timer_event(struct rpki_cache *cache); static void rpki_stop_retry_timer_event(struct rpki_cache *cache); static void rpki_stop_expire_timer_event(struct rpki_cache *cache); +static void rpki_stop_all_timers(struct rpki_cache *cache); /* @@ -136,6 +137,30 @@ rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const n rte_update(channel, &pfxr->n, NULL, p->p.main_source); } +void +rpki_start_refresh(struct rpki_proto *p) +{ + if (p->roa4_channel) + rt_refresh_begin(&p->roa4_channel->in_req); + if (p->roa6_channel) + rt_refresh_begin(&p->roa6_channel->in_req); + + p->refresh_channels = 1; +} + +void +rpki_stop_refresh(struct rpki_proto *p) +{ + if (!p->refresh_channels) + return; + + p->refresh_channels = 0; + + if (p->roa4_channel) + rt_refresh_end(&p->roa4_channel->in_req); + if (p->roa6_channel) + rt_refresh_end(&p->roa6_channel->in_req); +} /* * RPKI Protocol Logic @@ -192,6 +217,8 @@ rpki_force_restart_proto(struct rpki_proto *p) { if (p->cache) { + rpki_tr_close(p->cache->tr_sock); + rpki_stop_all_timers(p->cache); CACHE_DBG(p->cache, "Connection object destroying"); } @@ -315,7 +342,7 @@ rpki_schedule_next_refresh(struct rpki_cache *cache) btime t = cache->refresh_interval S; CACHE_DBG(cache, "after %t s", t); - tm_start(cache->refresh_timer, t); + tm_start_in(cache->refresh_timer, t, cache->p->p.loop); } static void @@ -324,7 +351,7 @@ rpki_schedule_next_retry(struct rpki_cache *cache) btime t = cache->retry_interval S; CACHE_DBG(cache, "after %t s", t); - tm_start(cache->retry_timer, t); + tm_start_in(cache->retry_timer, t, cache->p->p.loop); } static void @@ -335,7 +362,7 @@ rpki_schedule_next_expire_check(struct rpki_cache *cache) t = MAX(t, 1 S); CACHE_DBG(cache, "after %t s", t); - tm_start(cache->expire_timer, t); + tm_start_in(cache->expire_timer, t, cache->p->p.loop); } static void @@ -352,13 +379,21 @@ rpki_stop_retry_timer_event(struct rpki_cache *cache) tm_stop(cache->retry_timer); } -static void UNUSED +static void rpki_stop_expire_timer_event(struct rpki_cache *cache) { CACHE_DBG(cache, "Stop"); tm_stop(cache->expire_timer); } +static void +rpki_stop_all_timers(struct rpki_cache *cache) +{ + rpki_stop_refresh_timer_event(cache); + rpki_stop_retry_timer_event(cache); + rpki_stop_expire_timer_event(cache); +} + static int rpki_do_we_recv_prefix_pdu_in_last_seconds(struct rpki_cache *cache) { @@ -623,6 +658,7 @@ rpki_close_connection(struct rpki_cache *cache) { CACHE_TRACE(D_EVENTS, cache, "Closing a connection"); rpki_tr_close(cache->tr_sock); + rpki_stop_refresh(cache->p); proto_notify_state(&cache->p->p, PS_START); } diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h index 26fbb46e..20253844 100644 --- a/proto/rpki/rpki.h +++ b/proto/rpki/rpki.h @@ -83,6 +83,8 @@ const char *rpki_cache_state_to_str(enum rpki_cache_state state); void rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr); +void rpki_start_refresh(struct rpki_proto *p); +void rpki_stop_refresh(struct rpki_proto *p); /* * RPKI Protocol Logic diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c index 6333f367..223afa80 100644 --- a/proto/rpki/ssh_transport.c +++ b/proto/rpki/ssh_transport.c @@ -38,6 +38,8 @@ rpki_tr_ssh_open(struct rpki_tr_sock *tr) if (sk_open(sk) != 0) return RPKI_TR_ERROR; + sk_start(sk); + return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c index 132f8e2d..4e850c44 100644 --- a/proto/rpki/tcp_transport.c +++ b/proto/rpki/tcp_transport.c @@ -31,6 +31,8 @@ rpki_tr_tcp_open(struct rpki_tr_sock *tr) if (sk_open(sk) != 0) return RPKI_TR_ERROR; + sk_start(sk); + return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c index 81bd6dd8..4026fca4 100644 --- a/proto/rpki/transport.c +++ b/proto/rpki/transport.c @@ -85,6 +85,7 @@ rpki_tr_open(struct rpki_tr_sock *tr) sk->rbsize = RPKI_RX_BUFFER_SIZE; sk->tbsize = RPKI_TX_BUFFER_SIZE; sk->tos = IP_PREC_INTERNET_CONTROL; + sk->flags |= SKF_THREAD; sk->vrf = cache->p->p.vrf; if (ipa_zero(sk->daddr) && sk->host) @@ -120,6 +121,7 @@ rpki_tr_close(struct rpki_tr_sock *tr) if (tr->sk) { + sk_stop(tr->sk); rfree(tr->sk); tr->sk = NULL; } diff --git a/proto/static/Makefile b/proto/static/Makefile index 26aed31f..de6e819b 100644 --- a/proto/static/Makefile +++ b/proto/static/Makefile @@ -2,6 +2,5 @@ src := static.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,static_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/static/static.c b/proto/static/static.c index cb764a1c..42fd20b7 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -494,7 +494,12 @@ static_start(struct proto *P) proto_notify_state(P, PS_UP); WALK_LIST(r, cf->routes) + { + struct lp_state lps; + lp_save(tmp_linpool, &lps); static_add_rte(p, r); + lp_restore(tmp_linpool, &lps); + } return PS_UP; } diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index 3f1a8b3a..6f6b0d26 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -2,8 +2,6 @@ src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c obj := $(src-o-files) $(all-daemon) $(cf-local) -$(call proto-build,kif_build) -$(call proto-build,krt_build) $(conf-y-targets): $(s)krt.Y src := $(filter-out main.c, $(src)) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index a2384ca8..847def30 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -10,6 +10,7 @@ #include "lib/resource.h" #include "lib/lists.h" #include "lib/event.h" +#include "lib/rcu.h" #include <errno.h> #include <stdlib.h> @@ -22,46 +23,43 @@ long page_size = 0; #ifdef HAVE_MMAP -#define KEEP_PAGES_MAIN_MAX 256 -#define KEEP_PAGES_MAIN_MIN 8 -#define CLEANUP_PAGES_BULK 256 +#define KEEP_PAGES_MAX 512 +#define KEEP_PAGES_MIN 32 +#define KEEP_PAGES_MAX_LOCAL 16 +#define ALLOC_PAGES_AT_ONCE 8 -STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); +STATIC_ASSERT(KEEP_PAGES_MIN * 4 < KEEP_PAGES_MAX); +STATIC_ASSERT(ALLOC_PAGES_AT_ONCE < KEEP_PAGES_MAX_LOCAL); static _Bool use_fake = 0; +static _Bool initialized = 0; #if DEBUGGING struct free_page { node unused[42]; - node n; + struct free_page * _Atomic next; }; #else struct free_page { - node n; + struct free_page * _Atomic next; }; #endif -struct free_pages { - list pages; - u16 min, max; /* Minimal and maximal number of free pages kept */ - uint cnt; /* Number of empty pages */ - event cleanup; -}; - -static void global_free_pages_cleanup_event(void *); +static struct free_page * _Atomic page_stack = NULL; +static _Thread_local struct free_page * local_page_stack = NULL; -static struct free_pages global_free_pages = { - .min = KEEP_PAGES_MAIN_MIN, - .max = KEEP_PAGES_MAIN_MAX, - .cleanup = { .hook = global_free_pages_cleanup_event }, -}; +static void page_cleanup(void *); +static event page_cleanup_event = { .hook = page_cleanup, }; +#define SCHEDULE_CLEANUP do if (initialized && !shutting_down) ev_send(&global_event_list, &page_cleanup_event); while (0) -uint *pages_kept = &global_free_pages.cnt; +_Atomic int pages_kept = 0; +_Atomic int pages_kept_locally = 0; +static int pages_kept_here = 0; static void * alloc_sys_page(void) { - void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + void *ptr = mmap(NULL, page_size * ALLOC_PAGES_AT_ONCE, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr == MAP_FAILED) bug("mmap(%lu) failed: %m", page_size); @@ -90,20 +88,33 @@ alloc_page(void) } #ifdef HAVE_MMAP - struct free_pages *fps = &global_free_pages; - - if (fps->cnt) + struct free_page *fp = local_page_stack; + if (fp) { - struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); - rem_node(&fp->n); - if ((--fps->cnt < fps->min) && !shutting_down) - ev_send(&global_work_list, &fps->cleanup); - - bzero(fp, page_size); + local_page_stack = atomic_load_explicit(&fp->next, memory_order_acquire); + atomic_fetch_sub_explicit(&pages_kept_locally, 1, memory_order_relaxed); + pages_kept_here--; return fp; } - return alloc_sys_page(); + rcu_read_lock(); + fp = atomic_load_explicit(&page_stack, memory_order_acquire); + while (fp && !atomic_compare_exchange_strong_explicit( + &page_stack, &fp, atomic_load_explicit(&fp->next, memory_order_acquire), + memory_order_acq_rel, memory_order_acquire)) + ; + rcu_read_unlock(); + + if (!fp) + { + void *ptr = alloc_sys_page(); + for (int i=1; i<ALLOC_PAGES_AT_ONCE; i++) + free_page(ptr + page_size * i); + return ptr; + } + + atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); + return fp; #endif } @@ -117,45 +128,95 @@ free_page(void *ptr) } #ifdef HAVE_MMAP - struct free_pages *fps = &global_free_pages; struct free_page *fp = ptr; + if (shutting_down || (pages_kept_here < KEEP_PAGES_MAX_LOCAL)) + { + atomic_store_explicit(&fp->next, local_page_stack, memory_order_relaxed); + atomic_fetch_add_explicit(&pages_kept_locally, 1, memory_order_relaxed); + pages_kept_here++; + return; + } + + rcu_read_lock(); + struct free_page *next = atomic_load_explicit(&page_stack, memory_order_acquire); - fp->n = (node) {}; - add_tail(&fps->pages, &fp->n); + do atomic_store_explicit(&fp->next, next, memory_order_release); + while (!atomic_compare_exchange_strong_explicit( + &page_stack, &next, fp, + memory_order_acq_rel, memory_order_acquire)); + rcu_read_unlock(); - if ((++fps->cnt > fps->max) && !shutting_down) - ev_send(&global_work_list, &fps->cleanup); + if (atomic_fetch_add_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX) + SCHEDULE_CLEANUP; #endif } +void +flush_local_pages(void) +{ + if (use_fake || !local_page_stack || shutting_down) + return; + + struct free_page *last = local_page_stack, *next; + int check_count = 1; + while (next = atomic_load_explicit(&last->next, memory_order_acquire)) + { + check_count++; + last = next; + } + + ASSERT_DIE(check_count == pages_kept_here); + + rcu_read_lock(); + next = atomic_load_explicit(&page_stack, memory_order_acquire); + + do atomic_store_explicit(&last->next, next, memory_order_release); + while (!atomic_compare_exchange_strong_explicit( + &page_stack, &next, local_page_stack, + memory_order_acq_rel, memory_order_acquire)); + rcu_read_unlock(); + + local_page_stack = NULL; + pages_kept_here = 0; + + atomic_fetch_sub_explicit(&pages_kept_locally, check_count, memory_order_relaxed); + if (atomic_fetch_add_explicit(&pages_kept, check_count, memory_order_relaxed) >= KEEP_PAGES_MAX) + SCHEDULE_CLEANUP; +} + #ifdef HAVE_MMAP static void -global_free_pages_cleanup_event(void *data UNUSED) +page_cleanup(void *_ UNUSED) { if (shutting_down) return; - struct free_pages *fps = &global_free_pages; + struct free_page *stack = atomic_exchange_explicit(&page_stack, NULL, memory_order_acq_rel); + if (!stack) + return; - while (fps->cnt / 2 < fps->min) - { - struct free_page *fp = alloc_sys_page(); - fp->n = (node) {}; - add_tail(&fps->pages, &fp->n); - fps->cnt++; + synchronize_rcu(); + + do { + struct free_page *f = stack; + stack = atomic_load_explicit(&f->next, memory_order_acquire); + + if (munmap(f, page_size) == 0) + continue; + else if (errno != ENOMEM) + bug("munmap(%p) failed: %m", f); + else + free_page(f); } + while ((atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed) >= KEEP_PAGES_MAX / 2) && stack); - for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + while (stack) { - struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); - rem_node(&fp->n); + struct free_page *f = stack; + stack = atomic_load_explicit(&f->next, memory_order_acquire); + free_page(f); - if (munmap(fp, page_size) == 0) - fps->cnt--; - else if (errno == ENOMEM) - add_head(&fps->pages, &fp->n); - else - bug("munmap(%p) failed: %m", fp); + atomic_fetch_sub_explicit(&pages_kept, 1, memory_order_relaxed); } } #endif @@ -164,17 +225,17 @@ void resource_sys_init(void) { #ifdef HAVE_MMAP - ASSERT_DIE(global_free_pages.cnt == 0); - if (!(page_size = sysconf(_SC_PAGESIZE))) die("System page size must be non-zero"); if (u64_popcount(page_size) == 1) { - struct free_pages *fps = &global_free_pages; - init_list(&fps->pages); - global_free_pages_cleanup_event(NULL); + for (int i = 0; i < (KEEP_PAGES_MIN * 2); i++) + free_page(alloc_page()); + + page_cleanup(NULL); + initialized = 1; return; } @@ -184,4 +245,5 @@ resource_sys_init(void) #endif page_size = 4096; + initialized = 1; } diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c index 3e3fc31a..dbca36e9 100644 --- a/sysdep/unix/io-loop.c +++ b/sysdep/unix/io-loop.c @@ -59,78 +59,138 @@ birdloop_inside(struct birdloop *loop) return 0; } +void +birdloop_flag(struct birdloop *loop, u32 flag) +{ + atomic_fetch_or_explicit(&loop->flags, flag, memory_order_acq_rel); + birdloop_ping(loop); +} + +void +birdloop_flag_set_handler(struct birdloop *loop, struct birdloop_flag_handler *fh) +{ + ASSERT_DIE(birdloop_inside(loop)); + loop->flag_handler = fh; +} + +static int +birdloop_process_flags(struct birdloop *loop) +{ + if (!loop->flag_handler) + return 0; + + u32 flags = atomic_exchange_explicit(&loop->flags, 0, memory_order_acq_rel); + loop->flag_handler->hook(loop->flag_handler, flags); + return !!flags; +} + +static int +birdloop_run_events(struct birdloop *loop) +{ + btime begin = current_time(); + while (current_time() - begin < 5 MS) + { + if (!ev_run_list(&loop->event_list)) + return 0; + + times_update(); + } + + return 1; +} + /* * Wakeup code for birdloop */ -static void -pipe_new(int *pfds) +void +pipe_new(struct pipe *p) { - int rv = pipe(pfds); + int rv = pipe(p->fd); if (rv < 0) die("pipe: %m"); - if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0) + if (fcntl(p->fd[0], F_SETFL, O_NONBLOCK) < 0) die("fcntl(O_NONBLOCK): %m"); - if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0) + if (fcntl(p->fd[1], F_SETFL, O_NONBLOCK) < 0) die("fcntl(O_NONBLOCK): %m"); } void -pipe_drain(int fd) +pipe_drain(struct pipe *p) { - char buf[64]; - int rv; - - try: - rv = read(fd, buf, 64); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) + while (1) { + char buf[64]; + int rv = read(p->fd[0], buf, sizeof(buf)); + if ((rv < 0) && (errno == EAGAIN)) return; - die("wakeup read: %m"); + + if (rv == 0) + bug("wakeup read eof"); + if ((rv < 0) && (errno != EINTR)) + bug("wakeup read: %m"); + } +} + +int +pipe_read_one(struct pipe *p) +{ + while (1) { + char v; + int rv = read(p->fd[0], &v, sizeof(v)); + if (rv == 1) + return 1; + if ((rv < 0) && (errno == EAGAIN)) + return 0; + if (rv > 1) + bug("wakeup read more bytes than expected: %d", rv); + if (rv == 0) + bug("wakeup read eof"); + if (errno != EINTR) + bug("wakeup read: %m"); } - if (rv == 64) - goto try; } void -pipe_kick(int fd) +pipe_kick(struct pipe *p) { - u64 v = 1; + char v = 1; int rv; - try: - rv = write(fd, &v, sizeof(u64)); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) + while (1) { + rv = write(p->fd[1], &v, sizeof(v)); + if ((rv >= 0) || (errno == EAGAIN)) return; - die("wakeup write: %m"); + if (errno != EINTR) + bug("wakeup write: %m"); } } +void +pipe_pollin(struct pipe *p, struct pollfd *pfd) +{ + pfd->fd = p->fd[0]; + pfd->events = POLLIN; + pfd->revents = 0; +} + static inline void wakeup_init(struct birdloop *loop) { - pipe_new(loop->wakeup_fds); + pipe_new(&loop->wakeup); } static inline void wakeup_drain(struct birdloop *loop) { - pipe_drain(loop->wakeup_fds[0]); + pipe_drain(&loop->wakeup); } static inline void wakeup_do_kick(struct birdloop *loop) { - pipe_kick(loop->wakeup_fds[1]); + pipe_kick(&loop->wakeup); } static inline void @@ -259,9 +319,7 @@ sockets_prepare(struct birdloop *loop) /* Add internal wakeup fd */ *psk = NULL; - pfd->fd = loop->wakeup_fds[0]; - pfd->events = POLLIN; - pfd->revents = 0; + pipe_pollin(&loop->wakeup, pfd); loop->poll_changed = 0; } @@ -516,7 +574,7 @@ birdloop_main(void *arg) while (1) { timers_fire(&loop->time, 0); - if (ev_run_list(&loop->event_list)) + if (birdloop_process_flags(loop) + birdloop_run_events(loop)) timeout = 0; else if (t = timers_first(&loop->time)) timeout = (tm_remains(t) TO_MS) + 1; @@ -525,6 +583,9 @@ birdloop_main(void *arg) if (loop->poll_changed) sockets_prepare(loop); + else + if ((timeout < 0) || (timeout > 5000)) + flush_local_pages(); btime duration = current_time() - loop_begin; if (duration > config->watchdog_warning) @@ -560,14 +621,18 @@ birdloop_main(void *arg) /* Flush remaining events */ ASSERT_DIE(!ev_run_list(&loop->event_list)); - /* No timers allowed */ - ASSERT_DIE(timers_count(&loop->time) == 0); + /* Drop timers */ + while (t = timers_first(&loop->time)) + tm_stop(t); + + /* No sockets allowed */ ASSERT_DIE(EMPTY_LIST(loop->sock_list)); ASSERT_DIE(loop->sock_num == 0); birdloop_leave(loop); loop->stopped(loop->stop_data); + flush_local_pages(); return NULL; } diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h index 31c40459..29ca96d6 100644 --- a/sysdep/unix/io-loop.h +++ b/sysdep/unix/io-loop.h @@ -9,6 +9,16 @@ #include "lib/rcu.h" +struct pipe +{ + int fd[2]; +}; + +void pipe_new(struct pipe *); +void pipe_pollin(struct pipe *, struct pollfd *); +void pipe_drain(struct pipe *); +void pipe_kick(struct pipe *); + struct birdloop { pool *pool; @@ -25,7 +35,7 @@ struct birdloop uint ping_pending; _Atomic u32 ping_sent; - int wakeup_fds[2]; + struct pipe wakeup; pthread_t thread_id; pthread_attr_t thread_attr; @@ -34,6 +44,9 @@ struct birdloop uint links; + _Atomic u32 flags; + struct birdloop_flag_handler *flag_handler; + void (*stopped)(void *data); void *stop_data; diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 23baffb2..6454f15f 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -2213,8 +2213,6 @@ static int short_loops = 0; #define SHORT_LOOP_MAX 10 #define WORK_EVENTS_MAX 10 -void pipe_drain(int fd); - void io_loop(void) { @@ -2246,8 +2244,7 @@ io_loop(void) } /* A hack to reload main io_loop() when something has changed asynchronously. */ - pfd[0].fd = main_birdloop.wakeup_fds[0]; - pfd[0].events = POLLIN; + pipe_pollin(&main_birdloop.wakeup, &pfd[0]); nfds = 1; @@ -2325,7 +2322,7 @@ io_loop(void) if (pfd[0].revents & POLLIN) { /* IO loop reload requested */ - pipe_drain(main_birdloop.wakeup_fds[0]); + pipe_drain(&main_birdloop.wakeup); atomic_exchange_explicit(&main_birdloop.ping_sent, 0, memory_order_acq_rel); continue; } diff --git a/test/birdtest.h b/test/birdtest.h index ad5f8f9c..b8978b3e 100644 --- a/test/birdtest.h +++ b/test/birdtest.h @@ -40,7 +40,7 @@ static inline u64 bt_random(void) void bt_log_suite_result(int result, const char *fmt, ...); void bt_log_suite_case_result(int result, const char *fmt, ...); -#define BT_TIMEOUT 5 /* Default timeout in seconds */ +#define BT_TIMEOUT 20 /* Default timeout in seconds */ #define BT_FORKING 1 /* Forking is enabled in default */ #define BT_RANDOM_SEED 0x5097d2bb |