From feb17ced234bad13ae64b52a3f86241f74517997 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Fri, 18 Jun 2021 18:10:42 +0200 Subject: Dropping the POSIX thread-local variables in favor of much easier-to-use C11 thread-local variables --- proto/bfd/io.c | 53 +++++++++++++---------------------------------------- 1 file changed, 13 insertions(+), 40 deletions(-) (limited to 'proto') diff --git a/proto/bfd/io.c b/proto/bfd/io.c index 1cd9365a..8fdc84fb 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -52,29 +52,15 @@ struct birdloop * Current thread context */ -static pthread_key_t current_loop_key; -extern pthread_key_t current_time_key; - -static inline struct birdloop * -birdloop_current(void) -{ - return pthread_getspecific(current_loop_key); -} +static _Thread_local struct birdloop *birdloop_current; static inline void birdloop_set_current(struct birdloop *loop) { - pthread_setspecific(current_loop_key, loop); - pthread_setspecific(current_time_key, loop ? &loop->time : &main_timeloop); + birdloop_current = loop; + local_timeloop = loop ? &loop->time : &main_timeloop; } -static inline void -birdloop_init_current(void) -{ - pthread_key_create(¤t_loop_key, NULL); -} - - /* * Wakeup code for birdloop */ @@ -162,10 +148,8 @@ wakeup_kick(struct birdloop *loop) void wakeup_kick_current(void) { - struct birdloop *loop = birdloop_current(); - - if (loop && loop->poll_active) - wakeup_kick(loop); + if (birdloop_current && birdloop_current->poll_active) + wakeup_kick(birdloop_current); } @@ -195,15 +179,13 @@ events_fire(struct birdloop *loop) void ev2_schedule(event *e) { - struct birdloop *loop = birdloop_current(); - - if (loop->poll_active && EMPTY_LIST(loop->event_list)) - wakeup_kick(loop); + if (birdloop_current->poll_active && EMPTY_LIST(birdloop_current->event_list)) + wakeup_kick(birdloop_current); if (e->n.next) rem_node(&e->n); - add_tail(&loop->event_list, &e->n); + add_tail(&birdloop_current->event_list, &e->n); } @@ -238,9 +220,7 @@ sockets_add(struct birdloop *loop, sock *s) void sk_start(sock *s) { - struct birdloop *loop = birdloop_current(); - - sockets_add(loop, s); + sockets_add(birdloop_current, s); } static void @@ -261,14 +241,12 @@ sockets_remove(struct birdloop *loop, sock *s) void sk_stop(sock *s) { - struct birdloop *loop = birdloop_current(); + sockets_remove(birdloop_current, s); - sockets_remove(loop, s); - - if (loop->poll_active) + if (birdloop_current->poll_active) { - loop->close_scheduled = 1; - wakeup_kick(loop); + birdloop_current->close_scheduled = 1; + wakeup_kick(birdloop_current); } else close(s->fd); @@ -392,11 +370,6 @@ static void * birdloop_main(void *arg); struct birdloop * birdloop_new(void) { - /* FIXME: this init should be elsewhere and thread-safe */ - static int init = 0; - if (!init) - { birdloop_init_current(); init = 1; } - pool *p = rp_new(NULL, "Birdloop root"); struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); loop->pool = p; -- cgit v1.2.3 From 44f26c49f966ca842ff9af55468de0b98c44b73e Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 6 Oct 2021 15:10:33 +0200 Subject: Special table hooks rectified. * internal tables are now more standalone, having their own import and export hooks * route refresh/reload uses stale counter instead of stale flag, allowing to drop walking the table at the beginning * route modify (by BGP LLGR) is now done by a special refeed hook, reimporting the modified routes directly without filters --- lib/lists.c | 9 + lib/lists.h | 1 + nest/config.Y | 4 +- nest/proto.c | 426 +++++++++++++++++++++++++++++------- nest/protocol.h | 23 +- nest/route.h | 35 ++- nest/rt-attr.c | 3 +- nest/rt-show.c | 7 +- nest/rt-table.c | 593 ++++++++++++++------------------------------------- proto/bgp/attrs.c | 50 +++-- proto/bgp/bgp.c | 94 +++++--- proto/bgp/bgp.h | 3 +- proto/bgp/packets.c | 2 +- proto/rpki/packets.c | 8 +- sysdep/unix/krt.c | 256 ++-------------------- sysdep/unix/krt.h | 4 - 16 files changed, 677 insertions(+), 841 deletions(-) (limited to 'proto') diff --git a/lib/lists.c b/lib/lists.c index 200576cf..fe2b692b 100644 --- a/lib/lists.c +++ b/lib/lists.c @@ -109,6 +109,15 @@ add_head(list *l, node *n) l->head = n; } +LIST_INLINE void +self_link(node *n) +{ + ASSUME(n->prev == NULL); + ASSUME(n->next == NULL); + + n->prev = n->next = n; +} + /** * insert_node - insert a node to a list * @n: a new list node diff --git a/lib/lists.h b/lib/lists.h index 479f4ed1..64b4a981 100644 --- a/lib/lists.h +++ b/lib/lists.h @@ -78,6 +78,7 @@ typedef union list { /* In fact two overlayed nodes */ #define LIST_INLINE void add_tail(list *, node *); void add_head(list *, node *); +void self_link(node *); void rem_node(node *); void add_tail_list(list *, list *); void init_list(list *); diff --git a/nest/config.Y b/nest/config.Y index 29d6b0db..a56b25be 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -644,12 +644,12 @@ r_args: } | r_args IMPORT TABLE channel_arg { if (!$4->in_table) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->in_table); + rt_show_add_table($$, $4->in_table->tab); $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { if (!$4->out_table) cf_error("No export table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->out_table); + rt_show_add_table($$, $4->out_table->tab); $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args FILTER filter { diff --git a/nest/proto.c b/nest/proto.c index 09582d2e..cf448fd9 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -47,7 +47,7 @@ static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; extern struct protocol proto_unix_iface; -static void channel_request_reload(struct channel *c); +static void channel_aux_request_refeed(struct channel_aux_table *cat); static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); @@ -88,7 +88,9 @@ channel_export_log_state_change(struct rt_export_request *req, u8 state) switch (state) { case TES_FEEDING: - if (c->proto->feed_begin) + if (c->out_table) + rt_refresh_begin(&c->out_table->push); + else if (c->proto->feed_begin) c->proto->feed_begin(c, !c->refeeding); break; case TES_READY: @@ -179,6 +181,7 @@ proto_find_channel_by_name(struct proto *p, const char *n) } rte * channel_preimport(struct rt_import_request *req, rte *new, rte *old); +rte * channel_in_preimport(struct rt_import_request *req, rte *new, rte *old); void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); @@ -295,14 +298,10 @@ static void channel_roa_in_changed(struct rt_subscription *s) { struct channel *c = s->data; - int active = c->reload_event && ev_active(c->reload_event); - CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); + CD(c, "Reload triggered by RPKI change"); - if (!active) - channel_request_reload(c); - else - c->reload_pending = 1; + channel_request_reload(c); } static void @@ -444,7 +443,6 @@ channel_start_import(struct channel *c) .dump_req = channel_dump_import_req, .log_state_change = channel_import_log_state_change, .preimport = channel_preimport, - .rte_modify = c->proto->rte_modify, }; ASSERT(c->channel_state == CS_UP); @@ -463,7 +461,8 @@ channel_start_export(struct channel *c) { if (c->out_req.hook) { - log(L_WARN "%s.%s: Attempted to start channel's already started export", c->proto->name, c->name); + c->restart_export = 1; + log(L_WARN "%s.%s: Fast channel export restart", c->proto->name, c->name); return; } @@ -514,7 +513,7 @@ channel_check_stopped(struct channel *c) switch (c->channel_state) { case CS_STOP: - if (c->out_req.hook || c->in_req.hook) + if (c->out_req.hook || c->in_req.hook || c->out_table || c->in_table) return; channel_set_state(c, CS_DOWN); @@ -541,9 +540,6 @@ channel_import_stopped(struct rt_import_request *req) req->hook = NULL; - if (c->in_table) - rt_prune_sync(c->in_table, 1); - mb_free(c->in_req.name); c->in_req.name = NULL; @@ -566,14 +562,16 @@ channel_export_stopped(struct rt_export_request *req) return; } - /* Free the routes from out_table */ - if (c->out_table) - rt_prune_sync(c->out_table, 1); - mb_free(c->out_req.name); c->out_req.name = NULL; - channel_check_stopped(c); + if (c->restart_export) + { + c->restart_export = 0; + channel_start_export(c); + } + else + channel_check_stopped(c); } static void @@ -595,72 +593,296 @@ channel_feed_end(struct channel *c) return; } - if (c->proto->feed_end) + if (c->out_table) + rt_refresh_end(&c->out_table->push); + else if (c->proto->feed_end) c->proto->feed_end(c); if (c->refeed_pending) rt_stop_export(req, channel_export_stopped); +} + +#define CHANNEL_AUX_TABLE_DUMP_REQ(inout, imex, pgimex, pushget) static void \ + channel_##inout##_##pushget##_dump_req(struct rt_##pgimex##_request *req) { \ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, pushget, req); \ + debug(" Channel %s.%s " #imex " table " #pushget " request %p\n", cat->c->proto->name, cat->c->name, req); } + +CHANNEL_AUX_TABLE_DUMP_REQ(in, import, import, push) +CHANNEL_AUX_TABLE_DUMP_REQ(in, import, export, get) +CHANNEL_AUX_TABLE_DUMP_REQ(out, export, import, push) +CHANNEL_AUX_TABLE_DUMP_REQ(out, export, export, get) + +#undef CHANNEL_AUX_TABLE_DUMP_REQ + +static uint channel_aux_imex(struct channel_aux_table *cat) +{ + if (cat->c->in_table == cat) + return 0; + else if (cat->c->out_table == cat) + return 1; else - c->refeeding = 0; + bug("Channel aux table must be in_table or out_table"); } -/* Called by protocol for reload from in_table */ -void -channel_schedule_reload(struct channel *c) +static void +channel_aux_stopped(void *data) { - ASSERT(c->in_req.hook); + struct channel_aux_table *cat = data; + struct channel *c = cat->c; + + if (channel_aux_imex(cat)) + c->out_table = NULL; + else + c->in_table = NULL; + + mb_free(cat); + return channel_check_stopped(c); +} + +static void +channel_aux_import_stopped(struct rt_import_request *req) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req); + ASSERT_DIE(cat->stop); +} + +static void +channel_aux_export_stopped(struct rt_export_request *req) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req); + req->hook = NULL; + + if (cat->refeed_pending && !cat->stop) + { + cat->refeed_pending = 0; + rt_request_export(cat->tab, req); + } + else + ASSERT_DIE(cat->stop); +} + +static void +channel_aux_stop(struct channel_aux_table *cat) +{ + cat->stop = 1; - rt_reload_channel_abort(c); - ev_schedule_work(c->reload_event); + rt_stop_import(&cat->push, channel_aux_import_stopped); + rt_stop_export(&cat->get, channel_aux_export_stopped); + + rt_lock_table(cat->tab); + cat->tab->deleted = channel_aux_stopped; + cat->tab->del_data = cat; + rt_unlock_table(cat->tab); } static void -channel_reload_loop(void *ptr) +channel_push_log_state_change(struct rt_import_request *req, u8 state) { - struct channel *c = ptr; + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req); + const char *imex = channel_aux_imex(cat) ? "export" : "import"; + CD(cat->c, "Channel %s table import state changed to %s", imex, rt_import_state_name(state)); +} - /* Start reload */ - if (!c->reload_active) - c->reload_pending = 0; +static void +channel_get_log_state_change(struct rt_export_request *req, u8 state) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + const char *imex = channel_aux_imex(cat) ? "export" : "import"; + CD(cat->c, "Channel %s table export state changed to %s", imex, rt_export_state_name(state)); - if (!rt_reload_channel(c)) + switch (state) { - ev_schedule_work(c->reload_event); + case TES_FEEDING: + if (imex && cat->c->proto->feed_begin) + cat->c->proto->feed_begin(cat->c, !cat->c->refeeding); + else if (!imex) + rt_refresh_begin(&cat->c->in_req); + break; + + case TES_READY: + if (imex && cat->c->proto->feed_end) + cat->c->proto->feed_end(cat->c); + else if (!imex) + rt_refresh_end(&cat->c->in_req); + + if (cat->refeed_pending) + rt_stop_export(&cat->get, channel_aux_export_stopped); + + break; + } +} + +void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); + +static void +channel_in_export_one_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + + if (!rpe->new && !rpe->old) + return; + + rte n0; + struct rte_src *src = rpe->new ? rpe->new->rte.src : rpe->old->rte.src; + rte_update_direct(cat->c, net, RTES_CLONE(rpe->new, &n0), src); +} + +static void +channel_in_export_one_best(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + + if (!rpe->new && !rpe->old) return; + + rte n0; + struct rte_src *src = rpe->old_best ? rpe->old_best->rte.src : rpe->new_best->rte.src; + rte_update_direct(cat->c, net, RTES_CLONE(rpe->new_best, &n0), src); +} + +static void +channel_in_export_bulk_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + for (uint i=0; ic, net, &n0, n0.src); } +} - /* Restart reload */ - if (c->reload_pending) - channel_request_reload(c); +static void +channel_in_export_bulk_best(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + if (!count) + return; + + rte n0 = *feed[0]; + rte_update_direct(cat->c, net, &n0, n0.src); +} + +void do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old); + +static void +channel_out_export_one_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + rte n0; + do_rt_notify_direct(cat->c, net, RTES_CLONE(rpe->new, &n0), RTES_OR_NULL(rpe->old)); +} + +static void +channel_out_export_one_best(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + rte n0; + do_rt_notify_direct(cat->c, net, RTES_CLONE(rpe->new_best, &n0), RTES_OR_NULL(rpe->old_best)); +} + +static void +channel_out_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req); + if (cat->c->ra_mode != RA_ANY) + ASSERT_DIE(count <= 1); + + for (uint i=0; ic, net, &n0, NULL); + } } /* Called by protocol to activate in_table */ void -channel_setup_in_table(struct channel *c) +channel_setup_in_table(struct channel *c, int best) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); + int nlen = sizeof("import") + strlen(c->name) + strlen(c->proto->name) + 3; + + struct { + struct channel_aux_table cat; + struct rtable_config tab_cf; + char name[0]; + } *cat = mb_allocz(c->proto->pool, sizeof(*cat) + nlen); + + bsprintf(cat->name, "%s.%s.import", c->proto->name, c->name); - cf->name = "import"; - cf->addr_type = c->net_type; - cf->internal = 1; + cat->tab_cf.name = cat->name; + cat->tab_cf.addr_type = c->net_type; + + c->in_table = &cat->cat; + c->in_table->push = (struct rt_import_request) { + .name = cat->name, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_in_push_dump_req, + .log_state_change = channel_push_log_state_change, + .preimport = channel_in_preimport, + }; + c->in_table->get = (struct rt_export_request) { + .name = cat->name, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_in_get_dump_req, + .log_state_change = channel_get_log_state_change, + .export_one = best ? channel_in_export_one_best : channel_in_export_one_any, + .export_bulk = best ? channel_in_export_bulk_best : channel_in_export_bulk_any, + }; - c->in_table = rt_setup(c->proto->pool, cf); + c->in_table->c = c; + c->in_table->tab = rt_setup(c->proto->pool, &cat->tab_cf); + self_link(&c->in_table->tab->n); - c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c); + rt_request_import(c->in_table->tab, &c->in_table->push); + rt_request_export(c->in_table->tab, &c->in_table->get); } /* Called by protocol to activate out_table */ void channel_setup_out_table(struct channel *c) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); - cf->name = "export"; - cf->addr_type = c->net_type; - cf->internal = 1; + int nlen = sizeof("export") + strlen(c->name) + strlen(c->proto->name) + 3; + + struct { + struct channel_aux_table cat; + struct rtable_config tab_cf; + char name[0]; + } *cat = mb_allocz(c->proto->pool, sizeof(*cat) + nlen); + + bsprintf(cat->name, "%s.%s.export", c->proto->name, c->name); - c->out_table = rt_setup(c->proto->pool, cf); + cat->tab_cf.name = cat->name; + cat->tab_cf.addr_type = c->net_type; + + c->out_table = &cat->cat; + c->out_table->push = (struct rt_import_request) { + .name = cat->name, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_out_push_dump_req, + .log_state_change = channel_push_log_state_change, + }; + c->out_table->get = (struct rt_export_request) { + .name = cat->name, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_out_get_dump_req, + .log_state_change = channel_get_log_state_change, + .export_one = (c->ra_mode == RA_ANY) ? channel_out_export_one_any : channel_out_export_one_best, + .export_bulk = channel_out_export_bulk, + }; + + c->out_table->c = c; + c->out_table->tab = rt_setup(c->proto->pool, &cat->tab_cf); + self_link(&c->out_table->tab->n); + + rt_request_import(c->out_table->tab, &c->out_table->push); + rt_request_export(c->out_table->tab, &c->out_table->get); } +static void +channel_aux_request_refeed(struct channel_aux_table *cat) +{ + cat->refeed_pending = 1; + rt_stop_export(&cat->get, channel_aux_export_stopped); +} static void channel_do_start(struct channel *c) @@ -686,16 +908,12 @@ channel_do_up(struct channel *c) static void channel_do_pause(struct channel *c) { - /* Need to abort feeding */ - if (c->reload_event) - { - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); - } - /* Stop export */ if (c->out_req.hook) + { rt_stop_export(&c->out_req, channel_export_stopped); + c->refeeding = 0; + } channel_roa_unsubscribe_all(c); @@ -706,6 +924,13 @@ channel_do_pause(struct channel *c) static void channel_do_stop(struct channel *c) { + /* Drop auxiliary tables */ + if (c->in_table) + channel_aux_stop(c->in_table); + + if (c->out_table) + channel_aux_stop(c->out_table); + /* Stop import */ if (c->in_req.hook) rt_stop_import(&c->in_req, channel_import_stopped); @@ -716,16 +941,13 @@ channel_do_stop(struct channel *c) CALL(c->channel->shutdown, c); - /* This have to be done in here, as channel pool is freed before channel_do_down() */ - c->in_table = NULL; - c->reload_event = NULL; - c->out_table = NULL; + channel_roa_unsubscribe_all(c); } static void channel_do_down(struct channel *c) { - ASSERT(!c->reload_active); + ASSERT(!c->out_req.hook && !c->in_req.hook && !c->out_table && !c->in_table); c->proto->active_channels--; @@ -733,14 +955,12 @@ channel_do_down(struct channel *c) memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); - c->in_table = NULL; - c->reload_event = NULL; - c->out_table = NULL; - - /* The in_table and out_table are going to be freed by freeing their resource pools. */ - CALL(c->channel->cleanup, c); + /* This have to be done in here, as channel pool is freed before channel_do_down() */ + bmap_free(&c->export_map); + bmap_free(&c->export_reject_map); + /* Schedule protocol shutddown */ if (proto_is_done(c->proto)) ev_schedule(c->proto->event); @@ -769,7 +989,7 @@ channel_set_state(struct channel *c, uint state) break; case CS_UP: - ASSERT(cs == CS_DOWN || cs == CS_START); + ASSERT(cs == CS_DOWN || cs == CS_START || cs == CS_PAUSE); if (cs == CS_DOWN) channel_do_start(c); @@ -819,8 +1039,8 @@ channel_set_state(struct channel *c, uint state) * completed, it will switch back to ES_READY. This function can be called * even when feeding is already running, in that case it is restarted. */ -void -channel_request_feeding(struct channel *c) +static void +channel_request_table_feeding(struct channel *c) { ASSERT(c->out_req.hook); @@ -828,7 +1048,18 @@ channel_request_feeding(struct channel *c) rt_stop_export(&c->out_req, channel_export_stopped); } -static void +void +channel_request_feeding(struct channel *c) +{ + ASSERT(c->out_req.hook); + + if (c->out_table) + channel_aux_request_refeed(c->out_table); + else + channel_request_table_feeding(c); +} + +void channel_request_reload(struct channel *c) { ASSERT(c->in_req.hook); @@ -836,14 +1067,29 @@ channel_request_reload(struct channel *c) CD(c, "Reload requested"); - c->proto->reload_routes(c); + if (c->in_table) + channel_aux_request_refeed(c->in_table); + else + c->proto->reload_routes(c); +} - /* - * Should this be done before reload_routes() hook? - * Perhaps, but routes are updated asynchronously. - */ - channel_reset_limit(c, &c->rx_limit, PLD_RX); - channel_reset_limit(c, &c->in_limit, PLD_IN); +void +channel_refresh_begin(struct channel *c) +{ + CD(c, "Channel route refresh begin"); + if (c->in_table) + rt_refresh_begin(&c->in_table->push); + else + rt_refresh_begin(&c->in_req); +} + +void +channel_refresh_end(struct channel *c) +{ + if (c->in_table) + rt_refresh_end(&c->in_table->push); + else + rt_refresh_end(&c->in_req); } const struct channel_class channel_basic = { @@ -1001,7 +1247,7 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) channel_request_reload(c); if (export_changed) - channel_request_feeding(c); + channel_request_table_feeding(c); done: CD(c, "Reconfigured"); @@ -1714,7 +1960,7 @@ protos_dump_all(void) WALK_LIST(p, proto_list) { #define DPF(x) (p->x ? " " #x : "") - debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n", + debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s\n", p->name, p, p_states[p->proto_state], p->active_channels, DPF(disabled), DPF(active), DPF(do_start), DPF(do_stop), DPF(reconfiguring)); #undef DPF @@ -1730,6 +1976,20 @@ protos_dump_all(void) debug("\tChannel state: %s/%s/%s\n", c_states[c->channel_state], c->in_req.hook ? rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-", c->out_req.hook ? rt_export_state_name(rt_export_get_state(c->out_req.hook)) : "-"); + if (c->in_table) + { + debug("\tInput aux table:\n"); + rt_dump_hooks(c->in_table->tab); + rt_dump(c->in_table->tab); + debug("\tEnd of input aux table.\n"); + } + if (c->out_table) + { + debug("\tOutput aux table:\n"); + rt_dump_hooks(c->in_table->tab); + rt_dump(c->in_table->tab); + debug("\tEnd of output aux table.\n"); + } } if (p->proto->dump && (p->proto_state != PS_DOWN)) @@ -2151,11 +2411,11 @@ channel_show_stats(struct channel *c) cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", in_routes, out_routes, SRI(pref)); - cli_msg(-1006, " Route change stats: received rejected filtered ignored RX limit IN limit accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u %10u", + cli_msg(-1006, " Route change stats: received rejected filtered ignored limited accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u", SCI(updates_received), SCI(updates_invalid), SCI(updates_filtered), SRI(updates_ignored), - SCI(updates_limited_rx), SCI(updates_limited_in), + SCI(updates_limited_rx) + SCI(updates_limited_in), SRI(updates_accepted)); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u --- %10u", SCI(withdraws_received), SCI(withdraws_invalid), diff --git a/nest/protocol.h b/nest/protocol.h index 7447cbf0..f9996b18 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -207,7 +207,6 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); - struct rte *(*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); u32 (*rte_igp_metric)(struct rte *); @@ -544,24 +543,29 @@ struct channel { u8 reloadable; /* Hook reload_routes() is allowed on the channel */ u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ u8 gr_wait; /* Route export to channel is postponed until graceful restart */ + u8 restart_export; /* Route export should restart as soon as it stops */ btime last_state_change; /* Time of last state transition */ - struct rtable *in_table; /* Internal table for received routes */ - struct event *reload_event; /* Event responsible for reloading from in_table */ - struct fib_iterator reload_fit; /* FIB iterator in in_table used during reloading */ - struct rte_storage *reload_next_rte; /* Route iterator in in_table used during reloading */ - u8 reload_active; /* Iterator reload_fit is linked */ + struct channel_aux_table *in_table; /* Internal table for received routes */ u8 reload_pending; /* Reloading and another reload is scheduled */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ u8 rpki_reload; /* RPKI changes trigger channel reload */ - struct rtable *out_table; /* Internal table for exported routes */ + struct channel_aux_table *out_table; /* Internal table for exported routes */ list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ }; +struct channel_aux_table { + struct channel *c; + struct rt_import_request push; + struct rt_export_request get; + rtable *tab; + u8 stop; + u8 refeed_pending; +}; /* * Channel states @@ -627,7 +631,7 @@ struct channel *proto_add_channel(struct proto *p, struct channel_config *cf); int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf); void channel_set_state(struct channel *c, uint state); -void channel_setup_in_table(struct channel *c); +void channel_setup_in_table(struct channel *c, int best); void channel_setup_out_table(struct channel *c); void channel_schedule_reload(struct channel *c); @@ -636,6 +640,9 @@ static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP) static inline void channel_close(struct channel *c) { channel_set_state(c, CS_STOP); } void channel_request_feeding(struct channel *c); +void channel_request_reload(struct channel *c); +void channel_refresh_begin(struct channel *c); +void channel_refresh_end(struct channel *c); void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); int channel_reconfigure(struct channel *c, struct channel_config *cf); diff --git a/nest/route.h b/nest/route.h index d0568133..cb66be2a 100644 --- a/nest/route.h +++ b/nest/route.h @@ -150,7 +150,6 @@ struct rtable_config { int gc_max_ops; /* Maximum number of operations before GC is run */ int gc_min_time; /* Minimum time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ }; @@ -172,10 +171,8 @@ typedef struct rtable { struct hmap id_map; struct hostcache *hostcache; struct rtable_config *config; /* Configuration of this table */ - struct config *deleted; /* Table doesn't exist in current configuration, - * delete as soon as use_count becomes 0 and remove - * obstacle from this routing table. - */ + void (*deleted)(void *); /* Table should free itself. Call this when it is done. */ + void *del_data; struct event *rt_event; /* Routing table event */ btime last_rt_change; /* Last time when route changed */ btime base_settle_time; /* Start time of rtable settling interval */ @@ -184,7 +181,6 @@ typedef struct rtable { byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte hcu_scheduled; /* Hostcache update is scheduled */ byte nhu_state; /* Next Hop Update state */ - byte internal; /* This table is internal for some other object */ struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ @@ -249,6 +245,7 @@ typedef struct rte { u8 generation; /* If this route import is based on other previously exported route, this value should be 1 + MAX(generation of the parent routes). Otherwise the route is independent and this value is zero. */ + u8 stale_cycle; /* Auxiliary value for route refresh */ } rte; struct rte_storage { @@ -256,13 +253,11 @@ struct rte_storage { struct rte rte; /* Route data */ }; -#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) -#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTES_CLONE(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) +#define RTES_OR_NULL(r) ((r) ? &((r)->rte) : NULL) #define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_STALE 4 /* Route is stale in a refresh cycle */ -#define REF_DISCARD 8 /* Route is scheduled for discard */ -#define REF_MODIFY 16 /* Route is scheduled for modify */ +#define REF_USE_STALE 4 /* Do not reset route's stale_cycle to the actual value */ /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ static inline int rte_is_valid(const rte *r) { return r && !(r->flags & REF_FILTERED); } @@ -283,7 +278,6 @@ struct rt_import_request { /* Preimport is called when the @new route is just-to-be inserted, replacing @old. * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); - struct rte *(*rte_modify)(struct rte *, struct linpool *); }; struct rt_import_hook { @@ -303,6 +297,10 @@ struct rt_import_hook { btime last_state_change; /* Time of last state transition */ u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ }; @@ -455,9 +453,9 @@ void *net_route(rtable *tab, const net_addr *n); int net_roa_check(rtable *tab, const net_addr *n, u32 asn); int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct rt_import_request *); -void rt_refresh_end(rtable *t, struct rt_import_request *); -void rt_modify_stale(rtable *t, struct rt_import_request *); + +void rt_refresh_begin(struct rt_import_request *); +void rt_refresh_end(struct rt_import_request *); void rt_schedule_prune(rtable *t); void rte_dump(struct rte_storage *); void rte_free(struct rte_storage *, rtable *); @@ -466,15 +464,9 @@ void rt_dump(rtable *); void rt_dump_all(void); void rt_dump_hooks(rtable *); void rt_dump_hooks_all(void); -int rt_reload_channel(struct channel *c); -void rt_reload_channel_abort(struct channel *c); -void rt_refeed_channel(struct channel *c); void rt_prune_sync(rtable *t, int all); -int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); - /* Default limit for ECMP next hops, defined in sysdep code */ extern const int rt_default_ecmp; @@ -789,6 +781,7 @@ void rta__free(rta *r); static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } rta *rta_do_cow(rta *o, linpool *lp); static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } +static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; } void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *); diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 1bece201..77fd3c3b 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -1245,8 +1245,7 @@ rta_do_cow(rta *o, linpool *lp) memcpy(*nhn, nho, nexthop_size(nho)); nhn = &((*nhn)->next); } - r->cached = 0; - r->uc = 0; + rta_uncache(r); return r; } diff --git a/nest/rt-show.c b/nest/rt-show.c index 235d72e4..d942b8e1 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -95,7 +95,10 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary } if (d->verbose) + { + cli_printf(c, -1008, "\tInternal route ID: %uL %uG %uS", e->src->private_id, e->src->global_id, e->stale_cycle); rta_show(c, a); + } } static uint @@ -103,7 +106,7 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) + if (rte_is_valid(RTES_OR_NULL(e))) count++; return count; } @@ -113,7 +116,7 @@ rte_feed_obtain(net *n, rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) + if (rte_is_valid(RTES_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; diff --git a/nest/rt-table.c b/nest/rt-table.c index e7ff2816..2f480992 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -50,7 +50,6 @@ pool *rt_table_pool; static linpool *rte_update_pool; list routing_tables; -list deleted_routing_tables; static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); @@ -386,9 +385,11 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s %c %s %N %uL %uG %s", - name, dir, msg, e->net, e->src->private_id, e->src->global_id, - rta_dest_name(e->attrs->dest)); + log(L_TRACE "%s %c %s %N src %uL %uG %uS %s%s", + name, dir, msg, e->net, + e->src->private_id, e->src->global_id, e->stale_cycle, + rta_dest_name(e->attrs->dest), + rte_is_filtered(e) ? " (filtered)" : ""); } static inline void @@ -427,7 +428,7 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) + if (rte_is_valid(RTES_OR_NULL(e))) count++; return count; } @@ -437,7 +438,7 @@ rte_feed_obtain(net *n, struct rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) + if (rte_is_valid(RTES_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; @@ -508,10 +509,11 @@ export_filter(struct channel *c, rte *rt, int silent) return export_filter_(c, rt, rte_update_pool, silent); } +void do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old); + static void do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { - struct proto *p = c->proto; struct channel_export_stats *stats = &c->export_stats; if (c->refeeding && new) @@ -528,28 +530,31 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) if (!new && old) CHANNEL_LIMIT_POP(c, OUT); + /* Store route export state */ + if (old) + bmap_clear(&c->export_map, old->id); + + if (new) + bmap_set(&c->export_map, new->id); + /* Apply export table */ - struct rte_storage *old_exported = NULL; if (c->out_table) - { - if (!rte_update_out(c, net, new, old, &old_exported)) - { - channel_rte_trace_out(D_ROUTES, c, new, "idempotent"); - return; - } - } + rte_import(&c->out_table->push, net, new, old ? old->src : new->src); + else + do_rt_notify_direct(c, net, new, old); +} + +void +do_rt_notify_direct(struct channel *c, const net_addr *net, rte *new, const rte *old) +{ + struct proto *p = c->proto; + struct channel_export_stats *stats = &c->export_stats; if (new) stats->updates_accepted++; else stats->withdraws_accepted++; - if (old) - bmap_clear(&c->export_map, old->id); - - if (new) - bmap_set(&c->export_map, new->id); - if (p->debug & D_ROUTES) { if (new && old) @@ -560,10 +565,7 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) channel_rte_trace_out(D_ROUTES, c, old, "removed"); } - p->rt_notify(p, c, net, new, old_exported ? &old_exported->rte : old); - - if (c->out_table && old_exported) - rte_free(old_exported, c->out_table); + p->rt_notify(p, c, net, new, old); } static void @@ -784,7 +786,7 @@ rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_ rte n0; if (rpe->new_best != rpe->old_best) - rt_notify_basic(c, net, RTE_COPY(rpe->new_best, &n0), RTE_OR_NULL(rpe->old_best)); + rt_notify_basic(c, net, RTES_CLONE(rpe->new_best, &n0), RTES_OR_NULL(rpe->old_best)); /* Drop the old stored rejection if applicable. * new->id == old->id happens when updating hostentries. */ @@ -799,7 +801,7 @@ rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pend rte n0; if (rpe->new != rpe->old) - rt_notify_basic(c, net, RTE_COPY(rpe->new, &n0), RTE_OR_NULL(rpe->old)); + rt_notify_basic(c, net, RTES_CLONE(rpe->new, &n0), RTES_OR_NULL(rpe->old)); /* Drop the old stored rejection if applicable. * new->id == old->id happens when updating hostentries. */ @@ -974,6 +976,10 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; + /* Set the stale cycle unless already set */ + if (new && !(new->flags & REF_USE_STALE)) + new->stale_cycle = c->stale_set; + /* Find and remove original route from the same protocol */ struct rte_storage **before_old = rte_find(net, src); @@ -999,8 +1005,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (new && rte_same(old, new)) { /* No changes, ignore the new route and refresh the old one */ - - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) { @@ -1138,18 +1143,23 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr new_stored->rte.id = old->id; } + _Bool nb = (new_stored == net->routes); + _Bool ob = (old_best == old); + /* Log the route change */ - if (new_ok) - rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); - else if (old_ok) + if (new_ok && old_ok) { - if (old != old_best) - rt_rte_trace_in(D_ROUTES, req, old, "removed"); - else if (net->routes && rte_is_ok(&net->routes->rte)) - rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); - else - rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); + const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, best_indicator[nb][ob]); } + else if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, + (!net->routes->next || !rte_is_ok(&net->routes->next->rte)) ? "added [sole]" : + nb ? "added [best]" : "added"); + else if (old_ok) + rt_rte_trace_in(D_ROUTES, req, old, + (!net->routes || !rte_is_ok(&net->routes->rte)) ? "removed [sole]" : + ob ? "removed [best]" : "removed"); /* Propagate the route change */ rte_announce(table, net, new_stored, old_stored, @@ -1197,12 +1207,15 @@ channel_preimport(struct rt_import_request *req, rte *new, rte *old) { struct channel *c = SKIP_BACK(struct channel, in_req, req); - if (new && !old) - if (CHANNEL_LIMIT_PUSH(c, RX)) - return NULL; + if (!c->in_table) + { + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return NULL; - if (!new && old) - CHANNEL_LIMIT_POP(c, RX); + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); + } int new_in = new && !rte_is_filtered(new); int old_in = old && !rte_is_filtered(old); @@ -1223,7 +1236,22 @@ channel_preimport(struct rt_import_request *req, rte *new, rte *old) return new; } -static void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); +rte * +channel_in_preimport(struct rt_import_request *req, rte *new, rte *old) +{ + struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req); + + if (new && !old) + if (CHANNEL_LIMIT_PUSH(cat->c, RX)) + return NULL; + + if (!new && old) + CHANNEL_LIMIT_POP(cat->c, RX); + + return new; +} + +void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); void rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) @@ -1233,13 +1261,14 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) ASSERT(c->channel_state == CS_UP); - if (c->in_table && !rte_update_in(c, n, new, src)) - return; - return rte_update_direct(c, n, new, src); + if (c->in_table) + rte_import(&c->in_table->push, n, new, src); + else + rte_update_direct(c, n, new, src); } -static void +void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { const struct filter *filter = c->in_filter; @@ -1323,24 +1352,6 @@ rte_discard(net *net, rte *old) /* Non-filtered route deletion, used during garb rte_update_unlock(); } -/* Modify existing route by protocol hook, used for long-lived graceful restart */ -static inline void -rte_modify(net *net, rte *old) -{ - rte_update_lock(); - - rte *new = old->sender->req->rte_modify(old, rte_update_pool); - if (new != old) - { - if (new) - new->flags = old->flags & ~REF_MODIFY; - - rte_recalculate(old->sender, net, new, old->src); - } - - rte_update_unlock(); -} - /* Check rtable for best route to given net whether it would be exported do p */ int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) @@ -1419,6 +1430,9 @@ rt_request_import(rtable *tab, struct rt_import_request *req) hook->req = req; hook->table = tab; + if (!hook->stale_set) + hook->stale_set = hook->stale_valid = hook->stale_pruning = hook->stale_pruned = 1; + rt_set_import_state(hook, TIS_UP); hook->n = (node) {}; @@ -1499,20 +1513,41 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r * routes to the routing table (by rte_update()). After that, all protocol * routes (more precisely routes with @c as @sender) not sent during the * refresh cycle but still in the table from the past are pruned. This is - * implemented by marking all related routes as stale by REF_STALE flag in - * rt_refresh_begin(), then marking all related stale routes with REF_DISCARD - * flag in rt_refresh_end() and then removing such routes in the prune loop. - */ + * implemented by setting rte->stale_cycle to req->stale_set in rte_update() + * and then dropping all routes with old stale_cycle values in table prune loop. */ void -rt_refresh_begin(rtable *t, struct rt_import_request *req) +rt_refresh_begin(struct rt_import_request *req) { - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if (e->rte.sender == req->hook) - e->rte.flags |= REF_STALE; - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); + + ASSERT_DIE(hook->stale_set == hook->stale_valid); + + /* If the pruning routine is too slow */ + if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) + || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) + { + log(L_WARN "Route refresh flood in table %s", hook->table->name); + FIB_WALK(&hook->table->fib, net, n) + { + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; + } + FIB_WALK_END; + hook->stale_set = 1; + hook->stale_valid = 0; + hook->stale_pruned = 0; + } + else if (!++hook->stale_set) + { + /* Let's reserve the stale_cycle zero value for always-invalid routes */ + hook->stale_set = 1; + hook->stale_valid = 0; + } + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); } /** @@ -1524,43 +1559,18 @@ rt_refresh_begin(rtable *t, struct rt_import_request *req) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct rt_import_request *req) +rt_refresh_end(struct rt_import_request *req) { - int prune = 0; - - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE)) - { - e->rte.flags |= REF_DISCARD; - prune = 1; - } - } - FIB_WALK_END; - - if (prune) - rt_schedule_prune(t); -} + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); -void -rt_modify_stale(rtable *t, struct rt_import_request *req) -{ - int prune = 0; + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) - { - e->rte.flags |= REF_MODIFY; - prune = 1; - } - } - FIB_WALK_END; + rt_schedule_prune(hook->table); - if (prune) - rt_schedule_prune(t); + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); } /** @@ -1613,9 +1623,6 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); - - WALK_LIST2(t, n, deleted_routing_tables, n) - rt_dump(t); } void @@ -1658,9 +1665,6 @@ rt_dump_hooks_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump_hooks(t); - - WALK_LIST2(t, n, deleted_routing_tables, n) - rt_dump_hooks(t); } static inline void @@ -1796,9 +1800,7 @@ rt_free(resource *_r) DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - - if (r->internal) - return; + ASSERT_DIE(r->deleted); r->config->table = NULL; rem_node(&r->n); @@ -1853,20 +1855,17 @@ rt_setup(pool *pp, struct rtable_config *cf) fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); - if (!(t->internal = cf->internal)) - { - init_list(&t->imports); - init_list(&t->exports); - hmap_init(&t->id_map, p, 1024); - hmap_set(&t->id_map, 0); + init_list(&t->imports); + init_list(&t->exports); + hmap_init(&t->id_map, p, 1024); + hmap_set(&t->id_map, 0); - init_list(&t->subscribers); + init_list(&t->subscribers); - t->rt_event = ev_new_init(p, rt_event, t); - t->last_rt_change = t->gc_time = current_time(); + t->rt_event = ev_new_init(p, rt_event, t); + t->last_rt_change = t->gc_time = current_time(); - t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; - } + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; return t; } @@ -1884,7 +1883,6 @@ rt_init(void) rt_table_pool = rp_new(&root_pool, "Routing tables"); rte_update_pool = lp_new_default(rt_table_pool); init_list(&routing_tables); - init_list(&deleted_routing_tables); } @@ -1925,6 +1923,13 @@ rt_prune_table(rtable *tab) WALK_LIST2(ih, n, tab->imports, n) if (ih->import_state == TIS_STOP) rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; @@ -1936,22 +1941,11 @@ again: rescan: for (struct rte_storage *e=n->routes; e; e=e->next) { - if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD)) - { - if (limit <= 0) - { - FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; - } - - rte_discard(n, &e->rte); - limit--; - - goto rescan; - } + struct rt_import_hook *s = e->rte.sender; - if (e->rte.flags & REF_MODIFY) + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { if (limit <= 0) { @@ -1960,7 +1954,7 @@ again: return; } - rte_modify(n, &e->rte); + rte_discard(n, &e->rte); limit--; goto rescan; @@ -1998,6 +1992,13 @@ again: mb_free(ih); rt_unlock_table(tab); } + else if (ih->stale_pruning != ih->stale_pruned) + { + ih->stale_pruned = ih->stale_pruning; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } } void @@ -2204,7 +2205,10 @@ rt_next_hop_update_net(rtable *tab, net *n) for (int i=0; irte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); rte_announce_i(tab, n, updates[i].new, updates[i].old, new, old_best); } @@ -2308,11 +2312,12 @@ rt_unlock_table(rtable *r) { if (!--r->use_count && r->deleted) { - struct config *conf = r->deleted; + void *del_data = r->del_data; + void (*deleted)(void *) = r->deleted; /* Delete the routing table by freeing its pool */ rt_shutdown(r); - config_del_obstacle(conf); + deleted(del_data); } } @@ -2323,6 +2328,8 @@ rt_find_table_config(struct config *cf, char *name) return (sym && (sym->class == SYM_TABLE)) ? sym->table : NULL; } +static void rt_config_del_obstacle(void *data) { config_del_obstacle(data); } + /** * rt_commit - commit new routing table configuration * @new: new configuration @@ -2361,9 +2368,10 @@ rt_commit(struct config *new, struct config *old) else { DBG("\t%s: deleted\n", o->name); - ot->deleted = old; - config_add_obstacle(old); rt_lock_table(ot); + ot->deleted = rt_config_del_obstacle; + ot->del_data = old; + config_add_obstacle(old); rt_unlock_table(ot); } } @@ -2441,285 +2449,6 @@ done: } -/* - * Import table - */ - -int -rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - struct rtable *tab = c->in_table; - net *net; - - if (new) - net = net_get(tab, n); - else - { - net = net_find(tab, n); - - if (!net) - goto drop_withdraw; - } - - /* Find the old rte */ - struct rte_storage **pos = rte_find(net, src); - if (*pos) - { - rte *old = &(*pos)->rte; - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - - goto drop_update; - } - - if (!new) - CHANNEL_LIMIT_POP(c, RX); - - /* Move iterator if needed */ - if (*pos == c->reload_next_rte) - c->reload_next_rte = (*pos)->next; - - /* Remove the old rte */ - struct rte_storage *del = *pos; - *pos = (*pos)->next; - rte_free(del, tab); - tab->rt_count--; - } - else if (new) - { - if (CHANNEL_LIMIT_PUSH(c, RX)) - { - /* Required by rte_trace_in() */ - new->net = n; - - channel_rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - else - goto drop_withdraw; - - if (!new) - { - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - struct rte_storage *e = rte_store(new, net, tab); - e->rte.lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop_update: - c->import_stats.updates_received++; - c->in_req.hook->stats.updates_ignored++; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 0; - -drop_withdraw: - c->import_stats.withdraws_received++; - c->in_req.hook->stats.withdraws_ignored++; - return 0; -} - -int -rt_reload_channel(struct channel *c) -{ - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); - - if (!c->reload_active) - { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } - - do { - for (struct rte_storage *e = c->reload_next_rte; e; e = e->next) - { - if (max_feed-- <= 0) - { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; - } - - rte r = e->rte; - rte_update_direct(c, r.net, &r, r.src); - } - - c->reload_next_rte = NULL; - - FIB_ITERATE_START(&tab->fib, fit, net, n) - { - if (c->reload_next_rte = n->routes) - { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } - } - FIB_ITERATE_END; - } - while (c->reload_next_rte); - - c->reload_active = 0; - return 1; -} - -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) - { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; - } -} - -void -rt_prune_sync(rtable *t, int all) -{ - struct fib_iterator fit; - - FIB_ITERATE_INIT(&fit, &t->fib); - -again: - FIB_ITERATE_START(&t->fib, &fit, net, n) - { - struct rte_storage *e, **ee = &n->routes; - - while (e = *ee) - { - if (all || (e->rte.flags & (REF_STALE | REF_DISCARD))) - { - *ee = e->next; - rte_free(e, t); - t->rt_count--; - } - else - ee = &e->next; - } - - if (all || !n->routes) - { - FIB_ITERATE_PUT(&fit); - fib_delete(&t->fib, n); - goto again; - } - } - FIB_ITERATE_END; -} - - -/* - * Export table - */ - -int -rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old0, struct rte_storage **old_exported) -{ - struct rtable *tab = c->out_table; - struct rte_src *src; - net *net; - - if (new) - { - net = net_get(tab, n); - src = new->src; - } - else - { - net = net_find(tab, n); - src = old0->src; - - if (!net) - goto drop; - } - - /* Find the old rte */ - struct rte_storage **pos = (c->ra_mode == RA_ANY) ? rte_find(net, src) : &net->routes; - struct rte_storage *old = NULL; - - if (old = *pos) - { - if (new && rte_same(&(*pos)->rte, new)) - goto drop; - - /* Remove the old rte */ - *pos = old->next; - *old_exported = old; - tab->rt_count--; - } - - if (!new) - { - if (!old) - goto drop; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - struct rte_storage *e = rte_store(new, net, tab); - e->rte.lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop: - return 0; -} - -void -rt_refeed_channel(struct channel *c) -{ - if (!c->out_table) - { - channel_request_feeding(c); - return; - } - - ASSERT_DIE(c->ra_mode != RA_ANY); - - c->proto->feed_begin(c, 0); - - FIB_WALK(&c->out_table->fib, net, n) - { - if (!n->routes) - continue; - - rte e = n->routes->rte; - c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL); - } - FIB_WALK_END; - - c->proto->feed_end(c); -} - - /* * Hostcache */ diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 90490b4f..892b26e3 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -2267,30 +2267,44 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return !old_suppressed; } -rte * -bgp_rte_modify_stale(struct rte *r, struct linpool *pool) +void +bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); - const struct adata *ad = ea ? ea->u.ptr : NULL; - uint flags = ea ? ea->flags : BAF_PARTIAL; + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); - if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) - return NULL; + do { + rte *r = feed[--count]; + if (r->sender != c->c.in_req.hook) + continue; + + /* A new route, do not mark as stale */ + if (r->stale_cycle == c->c.in_req.hook->stale_set) + continue; + + eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + const struct adata *ad = ea ? ea->u.ptr : NULL; + uint flags = ea ? ea->flags : BAF_PARTIAL; - if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) - return r; + rte e0 = *r; + e0.flags |= REF_USE_STALE; - rta *a = rta_do_cow(r->attrs, pool); - - _Thread_local static rte e0; - e0 = *r; - e0.attrs = a; + if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + rte_import(&c->c.in_req, n, NULL, r->src); - bgp_set_attr_ptr(&(a->eattrs), pool, BA_COMMUNITY, flags, - int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); - e0.pflags |= BGP_REF_STALE; + else if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + rte_import(&c->c.in_req, n, &e0, r->src); - return &e0; + else { + rta *a = e0.attrs = rta_do_cow(r->attrs, bgp_linpool); + + bgp_set_attr_ptr(&(a->eattrs), bgp_linpool, BA_COMMUNITY, flags, + int_set_add(bgp_linpool, ad, BGP_COMM_LLGR_STALE)); + e0.pflags |= BGP_REF_STALE; + + rte_import(&c->c.in_req, n, &e0, r->src); + lp_flush(bgp_linpool); + } + } while (count); } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 78c36bc7..35e9ea59 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -140,6 +140,15 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_graceful_restart_feed(struct bgp_channel *c); +static inline void channel_refresh_end_reload(struct channel *c) +{ + channel_refresh_end(c); + + if (c->in_table) + channel_request_reload(c); +} + /** * bgp_open - open a BGP instance * @p: BGP instance @@ -775,25 +784,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c.in_req); + channel_refresh_begin(&c->c); break; case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c.in_req); - rt_refresh_begin(c->c.table, &c->c.in_req); + channel_refresh_end(&c->c); + channel_refresh_begin(&c->c); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c.in_req); - rt_modify_stale(c->c.table, &c->c.in_req); + channel_refresh_begin(&c->c); + bgp_graceful_restart_feed(c); break; } } else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c.in_req); - rt_refresh_end(c->c.table, &c->c.in_req); + channel_refresh_begin(&c->c); + channel_refresh_end(&c->c); } /* Reset bucket and prefix tables */ @@ -811,6 +820,50 @@ bgp_handle_graceful_restart(struct bgp_proto *p) tm_start(p->gr_timer, p->conn->remote_caps->gr_time S); } +static void +bgp_graceful_restart_feed_done(struct rt_export_request *req) +{ + req->hook = NULL; +} + +static void +bgp_graceful_restart_feed_dump_req(struct rt_export_request *req) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + debug(" BGP-GR %s.%s export request %p\n", c->c.proto->name, c->c.name, req); +} + +static void +bgp_graceful_restart_feed_log_state_change(struct rt_export_request *req, u8 state) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct bgp_proto *p = (void *) c->c.proto; + BGP_TRACE(D_EVENTS, "Long-lived graceful restart export state changed to %s", rt_export_state_name(state)); + + if (state == TES_READY) + rt_stop_export(req, bgp_graceful_restart_feed_done); +} + +static void +bgp_graceful_restart_drop_export(struct rt_export_request *req UNUSED, const net_addr *n UNUSED, struct rt_pending_export *rpe UNUSED) +{ /* Nothing to do */ } + +static void +bgp_graceful_restart_feed(struct bgp_channel *c) +{ + c->stale_feed = (struct rt_export_request) { + .name = "BGP-GR", + .trace_routes = c->c.debug | c->c.proto->debug, + .dump_req = bgp_graceful_restart_feed_dump_req, + .log_state_change = bgp_graceful_restart_feed_log_state_change, + .export_bulk = bgp_rte_modify_stale, + .export_one = bgp_graceful_restart_drop_export, + }; + + rt_request_export(c->c.table, &c->stale_feed); +} + + /** * bgp_graceful_restart_done - finish active BGP graceful restart * @c: BGP channel @@ -833,8 +886,11 @@ bgp_graceful_restart_done(struct bgp_channel *c) if (!p->gr_active_num) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); + if (c->stale_feed.hook) + rt_stop_export(&c->stale_feed, bgp_graceful_restart_feed_done); + tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c.in_req); + channel_refresh_end_reload(&c->c); } /** @@ -876,7 +932,7 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c.in_req); + bgp_graceful_restart_feed(c); } } else @@ -914,10 +970,7 @@ bgp_refresh_begin(struct bgp_channel *c) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c.in_req); + channel_refresh_begin(&c->c); } /** @@ -938,10 +991,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_prune_sync(c->c.in_table, 0); + channel_refresh_end_reload(&c->c); } @@ -1408,12 +1458,9 @@ bgp_reload_routes(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ASSERT(p->conn && (p->route_refresh || c->c.in_table)); + ASSERT(p->conn && (p->route_refresh)); - if (c->c.in_table) - channel_schedule_reload(C); - else - bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); + bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); } static void @@ -1693,7 +1740,6 @@ bgp_init(struct proto_config *CF) P->rte_better = bgp_rte_better; P->rte_mergable = bgp_rte_mergable; P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; - P->rte_modify = bgp_rte_modify_stale; P->rte_igp_metric = bgp_rte_igp_metric; p->cf = cf; @@ -1756,7 +1802,7 @@ bgp_channel_start(struct channel *C) bgp_init_prefix_table(c); if (c->cf->import_table) - channel_setup_in_table(C); + channel_setup_in_table(C, 0); if (c->cf->export_table) channel_setup_out_table(C); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index c79dd1b2..342dc023 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -366,6 +366,7 @@ struct bgp_channel { timer *stale_timer; /* Long-lived stale timer for LLGR */ u32 stale_time; /* Stored LLGR stale time from last session */ + struct rt_export_request stale_feed; /* Feeder request for stale route modification */ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ @@ -585,7 +586,7 @@ void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); -struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); +void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint); u32 bgp_rte_igp_metric(struct rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); int bgp_preexport(struct channel *, struct rte *); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index f1e6d7d2..647551e5 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -2695,7 +2695,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { case BGP_RR_REQUEST: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - rt_refeed_channel(&c->c); + channel_request_feeding(&c->c); break; case BGP_RR_BEGIN: diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index 943485d7..897edc09 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -661,9 +661,9 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_ * a refresh cycle. */ if (cache->p->roa4_channel) - rt_refresh_begin(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); + rt_refresh_begin(&cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_begin(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); + rt_refresh_begin(&cache->p->roa6_channel->in_req); cache->p->refresh_channels = 1; } @@ -819,9 +819,9 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da { cache->p->refresh_channels = 0; if (cache->p->roa4_channel) - rt_refresh_end(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); + rt_refresh_end(&cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_end(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); + rt_refresh_end(&cache->p->roa6_channel->in_req); } cache->last_update = current_time(); diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 40a58442..609ee921 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -285,249 +285,24 @@ krt_metric(rte *a) } static inline int -krt_same_key(rte *a, rte *b) +krt_rte_better(rte *a, rte *b) { - return (krt_metric(a) == krt_metric(b)); -} - -static inline int -krt_uptodate(rte *a, rte *b) -{ - return (a->attrs == b->attrs); -} - -static void -krt_learn_announce_update(struct krt_proto *p, rte *e) -{ - rte e0 = { - .attrs = rta_clone(e->attrs), - .src = p->p.main_source, - }; - - rte_update(p->p.main_channel, e->net, &e0, p->p.main_source); -} - -static void -krt_learn_announce_delete(struct krt_proto *p, net_addr *n) -{ - rte_update(p->p.main_channel, n, NULL, p->p.main_source); + return (krt_metric(a) > krt_metric(b)); } /* Called when alien route is discovered during scan */ static void -krt_learn_scan(struct krt_proto *p, rte *e) -{ - net *n = net_get(p->krt_table, e->net); - struct rte_storage *m, **mm; - - struct rte_storage *ee = rte_store(e, n, p->krt_table); - - for(mm = &n->routes; m = *mm; mm = &m->next) - if (krt_same_key(&m->rte, e)) - break; - if (m) - { - if (krt_uptodate(&m->rte, e)) - { - krt_trace_in_rl(&rl_alien, p, e, "[alien] seen"); - rte_free(ee, p->krt_table); - m->rte.pflags |= KRT_REF_SEEN; - } - else - { - krt_trace_in(p, e, "[alien] updated"); - *mm = m->next; - rte_free(m, p->krt_table); - m = NULL; - } - } - else - krt_trace_in(p, e, "[alien] created"); - - if (!m) - { - ee->next = n->routes; - n->routes = ee; - ee->rte.pflags |= KRT_REF_SEEN; - } -} - -static void -krt_learn_prune(struct krt_proto *p) +krt_learn_rte(struct krt_proto *p, rte *e) { - struct fib *fib = &p->krt_table->fib; - struct fib_iterator fit; - - KRT_TRACE(p, D_EVENTS, "Pruning inherited routes"); - - FIB_ITERATE_INIT(&fit, fib); -again: - FIB_ITERATE_START(fib, &fit, net, n) - { - struct rte_storage *e, **ee, *best, **pbest, *old_best; - - /* - * Note that old_best may be NULL even if there was an old best route in - * the previous step, because it might be replaced in krt_learn_scan(). - * But in that case there is a new valid best route. - */ - - old_best = NULL; - best = NULL; - pbest = NULL; - ee = &n->routes; - while (e = *ee) - { - if (e->rte.pflags & KRT_REF_BEST) - old_best = e; - - if (!(e->rte.pflags & KRT_REF_SEEN)) - { - *ee = e->next; - rte_free(e, p->krt_table); - continue; - } - - if (!best || krt_metric(&best->rte) > krt_metric(&e->rte)) - { - best = e; - pbest = ee; - } - - e->rte.pflags &= ~(KRT_REF_SEEN | KRT_REF_BEST); - ee = &e->next; - } - if (!n->routes) - { - DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen); - if (old_best) - krt_learn_announce_delete(p, n->n.addr); - - FIB_ITERATE_PUT(&fit); - fib_delete(fib, n); - goto again; - } - - best->rte.pflags |= KRT_REF_BEST; - *pbest = best->next; - best->next = n->routes; - n->routes = best; - - if ((best != old_best) || p->reload) - { - DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(&best->rte)); - krt_learn_announce_update(p, &best->rte); - } - else - DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(&best->rte)); - } - FIB_ITERATE_END; - - p->reload = 0; -} - -static void -krt_learn_async(struct krt_proto *p, rte *e, int new) -{ - net *n = net_get(p->krt_table, e->net); - struct rte_storage *g, **gg, *best, **bestp, *old_best; - - ASSERT(!e->attrs->cached); - e->attrs->pref = p->p.main_channel->preference; - - struct rte_storage *ee = rte_store(e, n, p->krt_table); - - old_best = n->routes; - for(gg=&n->routes; g = *gg; gg = &g->next) - if (krt_same_key(&g->rte, e)) - break; - if (new) - { - if (g) - { - if (krt_uptodate(&g->rte, e)) - { - krt_trace_in(p, e, "[alien async] same"); - rte_free(ee, p->krt_table); - return; - } - krt_trace_in(p, e, "[alien async] updated"); - *gg = g->next; - rte_free(g, p->krt_table); - } - else - krt_trace_in(p, e, "[alien async] created"); - - ee->next = n->routes; - n->routes = ee; - } - else if (!g) - { - krt_trace_in(p, e, "[alien async] delete failed"); - rte_free(ee, p->krt_table); - return; - } - else - { - krt_trace_in(p, e, "[alien async] removed"); - *gg = g->next; - rte_free(ee, p->krt_table); - rte_free(g, p->krt_table); - } - best = n->routes; - bestp = &n->routes; - for(gg=&n->routes; g=*gg; gg=&g->next) - { - if (krt_metric(&best->rte) > krt_metric(&g->rte)) - { - best = g; - bestp = gg; - } - - g->rte.pflags &= ~KRT_REF_BEST; - } - - if (best) - { - best->rte.pflags |= KRT_REF_BEST; - *bestp = best->next; - best->next = n->routes; - n->routes = best; - } - - if (best != old_best) - { - DBG("krt_learn_async: distributing change\n"); - if (best) - krt_learn_announce_update(p, &best->rte); - else - krt_learn_announce_delete(p, n->n.addr); - } + e->src = rt_get_source(&p->p, krt_metric(e)); + rte_update(p->p.main_channel, e->net, e, e->src); } static void krt_learn_init(struct krt_proto *p) { if (KRT_CF->learn) - { - struct rtable_config *cf = mb_allocz(p->p.pool, sizeof(struct rtable_config)); - cf->name = "Inherited"; - cf->addr_type = p->p.net_type; - cf->internal = 1; - - p->krt_table = rt_setup(p->p.pool, cf); - } -} - -static void -krt_dump(struct proto *P) -{ - struct krt_proto *p = (struct krt_proto *) P; - - if (!KRT_CF->learn) - return; - debug("KRT: Table of inheritable routes\n"); - rt_dump(p->krt_table); + channel_setup_in_table(p->p.main_channel, 1); } #endif @@ -547,7 +322,7 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) + if (rte_is_valid(RTES_OR_NULL(e))) count++; return count; } @@ -557,7 +332,7 @@ rte_feed_obtain(net *n, rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) + if (rte_is_valid(RTES_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; @@ -643,7 +418,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) case KRT_SRC_ALIEN: if (KRT_CF->learn) - krt_learn_scan(p, e); + krt_learn_rte(p, e); else krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); return; @@ -712,6 +487,11 @@ static void krt_init_scan(struct krt_proto *p) { bmap_reset(&p->seen_map, 1024); + +#ifdef KRT_ALLOW_LEARN + if (KRT_CF->learn) + channel_refresh_begin(p->p.main_channel); +#endif } static void @@ -739,7 +519,7 @@ krt_prune(struct krt_proto *p) #ifdef KRT_ALLOW_LEARN if (KRT_CF->learn) - krt_learn_prune(p); + channel_refresh_end(p->p.main_channel); #endif if (p->ready) @@ -781,7 +561,7 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) case KRT_SRC_ALIEN: if (KRT_CF->learn) { - krt_learn_async(p, e, new); + krt_learn_rte(p, e); return; } #endif @@ -1027,6 +807,7 @@ krt_init(struct proto_config *CF) p->p.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; + p->p.rte_better = krt_rte_better; krt_sys_init(p); return &p->p; @@ -1182,7 +963,4 @@ struct protocol proto_unix_kernel = { .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, .get_attr = krt_get_attr, -#ifdef KRT_ALLOW_LEARN - .dump = krt_dump, -#endif }; diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index f6ad6fde..968c5b16 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -51,10 +51,6 @@ struct krt_proto { struct proto p; struct krt_state sys; /* Sysdep state */ -#ifdef KRT_ALLOW_LEARN - struct rtable *krt_table; /* Internal table of inherited routes */ -#endif - #ifndef CONFIG_ALL_TABLES_AT_ONCE timer *scan_timer; #endif -- cgit v1.2.3 From c84ed603714db2c42a781f8dbb5b3fd540ff689f Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Fri, 18 Jun 2021 18:23:41 +0200 Subject: Moved BFD IO loop out of BFD as we want to use it as socket-io coroutine --- proto/bfd/Makefile | 4 +- proto/bfd/bfd.h | 2 +- proto/bfd/io.c | 508 -------------------------------------------------- proto/bfd/io.h | 34 ---- sysdep/unix/Makefile | 2 +- sysdep/unix/io-loop.c | 508 ++++++++++++++++++++++++++++++++++++++++++++++++++ sysdep/unix/io-loop.h | 34 ++++ 7 files changed, 546 insertions(+), 546 deletions(-) delete mode 100644 proto/bfd/io.c delete mode 100644 proto/bfd/io.h create mode 100644 sysdep/unix/io-loop.c create mode 100644 sysdep/unix/io-loop.h (limited to 'proto') diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile index 402122fc..267dff98 100644 --- a/proto/bfd/Makefile +++ b/proto/bfd/Makefile @@ -1,6 +1,6 @@ -src := bfd.c io.c packets.c +src := bfd.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 91fdaa60..9d4cbbf8 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -22,7 +22,7 @@ #include "lib/string.h" #include "nest/bfd.h" -#include "io.h" +#include "sysdep/unix/io-loop.h" #define BFD_CONTROL_PORT 3784 diff --git a/proto/bfd/io.c b/proto/bfd/io.c deleted file mode 100644 index c5f1e024..00000000 --- a/proto/bfd/io.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * BIRD -- I/O and event loop - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "nest/bird.h" -#include "proto/bfd/io.h" - -#include "lib/buffer.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/timer.h" -#include "lib/socket.h" - - -struct birdloop -{ - pool *pool; - pthread_t thread; - pthread_mutex_t mutex; - - u8 stop_called; - u8 poll_active; - u8 wakeup_masked; - int wakeup_fds[2]; - - struct timeloop time; - list event_list; - list sock_list; - uint sock_num; - - BUFFER(sock *) poll_sk; - BUFFER(struct pollfd) poll_fd; - u8 poll_changed; - u8 close_scheduled; -}; - - -/* - * Current thread context - */ - -static _Thread_local struct birdloop *birdloop_current; - -static inline void -birdloop_set_current(struct birdloop *loop) -{ - birdloop_current = loop; - local_timeloop = loop ? &loop->time : &main_timeloop; -} - -/* - * Wakeup code for birdloop - */ - -static void -pipe_new(int *pfds) -{ - int rv = pipe(pfds); - if (rv < 0) - die("pipe: %m"); - - if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0) - die("fcntl(O_NONBLOCK): %m"); - - if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0) - die("fcntl(O_NONBLOCK): %m"); -} - -void -pipe_drain(int fd) -{ - char buf[64]; - int rv; - - try: - rv = read(fd, buf, 64); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) - return; - die("wakeup read: %m"); - } - if (rv == 64) - goto try; -} - -void -pipe_kick(int fd) -{ - u64 v = 1; - int rv; - - try: - rv = write(fd, &v, sizeof(u64)); - if (rv < 0) - { - if (errno == EINTR) - goto try; - if (errno == EAGAIN) - return; - die("wakeup write: %m"); - } -} - -static inline void -wakeup_init(struct birdloop *loop) -{ - pipe_new(loop->wakeup_fds); -} - -static inline void -wakeup_drain(struct birdloop *loop) -{ - pipe_drain(loop->wakeup_fds[0]); -} - -static inline void -wakeup_do_kick(struct birdloop *loop) -{ - pipe_kick(loop->wakeup_fds[1]); -} - -static inline void -wakeup_kick(struct birdloop *loop) -{ - if (!loop->wakeup_masked) - wakeup_do_kick(loop); - else - loop->wakeup_masked = 2; -} - -/* For notifications from outside */ -void -wakeup_kick_current(void) -{ - if (birdloop_current && birdloop_current->poll_active) - wakeup_kick(birdloop_current); -} - - -/* - * Events - */ - -static inline uint -events_waiting(struct birdloop *loop) -{ - return !EMPTY_LIST(loop->event_list); -} - -static inline void -events_init(struct birdloop *loop) -{ - init_list(&loop->event_list); -} - -static void -events_fire(struct birdloop *loop) -{ - times_update(); - ev_run_list(&loop->event_list); -} - -void -ev2_schedule(event *e) -{ - if (birdloop_current->poll_active && EMPTY_LIST(birdloop_current->event_list)) - wakeup_kick(birdloop_current); - - if (e->n.next) - rem_node(&e->n); - - add_tail(&birdloop_current->event_list, &e->n); -} - - -/* - * Sockets - */ - -static void -sockets_init(struct birdloop *loop) -{ - init_list(&loop->sock_list); - loop->sock_num = 0; - - BUFFER_INIT(loop->poll_sk, loop->pool, 4); - BUFFER_INIT(loop->poll_fd, loop->pool, 4); - loop->poll_changed = 1; /* add wakeup fd */ -} - -static void -sockets_add(struct birdloop *loop, sock *s) -{ - add_tail(&loop->sock_list, &s->n); - loop->sock_num++; - - s->index = -1; - loop->poll_changed = 1; - - if (loop->poll_active) - wakeup_kick(loop); -} - -void -sk_start(sock *s) -{ - sockets_add(birdloop_current, s); -} - -static void -sockets_remove(struct birdloop *loop, sock *s) -{ - rem_node(&s->n); - loop->sock_num--; - - if (s->index >= 0) - loop->poll_sk.data[s->index] = NULL; - - s->index = -1; - loop->poll_changed = 1; - - /* Wakeup moved to sk_stop() */ -} - -void -sk_stop(sock *s) -{ - sockets_remove(birdloop_current, s); - - if (birdloop_current->poll_active) - { - birdloop_current->close_scheduled = 1; - wakeup_kick(birdloop_current); - } - else - close(s->fd); - - s->fd = -1; -} - -static inline uint sk_want_events(sock *s) -{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); } - -/* -FIXME: this should be called from sock code - -static void -sockets_update(struct birdloop *loop, sock *s) -{ - if (s->index >= 0) - loop->poll_fd.data[s->index].events = sk_want_events(s); -} -*/ - -static void -sockets_prepare(struct birdloop *loop) -{ - BUFFER_SET(loop->poll_sk, loop->sock_num + 1); - BUFFER_SET(loop->poll_fd, loop->sock_num + 1); - - struct pollfd *pfd = loop->poll_fd.data; - sock **psk = loop->poll_sk.data; - uint i = 0; - node *n; - - WALK_LIST(n, loop->sock_list) - { - sock *s = SKIP_BACK(sock, n, n); - - ASSERT(i < loop->sock_num); - - s->index = i; - *psk = s; - pfd->fd = s->fd; - pfd->events = sk_want_events(s); - pfd->revents = 0; - - pfd++; - psk++; - i++; - } - - ASSERT(i == loop->sock_num); - - /* Add internal wakeup fd */ - *psk = NULL; - pfd->fd = loop->wakeup_fds[0]; - pfd->events = POLLIN; - pfd->revents = 0; - - loop->poll_changed = 0; -} - -static void -sockets_close_fds(struct birdloop *loop) -{ - struct pollfd *pfd = loop->poll_fd.data; - sock **psk = loop->poll_sk.data; - int poll_num = loop->poll_fd.used - 1; - - int i; - for (i = 0; i < poll_num; i++) - if (psk[i] == NULL) - close(pfd[i].fd); - - loop->close_scheduled = 0; -} - -int sk_read(sock *s, int revents); -int sk_write(sock *s); - -static void -sockets_fire(struct birdloop *loop) -{ - struct pollfd *pfd = loop->poll_fd.data; - sock **psk = loop->poll_sk.data; - int poll_num = loop->poll_fd.used - 1; - - times_update(); - - /* Last fd is internal wakeup fd */ - if (pfd[poll_num].revents & POLLIN) - wakeup_drain(loop); - - int i; - for (i = 0; i < poll_num; pfd++, psk++, i++) - { - int e = 1; - - if (! pfd->revents) - continue; - - if (pfd->revents & POLLNVAL) - die("poll: invalid fd %d", pfd->fd); - - if (pfd->revents & POLLIN) - while (e && *psk && (*psk)->rx_hook) - e = sk_read(*psk, 0); - - e = 1; - if (pfd->revents & POLLOUT) - while (e && *psk) - e = sk_write(*psk); - } -} - - -/* - * Birdloop - */ - -static void *birdloop_main(void *arg); - -struct birdloop * -birdloop_new(void) -{ - pool *p = rp_new(NULL, "Birdloop root"); - struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); - loop->pool = p; - pthread_mutex_init(&loop->mutex, NULL); - - wakeup_init(loop); - - events_init(loop); - timers_init(&loop->time, p); - sockets_init(loop); - - return loop; -} - -void -birdloop_start(struct birdloop *loop) -{ - int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop); - if (rv) - die("pthread_create(): %M", rv); -} - -void -birdloop_stop(struct birdloop *loop) -{ - pthread_mutex_lock(&loop->mutex); - loop->stop_called = 1; - wakeup_do_kick(loop); - pthread_mutex_unlock(&loop->mutex); - - int rv = pthread_join(loop->thread, NULL); - if (rv) - die("pthread_join(): %M", rv); -} - -void -birdloop_free(struct birdloop *loop) -{ - rfree(loop->pool); -} - - -void -birdloop_enter(struct birdloop *loop) -{ - /* TODO: these functions could save and restore old context */ - pthread_mutex_lock(&loop->mutex); - birdloop_set_current(loop); -} - -void -birdloop_leave(struct birdloop *loop) -{ - /* TODO: these functions could save and restore old context */ - birdloop_set_current(NULL); - pthread_mutex_unlock(&loop->mutex); -} - -void -birdloop_mask_wakeups(struct birdloop *loop) -{ - pthread_mutex_lock(&loop->mutex); - loop->wakeup_masked = 1; - pthread_mutex_unlock(&loop->mutex); -} - -void -birdloop_unmask_wakeups(struct birdloop *loop) -{ - pthread_mutex_lock(&loop->mutex); - if (loop->wakeup_masked == 2) - wakeup_do_kick(loop); - loop->wakeup_masked = 0; - pthread_mutex_unlock(&loop->mutex); -} - -static void * -birdloop_main(void *arg) -{ - struct birdloop *loop = arg; - timer *t; - int rv, timeout; - - birdloop_set_current(loop); - - pthread_mutex_lock(&loop->mutex); - while (1) - { - events_fire(loop); - timers_fire(&loop->time); - - times_update(); - if (events_waiting(loop)) - timeout = 0; - else if (t = timers_first(&loop->time)) - timeout = (tm_remains(t) TO_MS) + 1; - else - timeout = -1; - - if (loop->poll_changed) - sockets_prepare(loop); - - loop->poll_active = 1; - pthread_mutex_unlock(&loop->mutex); - - try: - rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout); - if (rv < 0) - { - if (errno == EINTR || errno == EAGAIN) - goto try; - die("poll: %m"); - } - - pthread_mutex_lock(&loop->mutex); - loop->poll_active = 0; - - if (loop->close_scheduled) - sockets_close_fds(loop); - - if (loop->stop_called) - break; - - if (rv) - sockets_fire(loop); - - timers_fire(&loop->time); - } - - loop->stop_called = 0; - pthread_mutex_unlock(&loop->mutex); - - return NULL; -} - - diff --git a/proto/bfd/io.h b/proto/bfd/io.h deleted file mode 100644 index ec706e9a..00000000 --- a/proto/bfd/io.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * BIRD -- I/O and event loop - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_BFD_IO_H_ -#define _BIRD_BFD_IO_H_ - -#include "nest/bird.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/timer.h" -#include "lib/socket.h" - - -void ev2_schedule(event *e); - -void sk_start(sock *s); -void sk_stop(sock *s); - -struct birdloop *birdloop_new(void); -void birdloop_start(struct birdloop *loop); -void birdloop_stop(struct birdloop *loop); -void birdloop_free(struct birdloop *loop); - -void birdloop_enter(struct birdloop *loop); -void birdloop_leave(struct birdloop *loop); -void birdloop_mask_wakeups(struct birdloop *loop); -void birdloop_unmask_wakeups(struct birdloop *loop); - - -#endif /* _BIRD_BFD_IO_H_ */ diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index 69cf8131..07f454ab 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -1,4 +1,4 @@ -src := alloc.c io.c krt.c log.c main.c random.c coroutine.c +src := alloc.c io.c io-loop.c krt.c log.c main.c random.c coroutine.c obj := $(src-o-files) $(all-daemon) $(cf-local) diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c new file mode 100644 index 00000000..a15d866a --- /dev/null +++ b/sysdep/unix/io-loop.c @@ -0,0 +1,508 @@ +/* + * BIRD -- I/O and event loop + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nest/bird.h" +#include "sysdep/unix/io-loop.h" + +#include "lib/buffer.h" +#include "lib/lists.h" +#include "lib/resource.h" +#include "lib/event.h" +#include "lib/timer.h" +#include "lib/socket.h" + + +struct birdloop +{ + pool *pool; + pthread_t thread; + pthread_mutex_t mutex; + + u8 stop_called; + u8 poll_active; + u8 wakeup_masked; + int wakeup_fds[2]; + + struct timeloop time; + list event_list; + list sock_list; + uint sock_num; + + BUFFER(sock *) poll_sk; + BUFFER(struct pollfd) poll_fd; + u8 poll_changed; + u8 close_scheduled; +}; + + +/* + * Current thread context + */ + +static _Thread_local struct birdloop *birdloop_current; + +static inline void +birdloop_set_current(struct birdloop *loop) +{ + birdloop_current = loop; + local_timeloop = loop ? &loop->time : &main_timeloop; +} + +/* + * Wakeup code for birdloop + */ + +static void +pipe_new(int *pfds) +{ + int rv = pipe(pfds); + if (rv < 0) + die("pipe: %m"); + + if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0) + die("fcntl(O_NONBLOCK): %m"); + + if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0) + die("fcntl(O_NONBLOCK): %m"); +} + +void +pipe_drain(int fd) +{ + char buf[64]; + int rv; + + try: + rv = read(fd, buf, 64); + if (rv < 0) + { + if (errno == EINTR) + goto try; + if (errno == EAGAIN) + return; + die("wakeup read: %m"); + } + if (rv == 64) + goto try; +} + +void +pipe_kick(int fd) +{ + u64 v = 1; + int rv; + + try: + rv = write(fd, &v, sizeof(u64)); + if (rv < 0) + { + if (errno == EINTR) + goto try; + if (errno == EAGAIN) + return; + die("wakeup write: %m"); + } +} + +static inline void +wakeup_init(struct birdloop *loop) +{ + pipe_new(loop->wakeup_fds); +} + +static inline void +wakeup_drain(struct birdloop *loop) +{ + pipe_drain(loop->wakeup_fds[0]); +} + +static inline void +wakeup_do_kick(struct birdloop *loop) +{ + pipe_kick(loop->wakeup_fds[1]); +} + +static inline void +wakeup_kick(struct birdloop *loop) +{ + if (!loop->wakeup_masked) + wakeup_do_kick(loop); + else + loop->wakeup_masked = 2; +} + +/* For notifications from outside */ +void +wakeup_kick_current(void) +{ + if (birdloop_current && birdloop_current->poll_active) + wakeup_kick(birdloop_current); +} + + +/* + * Events + */ + +static inline uint +events_waiting(struct birdloop *loop) +{ + return !EMPTY_LIST(loop->event_list); +} + +static inline void +events_init(struct birdloop *loop) +{ + init_list(&loop->event_list); +} + +static void +events_fire(struct birdloop *loop) +{ + times_update(); + ev_run_list(&loop->event_list); +} + +void +ev2_schedule(event *e) +{ + if (birdloop_current->poll_active && EMPTY_LIST(birdloop_current->event_list)) + wakeup_kick(birdloop_current); + + if (e->n.next) + rem_node(&e->n); + + add_tail(&birdloop_current->event_list, &e->n); +} + + +/* + * Sockets + */ + +static void +sockets_init(struct birdloop *loop) +{ + init_list(&loop->sock_list); + loop->sock_num = 0; + + BUFFER_INIT(loop->poll_sk, loop->pool, 4); + BUFFER_INIT(loop->poll_fd, loop->pool, 4); + loop->poll_changed = 1; /* add wakeup fd */ +} + +static void +sockets_add(struct birdloop *loop, sock *s) +{ + add_tail(&loop->sock_list, &s->n); + loop->sock_num++; + + s->index = -1; + loop->poll_changed = 1; + + if (loop->poll_active) + wakeup_kick(loop); +} + +void +sk_start(sock *s) +{ + sockets_add(birdloop_current, s); +} + +static void +sockets_remove(struct birdloop *loop, sock *s) +{ + rem_node(&s->n); + loop->sock_num--; + + if (s->index >= 0) + loop->poll_sk.data[s->index] = NULL; + + s->index = -1; + loop->poll_changed = 1; + + /* Wakeup moved to sk_stop() */ +} + +void +sk_stop(sock *s) +{ + sockets_remove(birdloop_current, s); + + if (birdloop_current->poll_active) + { + birdloop_current->close_scheduled = 1; + wakeup_kick(birdloop_current); + } + else + close(s->fd); + + s->fd = -1; +} + +static inline uint sk_want_events(sock *s) +{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); } + +/* +FIXME: this should be called from sock code + +static void +sockets_update(struct birdloop *loop, sock *s) +{ + if (s->index >= 0) + loop->poll_fd.data[s->index].events = sk_want_events(s); +} +*/ + +static void +sockets_prepare(struct birdloop *loop) +{ + BUFFER_SET(loop->poll_sk, loop->sock_num + 1); + BUFFER_SET(loop->poll_fd, loop->sock_num + 1); + + struct pollfd *pfd = loop->poll_fd.data; + sock **psk = loop->poll_sk.data; + uint i = 0; + node *n; + + WALK_LIST(n, loop->sock_list) + { + sock *s = SKIP_BACK(sock, n, n); + + ASSERT(i < loop->sock_num); + + s->index = i; + *psk = s; + pfd->fd = s->fd; + pfd->events = sk_want_events(s); + pfd->revents = 0; + + pfd++; + psk++; + i++; + } + + ASSERT(i == loop->sock_num); + + /* Add internal wakeup fd */ + *psk = NULL; + pfd->fd = loop->wakeup_fds[0]; + pfd->events = POLLIN; + pfd->revents = 0; + + loop->poll_changed = 0; +} + +static void +sockets_close_fds(struct birdloop *loop) +{ + struct pollfd *pfd = loop->poll_fd.data; + sock **psk = loop->poll_sk.data; + int poll_num = loop->poll_fd.used - 1; + + int i; + for (i = 0; i < poll_num; i++) + if (psk[i] == NULL) + close(pfd[i].fd); + + loop->close_scheduled = 0; +} + +int sk_read(sock *s, int revents); +int sk_write(sock *s); + +static void +sockets_fire(struct birdloop *loop) +{ + struct pollfd *pfd = loop->poll_fd.data; + sock **psk = loop->poll_sk.data; + int poll_num = loop->poll_fd.used - 1; + + times_update(); + + /* Last fd is internal wakeup fd */ + if (pfd[poll_num].revents & POLLIN) + wakeup_drain(loop); + + int i; + for (i = 0; i < poll_num; pfd++, psk++, i++) + { + int e = 1; + + if (! pfd->revents) + continue; + + if (pfd->revents & POLLNVAL) + die("poll: invalid fd %d", pfd->fd); + + if (pfd->revents & POLLIN) + while (e && *psk && (*psk)->rx_hook) + e = sk_read(*psk, 0); + + e = 1; + if (pfd->revents & POLLOUT) + while (e && *psk) + e = sk_write(*psk); + } +} + + +/* + * Birdloop + */ + +static void *birdloop_main(void *arg); + +struct birdloop * +birdloop_new(void) +{ + pool *p = rp_new(NULL, "Birdloop root"); + struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); + loop->pool = p; + pthread_mutex_init(&loop->mutex, NULL); + + wakeup_init(loop); + + events_init(loop); + timers_init(&loop->time, p); + sockets_init(loop); + + return loop; +} + +void +birdloop_start(struct birdloop *loop) +{ + int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop); + if (rv) + die("pthread_create(): %M", rv); +} + +void +birdloop_stop(struct birdloop *loop) +{ + pthread_mutex_lock(&loop->mutex); + loop->stop_called = 1; + wakeup_do_kick(loop); + pthread_mutex_unlock(&loop->mutex); + + int rv = pthread_join(loop->thread, NULL); + if (rv) + die("pthread_join(): %M", rv); +} + +void +birdloop_free(struct birdloop *loop) +{ + rfree(loop->pool); +} + + +void +birdloop_enter(struct birdloop *loop) +{ + /* TODO: these functions could save and restore old context */ + pthread_mutex_lock(&loop->mutex); + birdloop_set_current(loop); +} + +void +birdloop_leave(struct birdloop *loop) +{ + /* TODO: these functions could save and restore old context */ + birdloop_set_current(NULL); + pthread_mutex_unlock(&loop->mutex); +} + +void +birdloop_mask_wakeups(struct birdloop *loop) +{ + pthread_mutex_lock(&loop->mutex); + loop->wakeup_masked = 1; + pthread_mutex_unlock(&loop->mutex); +} + +void +birdloop_unmask_wakeups(struct birdloop *loop) +{ + pthread_mutex_lock(&loop->mutex); + if (loop->wakeup_masked == 2) + wakeup_do_kick(loop); + loop->wakeup_masked = 0; + pthread_mutex_unlock(&loop->mutex); +} + +static void * +birdloop_main(void *arg) +{ + struct birdloop *loop = arg; + timer *t; + int rv, timeout; + + birdloop_set_current(loop); + + pthread_mutex_lock(&loop->mutex); + while (1) + { + events_fire(loop); + timers_fire(&loop->time); + + times_update(); + if (events_waiting(loop)) + timeout = 0; + else if (t = timers_first(&loop->time)) + timeout = (tm_remains(t) TO_MS) + 1; + else + timeout = -1; + + if (loop->poll_changed) + sockets_prepare(loop); + + loop->poll_active = 1; + pthread_mutex_unlock(&loop->mutex); + + try: + rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout); + if (rv < 0) + { + if (errno == EINTR || errno == EAGAIN) + goto try; + die("poll: %m"); + } + + pthread_mutex_lock(&loop->mutex); + loop->poll_active = 0; + + if (loop->close_scheduled) + sockets_close_fds(loop); + + if (loop->stop_called) + break; + + if (rv) + sockets_fire(loop); + + timers_fire(&loop->time); + } + + loop->stop_called = 0; + pthread_mutex_unlock(&loop->mutex); + + return NULL; +} + + diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h new file mode 100644 index 00000000..d858b04e --- /dev/null +++ b/sysdep/unix/io-loop.h @@ -0,0 +1,34 @@ +/* + * BIRD -- I/O and event loop + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_IO_LOOP_H_ +#define _BIRD_IO_LOOP_H_ + +#include "nest/bird.h" +#include "lib/lists.h" +#include "lib/resource.h" +#include "lib/event.h" +#include "lib/timer.h" +#include "lib/socket.h" + + +void ev2_schedule(event *e); + +void sk_start(sock *s); +void sk_stop(sock *s); + +struct birdloop *birdloop_new(void); +void birdloop_start(struct birdloop *loop); +void birdloop_stop(struct birdloop *loop); +void birdloop_free(struct birdloop *loop); + +void birdloop_enter(struct birdloop *loop); +void birdloop_leave(struct birdloop *loop); +void birdloop_mask_wakeups(struct birdloop *loop); +void birdloop_unmask_wakeups(struct birdloop *loop); + + +#endif /* _BIRD_IO_LOOP_H_ */ -- cgit v1.2.3 From a4451535c69b8f934523905a8131ae2f16be2146 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 4 Aug 2021 22:48:51 +0200 Subject: Unified time for whole BIRD In previous versions, every thread used its own time structures, effectively leading to different time in every thread and strange logging messages. The time processing code now uses global atomic variables to keep current time available for fast concurrent reading and safe updates. --- lib/timer.c | 29 +++++------------- lib/timer.h | 14 ++++----- proto/bfd/io.c | 8 ++--- sysdep/unix/io.c | 90 ++++++++++++++++++++++-------------------------------- sysdep/unix/main.c | 1 + 5 files changed, 56 insertions(+), 86 deletions(-) (limited to 'proto') diff --git a/lib/timer.c b/lib/timer.c index 6efcadb4..ff1fb5ef 100644 --- a/lib/timer.c +++ b/lib/timer.c @@ -32,6 +32,7 @@ #include "nest/bird.h" +#include "lib/coro.h" #include "lib/heap.h" #include "lib/resource.h" #include "lib/timer.h" @@ -45,22 +46,10 @@ struct timeloop main_timeloop; /* Data accessed and modified from proto/bfd/io.c */ _Thread_local struct timeloop *local_timeloop; -void wakeup_kick_current(void); - -btime -current_time(void) -{ - return local_timeloop->last_time; -} +_Atomic btime last_time; +_Atomic btime real_time; -btime -current_real_time(void) -{ - if (!local_timeloop->real_time) - times_update_real_time(local_timeloop); - - return local_timeloop->real_time; -} +void wakeup_kick_current(void); #define TIMER_LESS(a,b) ((a)->expires < (b)->expires) @@ -164,8 +153,6 @@ tm_stop(timer *t) void timers_init(struct timeloop *loop, pool *p) { - times_init(loop); - BUFFER_INIT(loop->timers, p, 4); BUFFER_PUSH(loop->timers) = NULL; } @@ -178,8 +165,8 @@ timers_fire(struct timeloop *loop) btime base_time; timer *t; - times_update(loop); - base_time = loop->last_time; + times_update(); + base_time = current_time(); while (t = timers_first(loop)) { @@ -190,8 +177,8 @@ timers_fire(struct timeloop *loop) { btime when = t->expires + t->recurrent; - if (when <= loop->last_time) - when = loop->last_time + t->recurrent; + if (when <= base_time) + when = base_time + t->recurrent; if (t->randomize) when += random() % (t->randomize + 1); diff --git a/lib/timer.h b/lib/timer.h index bc568ee6..b201b8c8 100644 --- a/lib/timer.h +++ b/lib/timer.h @@ -14,6 +14,10 @@ #include "lib/buffer.h" #include "lib/resource.h" +#include + +extern _Atomic btime last_time; +extern _Atomic btime real_time; typedef struct timer { @@ -31,8 +35,6 @@ typedef struct timer struct timeloop { BUFFER_(timer *) timers; - btime last_time; - btime real_time; }; static inline uint timers_count(struct timeloop *loop) @@ -44,8 +46,8 @@ static inline timer *timers_first(struct timeloop *loop) extern struct timeloop main_timeloop; extern _Thread_local struct timeloop *local_timeloop; -btime current_time(void); -btime current_real_time(void); +#define current_time() atomic_load_explicit(&last_time, memory_order_acquire) +#define current_real_time() atomic_load_explicit(&real_time, memory_order_acquire) //#define now (current_time() TO_S) //#define now_real (current_real_time() TO_S) @@ -95,9 +97,7 @@ tm_start_max(timer *t, btime after) } /* In sysdep code */ -void times_init(struct timeloop *loop); -void times_update(struct timeloop *loop); -void times_update_real_time(struct timeloop *loop); +void times_update(void); /* For I/O loop */ void timers_init(struct timeloop *loop, pool *p); diff --git a/proto/bfd/io.c b/proto/bfd/io.c index 8fdc84fb..c5f1e024 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -172,7 +172,7 @@ events_init(struct birdloop *loop) static void events_fire(struct birdloop *loop) { - times_update(&loop->time); + times_update(); ev_run_list(&loop->event_list); } @@ -332,7 +332,7 @@ sockets_fire(struct birdloop *loop) sock **psk = loop->poll_sk.data; int poll_num = loop->poll_fd.used - 1; - times_update(&loop->time); + times_update(); /* Last fd is internal wakeup fd */ if (pfd[poll_num].revents & POLLIN) @@ -365,7 +365,7 @@ sockets_fire(struct birdloop *loop) * Birdloop */ -static void * birdloop_main(void *arg); +static void *birdloop_main(void *arg); struct birdloop * birdloop_new(void) @@ -461,7 +461,7 @@ birdloop_main(void *arg) events_fire(loop); timers_fire(&loop->time); - times_update(&loop->time); + times_update(); if (events_waiting(loop)) timeout = 0; else if (t = timers_first(&loop->time)) diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 40841ea4..90bb5d64 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -123,55 +123,50 @@ rf_fileno(struct rfile *f) btime boot_time; + void -times_init(struct timeloop *loop) +times_update(void) { struct timespec ts; int rv; + btime old_time = current_time(); + btime old_real_time = current_real_time(); + rv = clock_gettime(CLOCK_MONOTONIC, &ts); if (rv < 0) die("Monotonic clock is missing"); if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40))) log(L_WARN "Monotonic clock is crazy"); - - loop->last_time = ts.tv_sec S + ts.tv_nsec NS; - loop->real_time = 0; -} - -void -times_update(struct timeloop *loop) -{ - struct timespec ts; - int rv; - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - die("clock_gettime: %m"); - + btime new_time = ts.tv_sec S + ts.tv_nsec NS; - if (new_time < loop->last_time) + if (new_time < old_time) log(L_ERR "Monotonic clock is broken"); - loop->last_time = new_time; - loop->real_time = 0; -} - -void -times_update_real_time(struct timeloop *loop) -{ - struct timespec ts; - int rv; - rv = clock_gettime(CLOCK_REALTIME, &ts); if (rv < 0) die("clock_gettime: %m"); - loop->real_time = ts.tv_sec S + ts.tv_nsec NS; -} + btime new_real_time = ts.tv_sec S + ts.tv_nsec NS; + if (!atomic_compare_exchange_strong_explicit( + &last_time, + &old_time, + new_time, + memory_order_acq_rel, + memory_order_relaxed)) + DBG("Time update collision: last_time"); + + if (!atomic_compare_exchange_strong_explicit( + &real_time, + &old_real_time, + new_real_time, + memory_order_acq_rel, + memory_order_relaxed)) + DBG("Time update collision: real_time"); +} /** * DOC: Sockets @@ -2017,30 +2012,17 @@ struct event_log_entry static struct event_log_entry event_log[EVENT_LOG_LENGTH]; static struct event_log_entry *event_open; static int event_log_pos, event_log_num, watchdog_active; -static btime last_time; +static btime last_io_time; static btime loop_time; static void io_update_time(void) { - struct timespec ts; - int rv; - - /* - * This is third time-tracking procedure (after update_times() above and - * times_update() in BFD), dedicated to internal event log and latency - * tracking. Hopefully, we consolidate these sometimes. - */ - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - die("clock_gettime: %m"); - - last_time = ts.tv_sec S + ts.tv_nsec NS; + last_io_time = current_time(); if (event_open) { - event_open->duration = last_time - event_open->timestamp; + event_open->duration = last_io_time - event_open->timestamp; if (event_open->duration > config->latency_limit) log(L_WARN "Event 0x%p 0x%p took %d ms", @@ -2069,7 +2051,7 @@ io_log_event(void *hook, void *data) en->hook = hook; en->data = data; - en->timestamp = last_time; + en->timestamp = last_io_time; en->duration = 0; event_log_num++; @@ -2097,14 +2079,14 @@ io_log_dump(void) struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH; if (en->hook) log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data, - (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS)); + (int) ((last_io_time - en->timestamp) TO_MS), (int) (en->duration TO_MS)); } } void watchdog_sigalrm(int sig UNUSED) { - /* Update last_time and duration, but skip latency check */ + /* Update last_io_time and duration, but skip latency check */ config->latency_limit = 0xffffffff; io_update_time(); @@ -2117,7 +2099,7 @@ watchdog_start1(void) { io_update_time(); - loop_time = last_time; + loop_time = last_io_time; } static inline void @@ -2125,7 +2107,7 @@ watchdog_start(void) { io_update_time(); - loop_time = last_time; + loop_time = last_io_time; event_log_num = 0; if (config->watchdog_timeout) @@ -2146,7 +2128,7 @@ watchdog_stop(void) watchdog_active = 0; } - btime duration = last_time - loop_time; + btime duration = last_io_time - loop_time; if (duration > config->watchdog_warning) log(L_WARN "I/O loop cycle took %d ms for %d events", (int) (duration TO_MS), event_log_num); @@ -2202,7 +2184,7 @@ io_loop(void) watchdog_start1(); for(;;) { - times_update(&main_timeloop); + times_update(); events = ev_run_list(&global_event_list); events = ev_run_list_limited(&global_work_list, WORK_EVENTS_MAX) || events; timers_fire(&main_timeloop); @@ -2212,7 +2194,7 @@ io_loop(void) poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ if (t = timers_first(&main_timeloop)) { - times_update(&main_timeloop); + times_update(); timeout = (tm_remains(t) TO_MS) + 1; poll_tout = MIN(poll_tout, timeout); } @@ -2302,7 +2284,7 @@ io_loop(void) continue; } - times_update(&main_timeloop); + times_update(); /* guaranteed to be non-empty */ current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index dabfc554..d35424ff 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -903,6 +903,7 @@ main(int argc, char **argv) dmalloc_debug(0x2f03d00); #endif + times_update(); resource_sys_init(); parse_args(argc, argv); log_switch(1, NULL, NULL); -- cgit v1.2.3 From 94eb0858c2b938549d9d1703c872c6149901e7dd Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sat, 19 Jun 2021 20:50:18 +0200 Subject: Converting the former BFD loop to a universal IO loop and protocol loop. There is a simple universal IO loop, taking care of events, timers and sockets. Primarily, one instance of a protocol should use exactly one IO loop to do all its work, as is now done in BFD. Contrary to previous versions, the loop is now launched and cleaned by the nest/proto.c code, allowing for a protocol to just request its own loop by setting the loop's lock order in config higher than the_bird. It is not supported nor checked if any protocol changed the requested lock order in reconfigure. No protocol should do it at all. --- filter/filter_test.c | 1 + lib/birdlib.h | 1 + lib/coro.h | 5 +- lib/event.c | 131 ++++++++++++-------- lib/event.h | 41 ++++++- lib/event_test.c | 7 +- lib/flowspec_test.c | 3 + lib/io-loop.h | 54 +++++++++ lib/lists.h | 12 ++ lib/locking.h | 21 +++- lib/socket.h | 3 + lib/timer.c | 53 ++++---- lib/timer.h | 28 +++-- nest/a-path_test.c | 3 + nest/a-set_test.c | 4 + nest/config.Y | 1 + nest/proto.c | 173 +++++++++++++++++--------- nest/protocol.h | 7 +- nest/route.h | 3 + nest/rt-table.c | 18 ++- proto/bfd/bfd.c | 166 ++++++++++++++----------- proto/bfd/bfd.h | 8 +- proto/bfd/config.Y | 1 + proto/bfd/packets.c | 4 +- proto/bgp/bgp.c | 1 + sysdep/unix/coroutine.c | 21 +++- sysdep/unix/io-loop.c | 313 ++++++++++++++++++++++++++---------------------- sysdep/unix/io-loop.h | 41 +++---- sysdep/unix/io.c | 85 ++++++++----- sysdep/unix/main.c | 5 +- sysdep/unix/unix.h | 1 - test/bt-utils.c | 7 +- 32 files changed, 780 insertions(+), 442 deletions(-) create mode 100644 lib/io-loop.h (limited to 'proto') diff --git a/filter/filter_test.c b/filter/filter_test.c index 7e4af092..2a0b5431 100644 --- a/filter/filter_test.c +++ b/filter/filter_test.c @@ -72,6 +72,7 @@ int main(int argc, char *argv[]) { bt_init(argc, argv); + bt_bird_init(); bt_assert_hook = bt_assert_filter; diff --git a/lib/birdlib.h b/lib/birdlib.h index 3dc39d19..385bf75c 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -71,6 +71,7 @@ static inline int u64_cmp(u64 i1, u64 i2) /* Macros for gcc attributes */ #define NORET __attribute__((noreturn)) +#define USE_RESULT __atribute__((warn_unused_result)) #define UNUSED __attribute__((unused)) #define PACKED __attribute__((packed)) #define NONNULL(...) __attribute__((nonnull((__VA_ARGS__)))) diff --git a/lib/coro.h b/lib/coro.h index 51712b36..17ccff89 100644 --- a/lib/coro.h +++ b/lib/coro.h @@ -2,7 +2,7 @@ * BIRD Coroutines * * (c) 2017 Martin Mares - * (c) 2020 Maria Matejka + * (c) 2020-2021 Maria Matejka * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -22,5 +22,8 @@ struct coroutine; */ struct coroutine *coro_run(pool *, void (*entry)(void *), void *data); +/* Get self. */ +extern _Thread_local struct coroutine *this_coro; + #endif diff --git a/lib/event.c b/lib/event.c index 273447e0..6c5c8b14 100644 --- a/lib/event.c +++ b/lib/event.c @@ -19,8 +19,14 @@ * events in them and explicitly ask to run them. */ +#undef LOCAL_DEBUG + #include "nest/bird.h" #include "lib/event.h" +#include "lib/locking.h" +#include "lib/io-loop.h" + +extern _Thread_local struct coroutine *this_coro; event_list global_event_list; event_list global_work_list; @@ -28,11 +34,16 @@ event_list global_work_list; inline void ev_postpone(event *e) { + event_list *el = e->list; + if (!el) + return; + + ASSERT_DIE(birdloop_inside(el->loop)); + + LOCK_DOMAIN(event, el->lock); if (ev_active(e)) - { - rem_node(&e->n); - e->n.next = NULL; - } + rem_node(&e->n); + UNLOCK_DOMAIN(event, el->lock); } static void @@ -95,40 +106,25 @@ ev_run(event *e) * list @l which can be run by calling ev_run_list(). */ inline void -ev_enqueue(event_list *l, event *e) +ev_send(event_list *l, event *e) { - ev_postpone(e); - add_tail(l, &e->n); -} + DBG("ev_send(%p, %p)\n", l, e); + ASSERT_DIE(e->hook); + ASSERT_DIE(!e->list || (e->list == l) || (e->list->loop == l->loop)); -/** - * ev_schedule - schedule an event - * @e: an event - * - * This function schedules an event by enqueueing it to a system-wide - * event list which is run by the platform dependent code whenever - * appropriate. - */ -void -ev_schedule(event *e) -{ - ev_enqueue(&global_event_list, e); -} + e->list = l; -/** - * ev_schedule_work - schedule a work-event. - * @e: an event - * - * This function schedules an event by enqueueing it to a system-wide work-event - * list which is run by the platform dependent code whenever appropriate. This - * is designated for work-events instead of regular events. They are executed - * less often in order to not clog I/O loop. - */ -void -ev_schedule_work(event *e) -{ - if (!ev_active(e)) - add_tail(&global_work_list, &e->n); + LOCK_DOMAIN(event, l->lock); + if (enlisted(&e->n)) + { + UNLOCK_DOMAIN(event, l->lock); + return; + } + + add_tail(&l->events, &e->n); + UNLOCK_DOMAIN(event, l->lock); + + birdloop_ping(l->loop); } void io_log_event(void *hook, void *data); @@ -142,35 +138,64 @@ void io_log_event(void *hook, void *data); int ev_run_list(event_list *l) { + const _Bool legacy = LEGACY_EVENT_LIST(l); + + if (legacy) + ASSERT_THE_BIRD_LOCKED; + node *n; - list tmp_list; + list tmp_list; init_list(&tmp_list); - add_tail_list(&tmp_list, l); - init_list(l); + + /* Move the event list contents to a local list to avoid executing repeatedly added events */ + LOCK_DOMAIN(event, l->lock); + add_tail_list(&tmp_list, &l->events); + init_list(&l->events); + UNLOCK_DOMAIN(event, l->lock); + WALK_LIST_FIRST(n, tmp_list) { event *e = SKIP_BACK(event, n, n); - /* This is ugly hack, we want to log just events executed from the main I/O loop */ - if ((l == &global_event_list) || (l == &global_work_list)) + if (legacy) + { + /* The legacy way of event execution */ io_log_event(e->hook, e->data); - - ev_run(e); + ev_postpone(e); + e->hook(e->data); + } + else + { + // io_log_event(e->hook, e->data); /* TODO: add support for event logging in other io loops */ + ASSERT_DIE(e->list == l); + LOCK_DOMAIN(event, l->lock); + rem_node(&e->n); + UNLOCK_DOMAIN(event, l->lock); + e->hook(e->data); + } } - return !EMPTY_LIST(*l); + LOCK_DOMAIN(event, l->lock); + int repeat = ! EMPTY_LIST(l->events); + UNLOCK_DOMAIN(event, l->lock); + return repeat; } int ev_run_list_limited(event_list *l, uint limit) { + ASSERT_DIE(LEGACY_EVENT_LIST(l)); + ASSERT_THE_BIRD_LOCKED; + node *n; list tmp_list; + LOCK_DOMAIN(event, l->lock); init_list(&tmp_list); - add_tail_list(&tmp_list, l); - init_list(l); + add_tail_list(&tmp_list, &l->events); + init_list(&l->events); + UNLOCK_DOMAIN(event, l->lock); WALK_LIST_FIRST(n, tmp_list) { @@ -179,21 +204,23 @@ ev_run_list_limited(event_list *l, uint limit) if (!limit) break; - /* This is ugly hack, we want to log just events executed from the main I/O loop */ - if ((l == &global_event_list) || (l == &global_work_list)) - io_log_event(e->hook, e->data); + io_log_event(e->hook, e->data); ev_run(e); limit--; } + LOCK_DOMAIN(event, l->lock); if (!EMPTY_LIST(tmp_list)) { /* Attach new items after the unprocessed old items */ - add_tail_list(&tmp_list, l); - init_list(l); - add_tail_list(l, &tmp_list); + add_tail_list(&tmp_list, &l->events); + init_list(&l->events); + add_tail_list(&l->events, &tmp_list); } - return !EMPTY_LIST(*l); + int repeat = ! EMPTY_LIST(l->events); + UNLOCK_DOMAIN(event, l->lock); + + return repeat; } diff --git a/lib/event.h b/lib/event.h index 5f3b78d8..6c358f84 100644 --- a/lib/event.h +++ b/lib/event.h @@ -10,33 +10,62 @@ #define _BIRD_EVENT_H_ #include "lib/resource.h" +#include "lib/locking.h" + +#include + +DEFINE_DOMAIN(event); typedef struct event { resource r; void (*hook)(void *); void *data; node n; /* Internal link */ + struct event_list *list; /* List where this event is put in */ } event; -typedef list event_list; +typedef struct event_list { + list events; + pool *pool; + struct birdloop *loop; + DOMAIN(event) lock; +} event_list; extern event_list global_event_list; extern event_list global_work_list; event *ev_new(pool *); void ev_run(event *); -#define ev_init_list(el) init_list(el) -void ev_enqueue(event_list *, event *); -void ev_schedule(event *); -void ev_schedule_work(event *); + +static inline void ev_init_list(event_list *el, struct birdloop *loop, const char *name) +{ + init_list(&el->events); + el->loop = loop; + el->lock = DOMAIN_NEW(event, name); +} + +void ev_send(event_list *, event *); +#define ev_send_loop(l, e) ev_send(birdloop_event_list((l)), (e)) + +#define ev_schedule(e) ({ ASSERT_THE_BIRD_LOCKED; if (!ev_active((e))) ev_send(&global_event_list, (e)); }) +#define ev_schedule_work(e) ({ ASSERT_THE_BIRD_LOCKED; if (!ev_active((e))) ev_send(&global_work_list, (e)); }) + void ev_postpone(event *); int ev_run_list(event_list *); int ev_run_list_limited(event_list *, uint); +#define LEGACY_EVENT_LIST(l) (((l) == &global_event_list) || ((l) == &global_work_list)) + +_Bool birdloop_inside(struct birdloop *loop); + static inline int ev_active(event *e) { - return e->n.next != NULL; + if (e->list == NULL) + return 0; + + ASSERT_DIE(birdloop_inside(e->list->loop)); + return enlisted(&e->n); } static inline event* diff --git a/lib/event_test.c b/lib/event_test.c index e1215bba..9dda3e2a 100644 --- a/lib/event_test.c +++ b/lib/event_test.c @@ -48,14 +48,17 @@ init_event_check_points(void) event_check_points[i] = 0; } +void resource_sys_init(void); + static int t_ev_run_list(void) { int i; + resource_sys_init(); resource_init(); olock_init(); - timer_init(); + birdloop_init(); io_init(); rt_init(); if_init(); @@ -82,7 +85,9 @@ main(int argc, char *argv[]) { bt_init(argc, argv); + the_bird_lock(); bt_test_suite(t_ev_run_list, "Schedule and run 3 events in right order."); + the_bird_unlock(); return bt_exit_value(); } diff --git a/lib/flowspec_test.c b/lib/flowspec_test.c index ed4afe51..f7f70982 100644 --- a/lib/flowspec_test.c +++ b/lib/flowspec_test.c @@ -666,10 +666,13 @@ t_formatting6(void) return 1; } +void resource_sys_init(void); + int main(int argc, char *argv[]) { bt_init(argc, argv); + resource_sys_init(); bt_test_suite(t_read_length, "Testing get NLRI length"); bt_test_suite(t_write_length, "Testing set NLRI length"); diff --git a/lib/io-loop.h b/lib/io-loop.h new file mode 100644 index 00000000..25f1b2a3 --- /dev/null +++ b/lib/io-loop.h @@ -0,0 +1,54 @@ +/* + * BIRD -- I/O and event loop + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_IO_LOOP_H_ +#define _BIRD_IO_LOOP_H_ + +#include "nest/bird.h" +#include "lib/lists.h" +#include "lib/locking.h" +#include "lib/resource.h" +#include "lib/event.h" +#include "lib/socket.h" + +void sk_start(sock *s); +void sk_stop(sock *s); +void sk_reloop(sock *s, struct birdloop *loop); + +extern struct birdloop main_birdloop; + +/* Start a new birdloop owned by given pool and domain */ +struct birdloop *birdloop_new(pool *p, uint order, const char *name); + +/* Stop the loop. At the end, the @stopped callback is called unlocked in tail + * position to finish cleanup. Run birdloop_free() from that callback to free + * the loop itself. */ +void birdloop_stop(struct birdloop *loop, void (*stopped)(void *data), void *data); +void birdloop_stop_self(struct birdloop *loop, void (*stopped)(void *data), void *data); +void birdloop_free(struct birdloop *loop); + +/* Get birdloop's event list */ +event_list *birdloop_event_list(struct birdloop *loop); + +/* Get birdloop's time heap */ +struct timeloop *birdloop_time_loop(struct birdloop *loop); + +/* Enter and exit the birdloop */ +void birdloop_enter(struct birdloop *loop); +void birdloop_leave(struct birdloop *loop); + +_Bool birdloop_inside(struct birdloop *loop); + +void birdloop_mask_wakeups(struct birdloop *loop); +void birdloop_unmask_wakeups(struct birdloop *loop); + +void birdloop_link(struct birdloop *loop); +void birdloop_unlink(struct birdloop *loop); + +void birdloop_ping(struct birdloop *loop); + +void birdloop_init(void); +#endif /* _BIRD_IO_LOOP_H_ */ diff --git a/lib/lists.h b/lib/lists.h index 64b4a981..dc49ec8a 100644 --- a/lib/lists.h +++ b/lib/lists.h @@ -68,6 +68,18 @@ typedef union list { /* In fact two overlayed nodes */ #define EMPTY_LIST(list) (!(list).head->next) +static inline _Bool +enlisted(node *n) +{ + switch ((!!n->next) + (!!n->prev)) + { + case 0: return 0; + case 2: return 1; + case 1: bug("Garbled event list node"); + } + + bug("Maths is broken. And you should see a new heaven and a new earth: for the first heaven and the first earth had been passed away."); +} #ifndef _BIRD_LISTS_C_ #define LIST_INLINE static inline diff --git a/lib/locking.h b/lib/locking.h index eef60154..ab5c06af 100644 --- a/lib/locking.h +++ b/lib/locking.h @@ -14,6 +14,9 @@ struct domain_generic; /* Here define the global lock order; first to last. */ struct lock_order { struct domain_generic *the_bird; + struct domain_generic *proto; + struct domain_generic *rtable; + struct domain_generic *event; }; extern _Thread_local struct lock_order locking_stack; @@ -21,24 +24,40 @@ extern _Thread_local struct domain_generic **last_locked; #define DOMAIN(type) struct domain__##type #define DEFINE_DOMAIN(type) DOMAIN(type) { struct domain_generic *type; } +#define DOMAIN_ORDER(type) OFFSETOF(struct lock_order, type) -#define DOMAIN_NEW(type, name) (DOMAIN(type)) { .type = domain_new(name, OFFSETOF(struct lock_order, type)) } +#define DOMAIN_NEW(type, name) (DOMAIN(type)) { .type = domain_new(name, DOMAIN_ORDER(type)) } struct domain_generic *domain_new(const char *name, uint order); +#define DOMAIN_FREE(type, d) domain_free((d).type) +void domain_free(struct domain_generic *); + #define DOMAIN_NULL(type) (DOMAIN(type)) {} #define LOCK_DOMAIN(type, d) do_lock(((d).type), &(locking_stack.type)) #define UNLOCK_DOMAIN(type, d) do_unlock(((d).type), &(locking_stack.type)) +#define DOMAIN_IS_LOCKED(type, d) (((d).type) == (locking_stack.type)) +#define DG_IS_LOCKED(d) ((d) == *(DG_LSP(d))) + /* Internal for locking */ void do_lock(struct domain_generic *dg, struct domain_generic **lsp); void do_unlock(struct domain_generic *dg, struct domain_generic **lsp); +uint dg_order(struct domain_generic *dg); + +#define DG_LSP(d) ((struct domain_generic **) (((void *) &locking_stack) + dg_order(d))) +#define DG_LOCK(d) do_lock(d, DG_LSP(d)) +#define DG_UNLOCK(d) do_unlock(d, DG_LSP(d)) + /* Use with care. To be removed in near future. */ DEFINE_DOMAIN(the_bird); extern DOMAIN(the_bird) the_bird_domain; #define the_bird_lock() LOCK_DOMAIN(the_bird, the_bird_domain) #define the_bird_unlock() UNLOCK_DOMAIN(the_bird, the_bird_domain) +#define the_bird_locked() DOMAIN_IS_LOCKED(the_bird, the_bird_domain) + +#define ASSERT_THE_BIRD_LOCKED ({ if (!the_bird_locked()) bug("The BIRD lock must be locked here: %s:%d", __FILE__, __LINE__); }) #endif diff --git a/lib/socket.h b/lib/socket.h index 96fedeeb..5bdab7f3 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -12,6 +12,7 @@ #include #include "lib/resource.h" +#include "lib/event.h" #ifdef HAVE_LIBSSH #define LIBSSH_LEGACY_0_4 #include @@ -79,6 +80,7 @@ typedef struct birdsock { const char *password; /* Password for MD5 authentication */ const char *err; /* Error message */ struct ssh_sock *ssh; /* Used in SK_SSH */ + struct event reloop; /* Reloop event */ } sock; sock *sock_new(pool *); /* Allocate new socket */ @@ -128,6 +130,7 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou #define SKF_TRUNCATED 0x200 /* Received packet was truncated, set by IO layer */ #define SKF_HDRINCL 0x400 /* Used internally */ #define SKF_PKTINFO 0x800 /* Used internally */ +#define SKF_PASSIVE_THREAD 0x1000 /* Child sockets used in thread, do not add to main loop */ /* * Socket types SA SP DA DP IF TTL SendTo (?=may, -=must not, *=must) diff --git a/lib/timer.c b/lib/timer.c index ff1fb5ef..eb7ea690 100644 --- a/lib/timer.c +++ b/lib/timer.c @@ -37,15 +37,8 @@ #include "lib/resource.h" #include "lib/timer.h" - -struct timeloop main_timeloop; - - #include -/* Data accessed and modified from proto/bfd/io.c */ -_Thread_local struct timeloop *local_timeloop; - _Atomic btime last_time; _Atomic btime real_time; @@ -76,7 +69,7 @@ tm_dump(resource *r) if (t->recurrent) debug("recur %d, ", t->recurrent); if (t->expires) - debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS); + debug("in loop %p expires in %d ms)\n", t->loop, (t->expires - current_time()) TO_MS); else debug("inactive)\n"); } @@ -99,8 +92,8 @@ tm_new(pool *p) return t; } -void -tm_set(timer *t, btime when) +static void +tm_set_in_tl(timer *t, btime when, struct timeloop *local_timeloop) { uint tc = timers_count(local_timeloop); @@ -122,17 +115,17 @@ tm_set(timer *t, btime when) HEAP_DECREASE(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); } -#ifdef CONFIG_BFD - /* Hack to notify BFD loops */ - if ((local_timeloop != &main_timeloop) && (t->index == 1)) - wakeup_kick_current(); -#endif + t->loop = local_timeloop; + + if ((t->index == 1) && (local_timeloop->coro != this_coro)) + birdloop_ping(local_timeloop->loop); } void -tm_start(timer *t, btime after) +tm_set_in(timer *t, btime when, struct birdloop *loop) { - tm_set(t, current_time() + MAX(after, 0)); + ASSERT_DIE(birdloop_inside(loop)); + tm_set_in_tl(t, when, birdloop_time_loop(loop)); } void @@ -141,18 +134,23 @@ tm_stop(timer *t) if (!t->expires) return; - uint tc = timers_count(local_timeloop); + TLOCK_TIMER_ASSERT(t->loop); - HEAP_DELETE(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); - BUFFER_POP(local_timeloop->timers); + uint tc = timers_count(t->loop); + + HEAP_DELETE(t->loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); + BUFFER_POP(t->loop->timers); t->index = -1; t->expires = 0; + t->loop = NULL; } void timers_init(struct timeloop *loop, pool *p) { + TLOCK_TIMER_ASSERT(loop); + BUFFER_INIT(loop->timers, p, 4); BUFFER_PUSH(loop->timers) = NULL; } @@ -160,8 +158,10 @@ timers_init(struct timeloop *loop, pool *p) void io_log_event(void *hook, void *data); void -timers_fire(struct timeloop *loop) +timers_fire(struct timeloop *loop, int io_log) { + TLOCK_TIMER_ASSERT(loop); + btime base_time; timer *t; @@ -183,26 +183,19 @@ timers_fire(struct timeloop *loop) if (t->randomize) when += random() % (t->randomize + 1); - tm_set(t, when); + tm_set_in_tl(t, when, loop); } else tm_stop(t); /* This is ugly hack, we want to log just timers executed from the main I/O loop */ - if (loop == &main_timeloop) + if (io_log) io_log_event(t->hook, t->data); t->hook(t); } } -void -timer_init(void) -{ - timers_init(&main_timeloop, &root_pool); - local_timeloop = &main_timeloop; -} - /** * tm_parse_time - parse a date and time diff --git a/lib/timer.h b/lib/timer.h index b201b8c8..04544ace 100644 --- a/lib/timer.h +++ b/lib/timer.h @@ -12,6 +12,8 @@ #include "nest/bird.h" #include "lib/buffer.h" +#include "lib/io-loop.h" +#include "lib/locking.h" #include "lib/resource.h" #include @@ -29,22 +31,27 @@ typedef struct timer uint randomize; /* Amount of randomization */ uint recurrent; /* Timer recurrence */ + struct timeloop *loop; /* Loop where the timer is active */ + int index; } timer; struct timeloop { BUFFER_(timer *) timers; + struct domain_generic *domain; + struct birdloop *loop; + struct coroutine *coro; }; +#define TLOCK_TIMER_ASSERT(loop) ASSERT_DIE((loop)->domain && DG_IS_LOCKED((loop)->domain)) +#define TLOCK_LOCAL_ASSERT(loop) ASSERT_DIE(!(loop)->domain || DG_IS_LOCKED((loop)->domain)) + static inline uint timers_count(struct timeloop *loop) -{ return loop->timers.used - 1; } +{ TLOCK_TIMER_ASSERT(loop); return loop->timers.used - 1; } static inline timer *timers_first(struct timeloop *loop) -{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; } - -extern struct timeloop main_timeloop; -extern _Thread_local struct timeloop *local_timeloop; +{ TLOCK_TIMER_ASSERT(loop); return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; } #define current_time() atomic_load_explicit(&last_time, memory_order_acquire) #define current_real_time() atomic_load_explicit(&real_time, memory_order_acquire) @@ -54,10 +61,13 @@ extern _Thread_local struct timeloop *local_timeloop; extern btime boot_time; timer *tm_new(pool *p); -void tm_set(timer *t, btime when); -void tm_start(timer *t, btime after); +#define tm_set(t, when) tm_set_in((t), (when), &main_birdloop) +#define tm_start(t, after) tm_start_in((t), (after), &main_birdloop) void tm_stop(timer *t); +void tm_set_in(timer *t, btime when, struct birdloop *loop); +#define tm_start_in(t, after, loop) tm_set_in((t), (current_time() + MAX_((after), 0)), loop) + static inline int tm_active(timer *t) { @@ -101,9 +111,7 @@ void times_update(void); /* For I/O loop */ void timers_init(struct timeloop *loop, pool *p); -void timers_fire(struct timeloop *loop); - -void timer_init(void); +void timers_fire(struct timeloop *loop, int io_log); struct timeformat { diff --git a/nest/a-path_test.c b/nest/a-path_test.c index 9ed0a786..2e6e4956 100644 --- a/nest/a-path_test.c +++ b/nest/a-path_test.c @@ -204,10 +204,13 @@ t_as_path_converting(void) } #endif +void resource_sys_init(void); + int main(int argc, char *argv[]) { bt_init(argc, argv); + resource_sys_init(); bt_test_suite(t_as_path_match, "Testing AS path matching and some a-path utilities."); bt_test_suite(t_path_format, "Testing formating as path into byte buffer"); diff --git a/nest/a-set_test.c b/nest/a-set_test.c index 96b6a727..efd1b67d 100644 --- a/nest/a-set_test.c +++ b/nest/a-set_test.c @@ -240,10 +240,14 @@ t_set_ec_delete(void) return 1; } + +void resource_sys_init(void); + int main(int argc, char *argv[]) { bt_init(argc, argv); + resource_sys_init(); bt_test_suite(t_set_int_contains, "Testing sets of integers: contains, get_data"); bt_test_suite(t_set_int_format, "Testing sets of integers: format"); diff --git a/nest/config.Y b/nest/config.Y index 92f1aad2..6e7689ed 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -434,6 +434,7 @@ proto: dev_proto '}' ; dev_proto_start: proto_start DIRECT { this_proto = proto_config_new(&proto_device, $1); init_list(&DIRECT_CFG->iface_list); + this_proto->late_if_feed = 1; } ; diff --git a/nest/proto.c b/nest/proto.c index fae0647a..ac0fb232 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -15,6 +15,7 @@ #include "lib/event.h" #include "lib/timer.h" #include "lib/string.h" +#include "lib/coro.h" #include "conf/conf.h" #include "nest/route.h" #include "nest/iface.h" @@ -58,7 +59,28 @@ static void channel_feed_end(struct channel *c); static void channel_export_stopped(struct rt_export_request *req); static inline int proto_is_done(struct proto *p) -{ return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } +{ return (p->proto_state == PS_DOWN) && proto_is_inactive(p); } + +static inline event_list *proto_event_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_event_list : birdloop_event_list(p->loop); } + +static inline event_list *proto_work_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_work_list : birdloop_event_list(p->loop); } + +static inline void proto_send_event(struct proto *p) +{ ev_send(proto_event_list(p), p->event); } + +#define PROTO_ENTER_FROM_MAIN(p) ({ \ + ASSERT_DIE(birdloop_inside(&main_birdloop)); \ + struct birdloop *_loop = (p)->loop; \ + if (_loop != &main_birdloop) birdloop_enter(_loop); \ + _loop; \ + }) + +#define PROTO_LEAVE_FROM_MAIN(loop) ({ if (loop != &main_birdloop) birdloop_leave(loop); }) + +#define PROTO_LOCKED_FROM_MAIN(p) for (struct birdloop *_proto_loop = PROTO_ENTER_FROM_MAIN(p); _proto_loop; PROTO_LEAVE_FROM_MAIN(_proto_loop), (_proto_loop = NULL)) + static inline int channel_is_active(struct channel *c) { return (c->channel_state != CS_DOWN); } @@ -473,6 +495,7 @@ channel_start_export(struct channel *c) c->out_req = (struct rt_export_request) { .name = rn, + .list = proto_work_list(c->proto), .trace_routes = c->debug | c->proto->debug, .dump_req = channel_dump_export_req, .log_state_change = channel_export_log_state_change, @@ -517,7 +540,7 @@ channel_check_stopped(struct channel *c) return; channel_set_state(c, CS_DOWN); - ev_schedule(c->proto->event); + proto_send_event(c->proto); break; case CS_PAUSE: @@ -853,6 +876,7 @@ channel_setup_in_table(struct channel *c, int best) }; c->in_table->get = (struct rt_export_request) { .name = cat->name, + .list = proto_work_list(c->proto), .trace_routes = c->debug | c->proto->debug, .dump_req = channel_in_get_dump_req, .log_state_change = channel_get_log_state_change, @@ -895,6 +919,7 @@ channel_setup_out_table(struct channel *c) }; c->out_table->get = (struct rt_export_request) { .name = cat->name, + .list = proto_work_list(c->proto), .trace_routes = c->debug | c->proto->debug, .dump_req = channel_out_get_dump_req, .log_state_change = channel_get_log_state_change, @@ -997,7 +1022,7 @@ channel_do_down(struct channel *c) /* Schedule protocol shutddown */ if (proto_is_done(c->proto)) - ev_schedule(c->proto->event); + proto_send_event(c->proto); } void @@ -1085,9 +1110,12 @@ channel_request_table_feeding(struct channel *c) void channel_request_feeding(struct channel *c) { + if (c->gr_wait || !c->proto->rt_notify) + return; + CD(c, "Refeed requested"); - ASSERT(c->out_req.hook); + ASSERT_DIE(c->out_req.hook); if (c->out_table) channel_aux_request_refeed(c->out_table); @@ -1331,17 +1359,35 @@ proto_configure_channel(struct proto *p, struct channel **pc, struct channel_con return 1; } +static void +proto_cleanup(struct proto *p) +{ + rfree(p->pool); + p->pool = NULL; + + p->active = 0; + proto_log_state_change(p); + proto_rethink_goal(p); +} static void -proto_event(void *ptr) +proto_loop_stopped(void *ptr) { struct proto *p = ptr; - if (p->do_start) - { - if_feed_baby(p); - p->do_start = 0; - } + birdloop_enter(&main_birdloop); + + p->loop = &main_birdloop; + p->event->list = NULL; + proto_cleanup(p); + + birdloop_leave(&main_birdloop); +} + +static void +proto_event(void *ptr) +{ + struct proto *p = ptr; if (p->do_stop) { @@ -1351,14 +1397,10 @@ proto_event(void *ptr) } if (proto_is_done(p)) - { - rfree(p->pool); - p->pool = NULL; - - p->active = 0; - proto_log_state_change(p); - proto_rethink_goal(p); - } + if (p->loop != &main_birdloop) + birdloop_stop_self(p->loop, proto_loop_stopped, p); + else + proto_cleanup(p); } @@ -1399,6 +1441,7 @@ proto_init(struct proto_config *c, node *n) struct protocol *pr = c->protocol; struct proto *p = pr->init(c); + p->loop = &main_birdloop; p->proto_state = PS_DOWN; p->last_state_change = current_time(); p->vrf = c->vrf; @@ -1415,11 +1458,27 @@ proto_init(struct proto_config *c, node *n) static void proto_start(struct proto *p) { - /* Here we cannot use p->cf->name since it won't survive reconfiguration */ - p->pool = rp_new(proto_pool, p->proto->name); + DBG("Kicking %s up\n", p->name); + PD(p, "Starting"); + + int ns = strlen("Protocol ") + strlen(p->cf->name) + 1; + void *nb = mb_alloc(proto_pool, ns); + ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Protocol %s", p->cf->name)); + + p->pool = rp_new(proto_pool, nb); if (graceful_restart_state == GRS_INIT) p->gr_recovery = 1; + + if (p->cf->loop_order != DOMAIN_ORDER(the_bird)) + p->loop = birdloop_new(p->pool, p->cf->loop_order, nb); + + p->event->list = proto_event_list(p); + + mb_move(nb, p->pool); + + PROTO_LOCKED_FROM_MAIN(p) + proto_notify_state(p, (p->proto->start ? p->proto->start(p) : PS_UP)); } @@ -1455,6 +1514,7 @@ proto_config_new(struct protocol *pr, int class) cf->class = class; cf->debug = new_config->proto_default_debug; cf->mrtdump = new_config->proto_default_mrtdump; + cf->loop_order = DOMAIN_ORDER(the_bird); init_list(&cf->channels); @@ -1744,11 +1804,20 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty } static void -proto_rethink_goal(struct proto *p) +proto_shutdown(struct proto *p) { - struct protocol *q; - byte goal; + if (p->proto_state == PS_START || p->proto_state == PS_UP) + { + /* Going down */ + DBG("Kicking %s down\n", p->name); + PD(p, "Shutting down"); + proto_notify_state(p, (p->proto->shutdown ? p->proto->shutdown(p) : PS_DOWN)); + } +} +static void +proto_rethink_goal(struct proto *p) +{ if (p->reconfiguring && !p->active) { struct proto_config *nc = p->cf_new; @@ -1768,32 +1837,12 @@ proto_rethink_goal(struct proto *p) /* Determine what state we want to reach */ if (p->disabled || p->reconfiguring) - goal = PS_DOWN; - else - goal = PS_UP; - - q = p->proto; - if (goal == PS_UP) - { - if (!p->active) - { - /* Going up */ - DBG("Kicking %s up\n", p->name); - PD(p, "Starting"); - proto_start(p); - proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); - } - } - else { - if (p->proto_state == PS_START || p->proto_state == PS_UP) - { - /* Going down */ - DBG("Kicking %s down\n", p->name); - PD(p, "Shutting down"); - proto_notify_state(p, (q->shutdown ? q->shutdown(p) : PS_DOWN)); - } + PROTO_LOCKED_FROM_MAIN(p) + proto_shutdown(p); } + else if (!p->active) + proto_start(p); } struct proto * @@ -1998,7 +2047,7 @@ protos_dump_all(void) #define DPF(x) (p->x ? " " #x : "") debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s\n", p->name, p, p_states[p->proto_state], p->active_channels, - DPF(disabled), DPF(active), DPF(do_start), DPF(do_stop), DPF(reconfiguring)); + DPF(disabled), DPF(active), DPF(do_stop), DPF(reconfiguring)); #undef DPF struct channel *c; @@ -2286,8 +2335,8 @@ static inline void proto_do_start(struct proto *p) { p->active = 1; - p->do_start = 1; - ev_schedule(p->event); + if (!p->cf->late_if_feed) + if_feed_baby(p); } static void @@ -2300,6 +2349,9 @@ proto_do_up(struct proto *p) } proto_start_channels(p); + + if (p->cf->late_if_feed) + if_feed_baby(p); } static inline void @@ -2314,9 +2366,6 @@ proto_do_stop(struct proto *p) p->down_sched = 0; p->gr_recovery = 0; - p->do_stop = 1; - ev_schedule(p->event); - if (p->main_source) { rt_unlock_source(p->main_source); @@ -2324,6 +2373,9 @@ proto_do_stop(struct proto *p) } proto_stop_channels(p); + + p->do_stop = 1; + proto_send_event(p); } static void @@ -2334,7 +2386,7 @@ proto_do_down(struct proto *p) /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) - ev_schedule(p->event); + proto_send_event(p); } @@ -2573,7 +2625,7 @@ proto_cmd_disable(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_DISABLE; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); cli_msg(-9, "%s: disabled", p->name); } @@ -2606,9 +2658,9 @@ proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_RESTART; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); p->disabled = 0; - proto_rethink_goal(p); + /* After the protocol shuts down, proto_rethink_goal() is run from proto_event. */ cli_msg(-12, "%s: restarted", p->name); } @@ -2683,7 +2735,9 @@ proto_apply_cmd_symbol(const struct symbol *s, void (* cmd)(struct proto *, uint if (s->proto->proto) { - cmd(s->proto->proto, arg, 0); + struct proto *p = s->proto->proto; + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, 0); cli_msg(0, ""); } else @@ -2698,7 +2752,8 @@ proto_apply_cmd_patt(const char *patt, void (* cmd)(struct proto *, uintptr_t, i WALK_LIST(p, proto_list) if (!patt || patmatch(patt, p->name)) - cmd(p, arg, cnt++); + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, cnt++); if (!cnt) cli_msg(8003, "No protocols match"); diff --git a/nest/protocol.h b/nest/protocol.h index f9996b18..2fbd607c 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -120,8 +120,10 @@ struct proto_config { u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ u8 disabled; /* Protocol enabled/disabled by default */ u8 vrf_set; /* Related VRF instance (below) is defined */ + u8 late_if_feed; /* Delay interface feed after channels are up */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ u32 router_id; /* Protocol specific router ID */ + uint loop_order; /* Launch a birdloop on this locking level; use DOMAIN_ORDER(the_bird) for mainloop */ list channels; /* List of channel configs (struct channel_config) */ struct iface *vrf; /* Related VRF instance, NULL if global */ @@ -139,6 +141,7 @@ struct proto { struct proto_config *cf_new; /* Configuration we want to switch to after shutdown (NULL=delete) */ pool *pool; /* Pool containing local objects */ event *event; /* Protocol event */ + struct birdloop *loop; /* BIRDloop running this protocol */ list channels; /* List of channels to rtables (struct channel) */ struct channel *main_channel; /* Primary channel */ @@ -149,12 +152,12 @@ struct proto { u32 debug; /* Debugging flags */ u32 mrtdump; /* MRTDump flags */ uint active_channels; /* Number of active channels */ + uint active_coroutines; /* Number of active coroutines */ byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ byte vrf_set; /* Related VRF instance (above) is defined */ byte proto_state; /* Protocol state machine (PS_*, see below) */ byte active; /* From PS_START to cleanup after PS_STOP */ - byte do_start; /* Start actions are scheduled */ byte do_stop; /* Stop actions are scheduled */ byte reconfiguring; /* We're shutting down due to reconfiguration */ byte gr_recovery; /* Protocol should participate in graceful restart recovery */ @@ -356,6 +359,8 @@ void proto_notify_state(struct proto *p, unsigned state); * as a result of received ROUTE-REFRESH request). */ +static inline int proto_is_inactive(struct proto *p) +{ return (p->active_channels == 0) && (p->active_coroutines == 0); } /* diff --git a/nest/route.h b/nest/route.h index d6474e09..7c5fd02f 100644 --- a/nest/route.h +++ b/nest/route.h @@ -11,6 +11,7 @@ #define _BIRD_ROUTE_H_ #include "lib/lists.h" +#include "lib/event.h" #include "lib/bitmap.h" #include "lib/resource.h" #include "lib/net.h" @@ -329,6 +330,8 @@ struct rt_export_request { char *name; u8 trace_routes; + event_list *list; /* Where to schedule export events */ + /* There are two methods of export. You can either request feeding every single change * or feeding the whole route feed. In case of regular export, &export_one is preferred. * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. diff --git a/nest/rt-table.c b/nest/rt-table.c index c049101a..9c12ef56 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1105,6 +1105,12 @@ rt_next_export(struct rt_export_hook *hook, rtable *tab) return tab->first_export; } +static inline void +rt_send_export_event(struct rt_export_hook *hook) +{ + ev_send(hook->req->list, hook->event); +} + static void rt_announce_exports(timer *tm) { @@ -1116,7 +1122,7 @@ rt_announce_exports(timer *tm) if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) continue; - ev_schedule_work(c->event); + rt_send_export_event(c); } } @@ -1169,7 +1175,7 @@ rt_export_hook(void *_data) rte_update_unlock(); } - ev_schedule_work(c->event); + rt_send_export_event(c); } @@ -1732,7 +1738,7 @@ rt_request_export(rtable *tab, struct rt_export_request *req) DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); hook->event = ev_new_init(p, rt_feed_channel, hook); - ev_schedule_work(hook->event); + rt_send_export_event(hook); rt_set_export_state(hook, TES_FEEDING); } @@ -1754,7 +1760,7 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r hook->event->hook = rt_export_stopped; hook->stopped = stopped; - ev_schedule(hook->event); + rt_send_export_event(hook); rt_set_export_state(hook, TES_STOP); } @@ -2869,7 +2875,7 @@ rt_feed_channel(void *data) if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule_work(c->event); + rt_send_export_event(c); return; } @@ -2904,7 +2910,7 @@ rt_feed_channel(void *data) FIB_ITERATE_END; c->event->hook = rt_export_hook; - ev_schedule_work(c->event); + rt_send_export_event(c); rt_set_export_state(c, TES_READY); } diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index dac184c5..3964c267 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -113,8 +113,16 @@ #define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2 #define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n) -static list bfd_proto_list; -static list bfd_wait_list; +DEFINE_DOMAIN(rtable); +#define BFD_LOCK LOCK_DOMAIN(rtable, bfd_global.lock) +#define BFD_UNLOCK UNLOCK_DOMAIN(rtable, bfd_global.lock) + +static struct { + DOMAIN(rtable) lock; + list wait_list; + list pickup_list; + list proto_list; +} bfd_global; const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; @@ -188,7 +196,7 @@ bfd_session_update_tx_interval(struct bfd_session *s) return; /* Set timer relative to last tx_timer event */ - tm_set(s->tx_timer, s->last_tx + tx_int_l); + tm_set_in(s->tx_timer, s->last_tx + tx_int_l, s->ifa->bfd->p.loop); } static void @@ -202,7 +210,7 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick) if (!s->last_rx) return; - tm_set(s->hold_timer, s->last_rx + timeout); + tm_set_in(s->hold_timer, s->last_rx + timeout, s->ifa->bfd->p.loop); } static void @@ -226,7 +234,7 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) if (reset || !tm_active(s->tx_timer)) { s->last_tx = 0; - tm_start(s->tx_timer, 0); + tm_start_in(s->tx_timer, 0, s->ifa->bfd->p.loop); } return; @@ -419,7 +427,7 @@ bfd_get_free_id(struct bfd_proto *p) static struct bfd_session * bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface, struct bfd_options *opts) { - birdloop_enter(p->loop); + ASSERT_DIE(birdloop_inside(p->p.loop)); struct bfd_iface *ifa = bfd_get_iface(p, local, iface); @@ -454,8 +462,6 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * TRACE(D_EVENTS, "Session to %I added", s->addr); - birdloop_leave(p->loop); - return s; } @@ -463,38 +469,34 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * static void bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa) { - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); s->opened = 1; bfd_session_control_tx_timer(s); - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); } static void bfd_close_session(struct bfd_proto *p, struct bfd_session *s) { - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); s->opened = 0; bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN); bfd_session_control_tx_timer(s); - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); } */ static void -bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) +bfd_remove_session_locked(struct bfd_proto *p, struct bfd_session *s) { - ip_addr ip = s->addr; - /* Caller should ensure that request list is empty */ - birdloop_enter(p->loop); - /* Remove session from notify list if scheduled for notification */ /* No need for bfd_lock_sessions(), we are already protected by birdloop_enter() */ if (NODE_VALID(&s->n)) @@ -508,11 +510,17 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) HASH_REMOVE(p->session_hash_id, HASH_ID, s); HASH_REMOVE(p->session_hash_ip, HASH_IP, s); - sl_free(p->session_slab, s); + TRACE(D_EVENTS, "Session to %I removed", s->addr); - TRACE(D_EVENTS, "Session to %I removed", ip); + sl_free(p->session_slab, s); +} - birdloop_leave(p->loop); +static void +bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) +{ + birdloop_enter(p->p.loop); + bfd_remove_session_locked(p, s); + birdloop_leave(p->p.loop); } static void @@ -521,7 +529,7 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) if (EMPTY_LIST(s->request_list)) return; - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); struct bfd_request *req = SKIP_BACK(struct bfd_request, n, HEAD(s->request_list)); s->cf = bfd_merge_options(s->ifa->cf, &req->opts); @@ -534,7 +542,7 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) bfd_session_control_tx_timer(s, 0); - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); TRACE(D_EVENTS, "Session to %I reconfigured", s->addr); } @@ -618,9 +626,9 @@ bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_con (new->passive != old->passive); /* This should be probably changed to not access ifa->cf from the BFD thread */ - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); ifa->cf = new; - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); } @@ -681,41 +689,68 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req) } static void -bfd_submit_request(struct bfd_request *req) +bfd_pickup_requests(void *_data UNUSED) { node *n; + WALK_LIST(n, bfd_global.proto_list) + { + struct bfd_proto *p = SKIP_BACK(struct bfd_proto, bfd_node, n); + birdloop_enter(p->p.loop); + BFD_LOCK; - WALK_LIST(n, bfd_proto_list) - if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req)) - return; + node *rn, *rnxt; + WALK_LIST_DELSAFE(rn, rnxt, bfd_global.pickup_list) + bfd_add_request(p, SKIP_BACK(struct bfd_request, n, rn)); - rem_node(&req->n); - add_tail(&bfd_wait_list, &req->n); - req->session = NULL; - bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0); + BFD_UNLOCK; + birdloop_leave(p->p.loop); + } + + BFD_LOCK; + node *rn, *rnxt; + WALK_LIST_DELSAFE(rn, rnxt, bfd_global.pickup_list) + { + rem_node(rn); + add_tail(&bfd_global.wait_list, rn); + bfd_request_notify(SKIP_BACK(struct bfd_request, n, rn), BFD_STATE_ADMIN_DOWN, 0); + } + BFD_UNLOCK; } +static event bfd_pickup_event = { .hook = bfd_pickup_requests }; + static void bfd_take_requests(struct bfd_proto *p) { node *n, *nn; - - WALK_LIST_DELSAFE(n, nn, bfd_wait_list) + BFD_LOCK; + WALK_LIST_DELSAFE(n, nn, bfd_global.wait_list) bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n)); + BFD_UNLOCK; } static void bfd_drop_requests(struct bfd_proto *p) { node *n; - - HASH_WALK(p->session_hash_id, next_id, s) + BFD_LOCK; + HASH_WALK_DELSAFE(p->session_hash_id, next_id, s) { - /* We assume that p is not in bfd_proto_list */ WALK_LIST_FIRST(n, s->request_list) - bfd_submit_request(SKIP_BACK(struct bfd_request, n, n)); + { + struct bfd_request *req = SKIP_BACK(struct bfd_request, n, n); + rem_node(&req->n); + add_tail(&bfd_global.pickup_list, &req->n); + req->session = NULL; + bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0); + } + + ev_send(&global_event_list, &bfd_pickup_event); + + bfd_remove_session_locked(p, s); } HASH_WALK_END; + BFD_UNLOCK; } static struct resclass bfd_request_class; @@ -728,9 +763,6 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, { struct bfd_request *req = ralloc(p, &bfd_request_class); - /* Hack: self-link req->n, we will call rem_node() on it */ - req->n.prev = req->n.next = &req->n; - req->addr = addr; req->local = local; req->iface = iface; @@ -739,11 +771,16 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, if (opts) req->opts = *opts; - bfd_submit_request(req); - req->hook = hook; req->data = data; + req->session = NULL; + + BFD_LOCK; + add_tail(&bfd_global.pickup_list, &req->n); + ev_send(&global_event_list, &bfd_pickup_event); + BFD_UNLOCK; + return req; } @@ -1001,8 +1038,10 @@ bfd_notify_init(struct bfd_proto *p) void bfd_init_all(void) { - init_list(&bfd_proto_list); - init_list(&bfd_wait_list); + bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global"); + init_list(&bfd_global.wait_list); + init_list(&bfd_global.pickup_list); + init_list(&bfd_global.proto_list); } static struct proto * @@ -1021,10 +1060,10 @@ bfd_start(struct proto *P) struct bfd_proto *p = (struct bfd_proto *) P; struct bfd_config *cf = (struct bfd_config *) (P->cf); - p->loop = birdloop_new(); - p->tpool = rp_new(NULL, "BFD thread root"); pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE); + p->tpool = rp_new(P->pool, "BFD loop pool"); + p->session_slab = sl_new(P->pool, sizeof(struct bfd_session)); HASH_INIT(p->session_hash_id, P->pool, 8); HASH_INIT(p->session_hash_ip, P->pool, 8); @@ -1034,9 +1073,7 @@ bfd_start(struct proto *P) init_list(&p->notify_list); bfd_notify_init(p); - add_tail(&bfd_proto_list, &p->bfd_node); - - birdloop_enter(p->loop); + add_tail(&bfd_global.proto_list, &p->bfd_node); if (cf->accept_ipv4 && cf->accept_direct) p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); @@ -1050,42 +1087,33 @@ bfd_start(struct proto *P) if (cf->accept_ipv6 && cf->accept_multihop) p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); - birdloop_leave(p->loop); - bfd_take_requests(p); struct bfd_neighbor *n; WALK_LIST(n, cf->neigh_list) bfd_start_neighbor(p, n); - birdloop_start(p->loop); - return PS_UP; } - static int bfd_shutdown(struct proto *P) { struct bfd_proto *p = (struct bfd_proto *) P; - struct bfd_config *cf = (struct bfd_config *) (P->cf); + struct bfd_config *cf = (struct bfd_config *) (p->p.cf); rem_node(&p->bfd_node); - birdloop_stop(p->loop); - - struct bfd_neighbor *n; - WALK_LIST(n, cf->neigh_list) - bfd_stop_neighbor(p, n); + struct bfd_neighbor *bn; + WALK_LIST(bn, cf->neigh_list) + bfd_stop_neighbor(p, bn); bfd_drop_requests(p); - /* FIXME: This is hack */ - birdloop_enter(p->loop); - rfree(p->tpool); - birdloop_leave(p->loop); - - birdloop_free(p->loop); + if (p->rx4_1) sk_stop(p->rx4_1); + if (p->rx4_m) sk_stop(p->rx4_m); + if (p->rx6_1) sk_stop(p->rx6_1); + if (p->rx6_m) sk_stop(p->rx6_m); return PS_DOWN; } @@ -1105,7 +1133,7 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) (new->accept_multihop != old->accept_multihop)) return 0; - birdloop_mask_wakeups(p->loop); + birdloop_mask_wakeups(p->p.loop); WALK_LIST(ifa, p->iface_list) bfd_reconfigure_iface(p, ifa, new); @@ -1119,7 +1147,7 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) bfd_reconfigure_neighbors(p, new); - birdloop_unmask_wakeups(p->loop); + birdloop_unmask_wakeups(p->p.loop); return 1; } diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 9d4cbbf8..8430064b 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -17,12 +17,12 @@ #include "nest/password.h" #include "conf/conf.h" #include "lib/hash.h" +#include "lib/io-loop.h" #include "lib/resource.h" #include "lib/socket.h" #include "lib/string.h" #include "nest/bfd.h" -#include "sysdep/unix/io-loop.h" #define BFD_CONTROL_PORT 3784 @@ -87,9 +87,11 @@ struct bfd_neighbor struct bfd_proto { struct proto p; - struct birdloop *loop; - pool *tpool; + pthread_spinlock_t lock; + + pool *tpool; + node bfd_node; slab *session_slab; diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index df1cba42..ed5479fb 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -36,6 +36,7 @@ proto: bfd_proto ; bfd_proto_start: proto_start BFD { this_proto = proto_config_new(&proto_bfd, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); init_list(&BFD_CFG->patt_list); init_list(&BFD_CFG->neigh_list); BFD_CFG->accept_ipv4 = BFD_CFG->accept_ipv6 = 1; diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 7618e20f..37d77f37 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -410,7 +410,7 @@ bfd_err_hook(sock *sk, int err) sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) { - sock *sk = sk_new(p->tpool); + sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; sk->subtype = af; sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; @@ -441,7 +441,7 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) { - sock *sk = sk_new(p->tpool); + sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; sk->saddr = local; sk->dport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 35e9ea59..96df671f 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -853,6 +853,7 @@ bgp_graceful_restart_feed(struct bgp_channel *c) { c->stale_feed = (struct rt_export_request) { .name = "BGP-GR", + .list = &global_work_list, .trace_routes = c->c.debug | c->c.proto->debug, .dump_req = bgp_graceful_restart_feed_dump_req, .log_state_change = bgp_graceful_restart_feed_log_state_change, diff --git a/sysdep/unix/coroutine.c b/sysdep/unix/coroutine.c index 2eba142c..4758c056 100644 --- a/sysdep/unix/coroutine.c +++ b/sysdep/unix/coroutine.c @@ -21,10 +21,9 @@ #include "lib/resource.h" #include "lib/timer.h" -/* Using a rather big stack for coroutines to allow for stack-local allocations. - * In real world, the kernel doesn't alloc this memory until it is used. - * */ -#define CORO_STACK_SIZE 1048576 +#include "conf/conf.h" + +#define CORO_STACK_SIZE 65536 /* * Implementation of coroutines based on POSIX threads @@ -79,6 +78,11 @@ domain_free(struct domain_generic *dg) xfree(dg); } +uint dg_order(struct domain_generic *dg) +{ + return dg->order; +} + void do_lock(struct domain_generic *dg, struct domain_generic **lsp) { if ((char *) lsp - (char *) &locking_stack != dg->order) @@ -89,7 +93,11 @@ void do_lock(struct domain_generic *dg, struct domain_generic **lsp) if (*lsp) bug("Inconsistent locking stack state on lock"); + btime lock_begin = current_time(); pthread_mutex_lock(&dg->mutex); + btime duration = current_time() - lock_begin; + if (config && (duration > config->watchdog_warning)) + log(L_WARN "Locking of %s took %d ms", dg->name, (int) (duration TO_MS)); if (dg->prev || dg->locked_by) bug("Previous unlock not finished correctly"); @@ -140,11 +148,16 @@ static struct resclass coro_class = { .free = coro_free, }; +_Thread_local struct coroutine *this_coro = NULL; + static void *coro_entry(void *p) { struct coroutine *c = p; + ASSERT_DIE(c->entry); + this_coro = c; + c->entry(c->data); ASSERT_DIE(coro_cleaned_up); diff --git a/sysdep/unix/io-loop.c b/sysdep/unix/io-loop.c index a15d866a..275d38a0 100644 --- a/sysdep/unix/io-loop.c +++ b/sysdep/unix/io-loop.c @@ -15,50 +15,47 @@ #include #include "nest/bird.h" -#include "sysdep/unix/io-loop.h" #include "lib/buffer.h" +#include "lib/coro.h" #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" #include "lib/timer.h" #include "lib/socket.h" - -struct birdloop -{ - pool *pool; - pthread_t thread; - pthread_mutex_t mutex; - - u8 stop_called; - u8 poll_active; - u8 wakeup_masked; - int wakeup_fds[2]; - - struct timeloop time; - list event_list; - list sock_list; - uint sock_num; - - BUFFER(sock *) poll_sk; - BUFFER(struct pollfd) poll_fd; - u8 poll_changed; - u8 close_scheduled; -}; - +#include "lib/io-loop.h" +#include "sysdep/unix/io-loop.h" +#include "conf/conf.h" /* * Current thread context */ -static _Thread_local struct birdloop *birdloop_current; +_Thread_local struct birdloop *birdloop_current; +static _Thread_local struct birdloop *birdloop_wakeup_masked; +static _Thread_local uint birdloop_wakeup_masked_count; -static inline void -birdloop_set_current(struct birdloop *loop) +event_list * +birdloop_event_list(struct birdloop *loop) { - birdloop_current = loop; - local_timeloop = loop ? &loop->time : &main_timeloop; + return &loop->event_list; +} + +struct timeloop * +birdloop_time_loop(struct birdloop *loop) +{ + return &loop->time; +} + +_Bool +birdloop_inside(struct birdloop *loop) +{ + for (struct birdloop *c = birdloop_current; c; c = c->prev_loop) + if (loop == c) + return 1; + + return 0; } /* @@ -135,57 +132,17 @@ wakeup_do_kick(struct birdloop *loop) pipe_kick(loop->wakeup_fds[1]); } -static inline void -wakeup_kick(struct birdloop *loop) -{ - if (!loop->wakeup_masked) - wakeup_do_kick(loop); - else - loop->wakeup_masked = 2; -} - -/* For notifications from outside */ void -wakeup_kick_current(void) +birdloop_ping(struct birdloop *loop) { - if (birdloop_current && birdloop_current->poll_active) - wakeup_kick(birdloop_current); -} - - -/* - * Events - */ + u32 ping_sent = atomic_fetch_add_explicit(&loop->ping_sent, 1, memory_order_acq_rel); + if (ping_sent) + return; -static inline uint -events_waiting(struct birdloop *loop) -{ - return !EMPTY_LIST(loop->event_list); -} - -static inline void -events_init(struct birdloop *loop) -{ - init_list(&loop->event_list); -} - -static void -events_fire(struct birdloop *loop) -{ - times_update(); - ev_run_list(&loop->event_list); -} - -void -ev2_schedule(event *e) -{ - if (birdloop_current->poll_active && EMPTY_LIST(birdloop_current->event_list)) - wakeup_kick(birdloop_current); - - if (e->n.next) - rem_node(&e->n); - - add_tail(&birdloop_current->event_list, &e->n); + if (loop == birdloop_wakeup_masked) + birdloop_wakeup_masked_count++; + else + wakeup_do_kick(loop); } @@ -213,13 +170,13 @@ sockets_add(struct birdloop *loop, sock *s) s->index = -1; loop->poll_changed = 1; - if (loop->poll_active) - wakeup_kick(loop); + birdloop_ping(loop); } void sk_start(sock *s) { + ASSERT_DIE(birdloop_current != &main_birdloop); sockets_add(birdloop_current, s); } @@ -230,28 +187,21 @@ sockets_remove(struct birdloop *loop, sock *s) loop->sock_num--; if (s->index >= 0) + { loop->poll_sk.data[s->index] = NULL; - - s->index = -1; - loop->poll_changed = 1; - - /* Wakeup moved to sk_stop() */ + s->index = -1; + loop->poll_changed = 1; + loop->close_scheduled = 1; + birdloop_ping(loop); + } + else + close(s->fd); } void sk_stop(sock *s) { sockets_remove(birdloop_current, s); - - if (birdloop_current->poll_active) - { - birdloop_current->close_scheduled = 1; - wakeup_kick(birdloop_current); - } - else - close(s->fd); - - s->fd = -1; } static inline uint sk_want_events(sock *s) @@ -351,12 +301,15 @@ sockets_fire(struct birdloop *loop) if (pfd->revents & POLLIN) while (e && *psk && (*psk)->rx_hook) - e = sk_read(*psk, 0); + e = sk_read(*psk, pfd->revents); e = 1; if (pfd->revents & POLLOUT) + { + loop->poll_changed = 1; while (e && *psk) e = sk_write(*psk); + } } } @@ -365,104 +318,168 @@ sockets_fire(struct birdloop *loop) * Birdloop */ -static void *birdloop_main(void *arg); +struct birdloop main_birdloop; + +static void birdloop_enter_locked(struct birdloop *loop); + +void +birdloop_init(void) +{ + wakeup_init(&main_birdloop); + + main_birdloop.time.domain = the_bird_domain.the_bird; + main_birdloop.time.loop = &main_birdloop; + + times_update(); + timers_init(&main_birdloop.time, &root_pool); + + birdloop_enter_locked(&main_birdloop); +} + +static void birdloop_main(void *arg); struct birdloop * -birdloop_new(void) +birdloop_new(pool *pp, uint order, const char *name) { - pool *p = rp_new(NULL, "Birdloop root"); + struct domain_generic *dg = domain_new(name, order); + + pool *p = rp_new(pp, name); struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); loop->pool = p; - pthread_mutex_init(&loop->mutex, NULL); - wakeup_init(loop); + loop->time.domain = dg; + loop->time.loop = loop; + + birdloop_enter(loop); - events_init(loop); + wakeup_init(loop); + ev_init_list(&loop->event_list, loop, name); timers_init(&loop->time, p); sockets_init(loop); + loop->time.coro = coro_run(p, birdloop_main, loop); + + birdloop_leave(loop); + return loop; } +static void +birdloop_do_stop(struct birdloop *loop, void (*stopped)(void *data), void *data) +{ + loop->stopped = stopped; + loop->stop_data = data; + wakeup_do_kick(loop); +} + void -birdloop_start(struct birdloop *loop) +birdloop_stop(struct birdloop *loop, void (*stopped)(void *data), void *data) { - int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop); - if (rv) - die("pthread_create(): %M", rv); + DG_LOCK(loop->time.domain); + birdloop_do_stop(loop, stopped, data); + DG_UNLOCK(loop->time.domain); } void -birdloop_stop(struct birdloop *loop) +birdloop_stop_self(struct birdloop *loop, void (*stopped)(void *data), void *data) { - pthread_mutex_lock(&loop->mutex); - loop->stop_called = 1; - wakeup_do_kick(loop); - pthread_mutex_unlock(&loop->mutex); + ASSERT_DIE(loop == birdloop_current); + ASSERT_DIE(DG_IS_LOCKED(loop->time.domain)); - int rv = pthread_join(loop->thread, NULL); - if (rv) - die("pthread_join(): %M", rv); + birdloop_do_stop(loop, stopped, data); } void birdloop_free(struct birdloop *loop) { + ASSERT_DIE(loop->links == 0); + domain_free(loop->time.domain); rfree(loop->pool); } +static void +birdloop_enter_locked(struct birdloop *loop) +{ + ASSERT_DIE(DG_IS_LOCKED(loop->time.domain)); + ASSERT_DIE(!birdloop_inside(loop)); + + /* Store the old context */ + loop->prev_loop = birdloop_current; + + /* Put the new context */ + birdloop_current = loop; +} void birdloop_enter(struct birdloop *loop) { - /* TODO: these functions could save and restore old context */ - pthread_mutex_lock(&loop->mutex); - birdloop_set_current(loop); + DG_LOCK(loop->time.domain); + return birdloop_enter_locked(loop); +} + +static void +birdloop_leave_locked(struct birdloop *loop) +{ + /* Check the current context */ + ASSERT_DIE(birdloop_current == loop); + + /* Restore the old context */ + birdloop_current = loop->prev_loop; } void birdloop_leave(struct birdloop *loop) { - /* TODO: these functions could save and restore old context */ - birdloop_set_current(NULL); - pthread_mutex_unlock(&loop->mutex); + birdloop_leave_locked(loop); + DG_UNLOCK(loop->time.domain); } void birdloop_mask_wakeups(struct birdloop *loop) { - pthread_mutex_lock(&loop->mutex); - loop->wakeup_masked = 1; - pthread_mutex_unlock(&loop->mutex); + ASSERT_DIE(birdloop_wakeup_masked == NULL); + birdloop_wakeup_masked = loop; } void birdloop_unmask_wakeups(struct birdloop *loop) { - pthread_mutex_lock(&loop->mutex); - if (loop->wakeup_masked == 2) + ASSERT_DIE(birdloop_wakeup_masked == loop); + birdloop_wakeup_masked = NULL; + if (birdloop_wakeup_masked_count) wakeup_do_kick(loop); - loop->wakeup_masked = 0; - pthread_mutex_unlock(&loop->mutex); + + birdloop_wakeup_masked_count = 0; } -static void * +void +birdloop_link(struct birdloop *loop) +{ + ASSERT_DIE(birdloop_inside(loop)); + loop->links++; +} + +void +birdloop_unlink(struct birdloop *loop) +{ + ASSERT_DIE(birdloop_inside(loop)); + loop->links--; +} + +static void birdloop_main(void *arg) { struct birdloop *loop = arg; timer *t; int rv, timeout; - birdloop_set_current(loop); + btime loop_begin = current_time(); - pthread_mutex_lock(&loop->mutex); + birdloop_enter(loop); while (1) { - events_fire(loop); - timers_fire(&loop->time); - - times_update(); - if (events_waiting(loop)) + timers_fire(&loop->time, 0); + if (ev_run_list(&loop->event_list)) timeout = 0; else if (t = timers_first(&loop->time)) timeout = (tm_remains(t) TO_MS) + 1; @@ -472,8 +489,11 @@ birdloop_main(void *arg) if (loop->poll_changed) sockets_prepare(loop); - loop->poll_active = 1; - pthread_mutex_unlock(&loop->mutex); + btime duration = current_time() - loop_begin; + if (duration > config->watchdog_warning) + log(L_WARN "I/O loop cycle took %d ms", (int) (duration TO_MS)); + + birdloop_leave(loop); try: rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout); @@ -484,25 +504,32 @@ birdloop_main(void *arg) die("poll: %m"); } - pthread_mutex_lock(&loop->mutex); - loop->poll_active = 0; + birdloop_enter(loop); if (loop->close_scheduled) sockets_close_fds(loop); - if (loop->stop_called) + if (loop->stopped) break; + loop_begin = current_time(); + if (rv) sockets_fire(loop); - timers_fire(&loop->time); + atomic_exchange_explicit(&loop->ping_sent, 0, memory_order_acq_rel); } - loop->stop_called = 0; - pthread_mutex_unlock(&loop->mutex); + /* Flush remaining events */ + ASSERT_DIE(!ev_run_list(&loop->event_list)); + + /* No timers allowed */ + ASSERT_DIE(timers_count(&loop->time) == 0); + ASSERT_DIE(EMPTY_LIST(loop->sock_list)); + ASSERT_DIE(loop->sock_num == 0); - return NULL; + birdloop_leave(loop); + loop->stopped(loop->stop_data); } diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h index d858b04e..4024b6c5 100644 --- a/sysdep/unix/io-loop.h +++ b/sysdep/unix/io-loop.h @@ -4,31 +4,32 @@ * Can be freely distributed and used under the terms of the GNU GPL. */ -#ifndef _BIRD_IO_LOOP_H_ -#define _BIRD_IO_LOOP_H_ +#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_ +#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_ -#include "nest/bird.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/timer.h" -#include "lib/socket.h" +struct birdloop +{ + pool *pool; + struct timeloop time; + event_list event_list; + list sock_list; + uint sock_num; -void ev2_schedule(event *e); + BUFFER(sock *) poll_sk; + BUFFER(struct pollfd) poll_fd; + u8 poll_changed; + u8 close_scheduled; -void sk_start(sock *s); -void sk_stop(sock *s); + _Atomic u32 ping_sent; + int wakeup_fds[2]; -struct birdloop *birdloop_new(void); -void birdloop_start(struct birdloop *loop); -void birdloop_stop(struct birdloop *loop); -void birdloop_free(struct birdloop *loop); + uint links; -void birdloop_enter(struct birdloop *loop); -void birdloop_leave(struct birdloop *loop); -void birdloop_mask_wakeups(struct birdloop *loop); -void birdloop_unmask_wakeups(struct birdloop *loop); + void (*stopped)(void *data); + void *stop_data; + struct birdloop *prev_loop; +}; -#endif /* _BIRD_IO_LOOP_H_ */ +#endif diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 90bb5d64..c91f2856 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -43,6 +43,7 @@ #include "conf/conf.h" #include "sysdep/unix/unix.h" +#include "sysdep/unix/io-loop.h" #include CONFIG_INCLUDE_SYSIO_H /* Maximum number of calls of tx handler for one socket in one @@ -800,18 +801,16 @@ sk_free(resource *r) sk_ssh_free(s); #endif - if (s->fd < 0) + if ((s->fd < 0) || (s->flags & SKF_THREAD)) return; - /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ - if (!(s->flags & SKF_THREAD)) - { - if (s == current_sock) - current_sock = sk_next(s); - if (s == stored_sock) - stored_sock = sk_next(s); + if (s == current_sock) + current_sock = sk_next(s); + if (s == stored_sock) + stored_sock = sk_next(s); + + if (enlisted(&s->n)) rem_node(&s->n); - } if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE) close(s->fd); @@ -1104,7 +1103,11 @@ sk_passive_connected(sock *s, int type) return 1; } - sk_insert(t); + if (s->flags & SKF_PASSIVE_THREAD) + t->flags |= SKF_THREAD; + else + sk_insert(t); + sk_alloc_bufs(t); s->rx_hook(t, 0); return 1; @@ -1508,6 +1511,36 @@ sk_open_unix(sock *s, char *name) return 0; } +static void +sk_reloop_hook(void *_vs) +{ + sock *s = _vs; + if (birdloop_inside(&main_birdloop)) + { + s->flags &= ~SKF_THREAD; + sk_insert(s); + } + else + { + s->flags |= SKF_THREAD; + sk_start(s); + } +} + +void +sk_reloop(sock *s, struct birdloop *loop) +{ + if (enlisted(&s->n)) + rem_node(&s->n); + + s->reloop = (event) { + .hook = sk_reloop_hook, + .data = s, + }; + + ev_send_loop(loop, &s->reloop); +} + #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \ CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL) @@ -2143,8 +2176,9 @@ void io_init(void) { init_list(&sock_list); - init_list(&global_event_list); - init_list(&global_work_list); + ev_init_list(&global_event_list, &main_birdloop, "Global event list"); + ev_init_list(&global_work_list, &main_birdloop, "Global work list"); + ev_init_list(&main_birdloop.event_list, &main_birdloop, "Global fast event list"); krt_io_init(); // XXX init_times(); // XXX update_times(); @@ -2158,14 +2192,7 @@ static int short_loops = 0; #define SHORT_LOOP_MAX 10 #define WORK_EVENTS_MAX 10 -static int poll_reload_pipe[2]; - -void -io_loop_reload(void) -{ - char b; - write(poll_reload_pipe[1], &b, 1); -} +void pipe_drain(int fd); void io_loop(void) @@ -2178,21 +2205,19 @@ io_loop(void) int fdmax = 256; struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd)); - if (pipe(poll_reload_pipe) < 0) - die("pipe(poll_reload_pipe) failed: %m"); - watchdog_start1(); for(;;) { times_update(); events = ev_run_list(&global_event_list); events = ev_run_list_limited(&global_work_list, WORK_EVENTS_MAX) || events; - timers_fire(&main_timeloop); + events = ev_run_list(&main_birdloop.event_list) || events; + timers_fire(&main_birdloop.time, 1); io_close_event(); // FIXME poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ - if (t = timers_first(&main_timeloop)) + if (t = timers_first(&main_birdloop.time)) { times_update(); timeout = (tm_remains(t) TO_MS) + 1; @@ -2200,7 +2225,7 @@ io_loop(void) } /* A hack to reload main io_loop() when something has changed asynchronously. */ - pfd[0].fd = poll_reload_pipe[0]; + pfd[0].fd = main_birdloop.wakeup_fds[0]; pfd[0].events = POLLIN; nfds = 1; @@ -2263,9 +2288,9 @@ io_loop(void) /* And finally enter poll() to find active sockets */ watchdog_stop(); - the_bird_unlock(); + birdloop_leave(&main_birdloop); pout = poll(pfd, nfds, poll_tout); - the_bird_lock(); + birdloop_enter(&main_birdloop); watchdog_start(); if (pout < 0) @@ -2279,8 +2304,8 @@ io_loop(void) if (pfd[0].revents & POLLIN) { /* IO loop reload requested */ - char b; - read(poll_reload_pipe[0], &b, 1); + pipe_drain(main_birdloop.wakeup_fds[0]); + atomic_exchange_explicit(&main_birdloop.ping_sent, 0, memory_order_acq_rel); continue; } diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index d35424ff..5da27cb6 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -908,10 +908,12 @@ main(int argc, char **argv) parse_args(argc, argv); log_switch(1, NULL, NULL); + the_bird_lock(); + random_init(); net_init(); resource_init(); - timer_init(); + birdloop_init(); olock_init(); io_init(); rt_init(); @@ -961,7 +963,6 @@ main(int argc, char **argv) dup2(0, 2); } - the_bird_lock(); main_thread_init(); diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 313c97c3..ad85d1ea 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -106,7 +106,6 @@ extern volatile sig_atomic_t async_shutdown_flag; void io_init(void); void io_loop(void); -void io_loop_reload(void); void io_log_dump(void); int sk_open_unix(struct birdsock *s, char *name); struct rfile *rf_open(struct pool *, const char *name, const char *mode); diff --git a/test/bt-utils.c b/test/bt-utils.c index cbca3a6b..90815e77 100644 --- a/test/bt-utils.c +++ b/test/bt-utils.c @@ -53,16 +53,20 @@ cf_file_read(byte *dest, uint max_len, int fd) return l; } +void resource_sys_init(void); + void bt_bird_init(void) { + resource_sys_init(); if(bt_verbose) log_init_debug(""); log_switch(bt_verbose != 0, NULL, NULL); + the_bird_lock(); resource_init(); olock_init(); - timer_init(); + birdloop_init(); io_init(); rt_init(); if_init(); @@ -79,6 +83,7 @@ void bt_bird_cleanup(void) class_to_protocol[i] = NULL; config = new_config = NULL; + the_bird_unlock(); } static char * -- cgit v1.2.3 From ad686c55c3fad13f39e44ee5732c38296caff782 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 5 Jun 2022 04:03:43 +0200 Subject: Babel: Do not try to remove multicast seqno request objects from neighbour list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Babel seqno request code keeps track of which seqno requests are outstanding for a neighbour by putting them onto a per-neighbour list. When reusing a seqno request, it will try to remove this node, but if the seqno request in question was a multicast request with no neighbour attached this will result in a crash because it tries to remove a list node that wasn't added to any list. Fix this by making the list remove conditional. Also fix neighbor removal which were changing seqno requests to multicast ones instead of removing them. Fixes: ebd5751cdeb4 ("Babel: Seqno requests are properly decoupled from neighbors when the underlying interface disappears"). Based on the patch from Toke Høiland-Jørgensen , bug reported by Stefan Haller , thanks. --- proto/babel/babel.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'proto') diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 4a7d550f..71452a6f 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -312,7 +312,9 @@ babel_add_seqno_request(struct babel_proto *p, struct babel_entry *e, /* Found older */ rem_node(NODE sr); - rem_node(&sr->nbr_node); + + if (sr->nbr) + rem_node(&sr->nbr_node); goto found; } @@ -452,10 +454,7 @@ babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr) struct babel_seqno_request *sr; WALK_LIST_FIRST2(sr, nbr_node, nbr->requests) - { - sr->nbr = NULL; - rem_node(&sr->nbr_node); - } + babel_remove_seqno_request(p, sr); nbr->ifa = NULL; rem_node(NODE nbr); -- cgit v1.2.3 From beb5f78ada79ac90f31f2c4923302c74d9ab38bf Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 27 Jun 2022 19:04:22 +0200 Subject: Preexport callback now takes the channel instead of protocol as argument Passing protocol to preexport was in fact a historical relic from the old times when channels weren't a thing. Refactoring that to match current extensibility needs. --- nest/protocol.h | 2 +- nest/route.h | 2 +- nest/rt-show.c | 2 +- nest/rt-table.c | 7 ++++--- proto/babel/babel.c | 4 ++-- proto/bgp/attrs.c | 4 ++-- proto/bgp/bgp.h | 2 +- proto/ospf/ospf.c | 8 ++++---- proto/pipe/pipe.c | 4 ++-- proto/radv/radv.c | 6 +++--- sysdep/unix/krt.c | 4 ++-- 11 files changed, 23 insertions(+), 22 deletions(-) (limited to 'proto') diff --git a/nest/protocol.h b/nest/protocol.h index abcc505d..3b3812a5 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -217,7 +217,7 @@ struct proto { void (*neigh_notify)(struct neighbor *neigh); void (*make_tmp_attrs)(struct rte *rt, struct linpool *pool); void (*store_tmp_attrs)(struct rte *rt, struct linpool *pool); - int (*preexport)(struct proto *, struct rte **rt, struct linpool *pool); + int (*preexport)(struct channel *, struct rte **rt, struct linpool *pool); void (*reload_routes)(struct channel *); void (*feed_begin)(struct channel *, int initial); void (*feed_end)(struct channel *); diff --git a/nest/route.h b/nest/route.h index 395139a3..1dacd92f 100644 --- a/nest/route.h +++ b/nest/route.h @@ -357,7 +357,7 @@ rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); void rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); /* rte_update() moved to protocol.h to avoid dependency conflicts */ -int rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter); +int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent); void rt_refresh_begin(rtable *t, struct channel *c); void rt_refresh_end(rtable *t, struct channel *c); diff --git a/nest/rt-show.c b/nest/rt-show.c index f8b7ba51..7639e5f7 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -154,7 +154,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) else if (d->export_mode) { struct proto *ep = ec->proto; - int ic = ep->preexport ? ep->preexport(ep, &e, c->show_pool) : 0; + int ic = ep->preexport ? ep->preexport(ec, &e, c->show_pool) : 0; if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED) pass = 1; diff --git a/nest/rt-table.c b/nest/rt-table.c index 2cf55421..a2fa5e77 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -870,7 +870,7 @@ export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int si rt = rt0; *rt_free = NULL; - v = p->preexport ? p->preexport(p, &rt, pool) : 0; + v = p->preexport ? p->preexport(c, &rt, pool) : 0; if (v < 0) { if (silent) @@ -1883,8 +1883,9 @@ rte_modify(rte *old) /* Check rtable for best route to given net whether it would be exported do p */ int -rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter) +rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) { + struct proto *p = c->proto; net *n = net_find(t, a); rte *rt = n ? n->routes : NULL; @@ -1894,7 +1895,7 @@ rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter) rte_update_lock(); /* Rest is stripped down export_filter() */ - int v = p->preexport ? p->preexport(p, &rt, rte_update_pool) : 0; + int v = p->preexport ? p->preexport(c, &rt, rte_update_pool) : 0; if (v == RIC_PROCESS) { rte_make_tmp_attrs(&rt, rte_update_pool, NULL); diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 71452a6f..0d0304f9 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -2230,12 +2230,12 @@ babel_kick_timer(struct babel_proto *p) static int -babel_preexport(struct proto *P, struct rte **new, struct linpool *pool UNUSED) +babel_preexport(struct channel *C, struct rte **new, struct linpool *pool UNUSED) { struct rta *a = (*new)->attrs; /* Reject our own unreachable routes */ - if ((a->dest == RTD_UNREACHABLE) && (a->src->proto == P)) + if ((a->dest == RTD_UNREACHABLE) && (a->src->proto == C->proto)) return -1; return 0; diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index a9787173..9dd9fb1a 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1669,11 +1669,11 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) */ int -bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) +bgp_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) { rte *e = *new; struct proto *SRC = e->attrs->src->proto; - struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (struct bgp_proto *) C->proto; struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; /* Reject our routes */ diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 4969c0b9..7cd1c27d 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -589,7 +589,7 @@ int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old); -int bgp_preexport(struct proto *, struct rte **, struct linpool *); +int bgp_preexport(struct channel *, struct rte **, struct linpool *); int bgp_get_attr(const struct eattr *e, byte *buf, int buflen); void bgp_get_route_info(struct rte *, byte *buf); int bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad); diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index ba8c2e2b..42ffdb06 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -107,7 +107,7 @@ #include #include "ospf.h" -static int ospf_preexport(struct proto *P, rte **new, struct linpool *pool); +static int ospf_preexport(struct channel *C, rte **new, struct linpool *pool); static void ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool); static void ospf_store_tmp_attrs(struct rte *rt, struct linpool *pool); static void ospf_reload_routes(struct channel *C); @@ -484,14 +484,14 @@ ospf_disp(timer * timer) * import to the filters. */ static int -ospf_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) +ospf_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) { - struct ospf_proto *p = (struct ospf_proto *) P; + struct ospf_proto *p = (struct ospf_proto *) C->proto; struct ospf_area *oa = ospf_main_area(p); rte *e = *new; /* Reject our own routes */ - if (e->attrs->src->proto == P) + if (e->attrs->src->proto == &p->p) return -1; /* Do not export routes to stub areas */ diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index f991d09a..481f5804 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -101,11 +101,11 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *o } static int -pipe_preexport(struct proto *P, rte **ee, struct linpool *p UNUSED) +pipe_preexport(struct channel *C, rte **ee, struct linpool *p UNUSED) { struct proto *pp = (*ee)->sender->proto; - if (pp == P) + if (pp == C->proto) return -1; /* Avoid local loops automatically */ return 0; diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 66e8eb4b..fe3713ef 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -391,10 +391,10 @@ radv_net_match_trigger(struct radv_config *cf, net *n) } int -radv_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) +radv_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) { // struct radv_proto *p = (struct radv_proto *) P; - struct radv_config *cf = (struct radv_config *) (P->cf); + struct radv_config *cf = (struct radv_config *) (C->proto->cf); if (radv_net_match_trigger(cf, (*new)->net)) return RIC_PROCESS; @@ -555,7 +555,7 @@ radv_check_active(struct radv_proto *p) return 1; struct channel *c = p->p.main_channel; - return rt_examine(c->table, &cf->trigger, &p->p, c->out_filter); + return rt_examine(c->table, &cf->trigger, c, c->out_filter); } static void diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 7c2614b1..a02cf977 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -903,12 +903,12 @@ krt_store_tmp_attrs(struct rte *rt, struct linpool *pool) } static int -krt_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED) +krt_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) { // struct krt_proto *p = (struct krt_proto *) P; rte *e = *new; - if (e->attrs->src->proto == P) + if (e->attrs->src->proto == C->proto) return -1; if (!krt_capable(e)) -- cgit v1.2.3 From a2527ee53d9d8fe7a1c29b56f8450b9ef1f9c7bc Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Wed, 9 Mar 2022 02:32:29 +0100 Subject: Filter: Improve handling of stack frames in filter bytecode When f_line is done, we have to pop the stack frame. The old code just removed nominal number of args/vars. Change it to use stored ventry value modified by number of returned values. This allows to allocate variables on a stack frame during execution of f_lines instead of just at start. But we need to know the number of returned values for a f_line. It is 1 for term, 0 for cmd. Store that to f_line during linearization. --- conf/confbase.Y | 4 ++-- filter/config.Y | 14 +++++++------- filter/decl.m4 | 6 ++++-- filter/f-inst.c | 8 ++++---- filter/f-inst.h | 7 ++++--- filter/f-util.c | 2 +- filter/filter.c | 3 +-- nest/config.Y | 4 ++-- proto/static/config.Y | 2 +- 9 files changed, 26 insertions(+), 24 deletions(-) (limited to 'proto') diff --git a/conf/confbase.Y b/conf/confbase.Y index 18ca8489..1d5738ff 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -153,14 +153,14 @@ conf: definition ; definition: DEFINE symbol '=' term ';' { struct f_val *val = cfg_allocz(sizeof(struct f_val)); - if (f_eval(f_linearize($4), cfg_mem, val) > F_RETURN) cf_error("Runtime error"); + if (f_eval(f_linearize($4, 1), cfg_mem, val) > F_RETURN) cf_error("Runtime error"); cf_define_symbol($2, SYM_CONSTANT | val->type, val, val); } ; expr: NUM - | '(' term ')' { $$ = f_eval_int(f_linearize($2)); } + | '(' term ')' { $$ = f_eval_int(f_linearize($2, 1)); } | CF_SYM_KNOWN { if ($1->class != (SYM_CONSTANT | T_INT)) cf_error("Number constant expected"); $$ = SYM_VAL($1).i; } diff --git a/filter/config.Y b/filter/config.Y index a3acf245..b67ca925 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -329,7 +329,7 @@ filter_def: conf: filter_eval ; filter_eval: - EVAL term { f_eval_int(f_linearize($2)); } + EVAL term { f_eval_int(f_linearize($2, 1)); } ; conf: custom_attr ; @@ -453,7 +453,7 @@ where_filter: function_body: function_vars '{' cmds '}' { - $$ = f_linearize($3); + $$ = f_linearize($3, 0); $$->vars = $1; } ; @@ -537,7 +537,7 @@ set_atom: | VPN_RD { $$.type = T_RD; $$.val.ec = $1; } | ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); } | '(' term ')' { - if (f_eval(f_linearize($2), cfg_mem, &($$)) > F_RETURN) cf_error("Runtime error"); + if (f_eval(f_linearize($2, 1), cfg_mem, &($$)) > F_RETURN) cf_error("Runtime error"); if (!f_valid_set_type($$.type)) cf_error("Set-incompatible type"); } | CF_SYM_KNOWN { @@ -549,13 +549,13 @@ set_atom: switch_atom: NUM { $$.type = T_INT; $$.val.i = $1; } - | '(' term ')' { $$.type = T_INT; $$.val.i = f_eval_int(f_linearize($2)); } + | '(' term ')' { $$.type = T_INT; $$.val.i = f_eval_int(f_linearize($2, 1)); } | fipa { $$ = $1; } | ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); } ; cnum: - term { $$ = f_eval_int(f_linearize($1)); } + term { $$ = f_eval_int(f_linearize($1, 1)); } pair_item: '(' cnum ',' cnum ')' { $$ = f_new_pair_item($2, $2, $4, $4); } @@ -642,7 +642,7 @@ switch_body: /* EMPTY */ { $$ = NULL; } | switch_body switch_items ':' cmds { /* Fill data fields */ struct f_tree *t; - struct f_line *line = f_linearize($4); + struct f_line *line = f_linearize($4, 0); for (t = $2; t; t = t->left) t->data = line; $$ = f_merge_items($1, $2); @@ -651,7 +651,7 @@ switch_body: /* EMPTY */ { $$ = NULL; } struct f_tree *t = f_new_tree(); t->from.type = t->to.type = T_VOID; t->right = t; - t->data = f_linearize($3); + t->data = f_linearize($3, 0); $$ = f_merge_items($1, t); } ; diff --git a/filter/decl.m4 b/filter/decl.m4 index 3d118e34..4c56cd9c 100644 --- a/filter/decl.m4 +++ b/filter/decl.m4 @@ -216,7 +216,7 @@ whati->f$1 = f$1; FID_DUMP_BODY()m4_dnl f_dump_line(item->fl$1, indent + 1); FID_LINEARIZE_BODY()m4_dnl -item->fl$1 = f_linearize(whati->f$1); +item->fl$1 = f_linearize(whati->f$1, $2); FID_SAME_BODY()m4_dnl if (!f_same(f1->fl$1, f2->fl$1)) return 0; FID_ITERATE_BODY()m4_dnl @@ -568,7 +568,7 @@ FID_WR_PUT(8) } struct f_line * -f_linearize_concat(const struct f_inst * const inst[], uint count) +f_linearize_concat(const struct f_inst * const inst[], uint count, uint results) { uint len = 0; for (uint i=0; ilen = linearize(out, inst[i], out->len); + out->results = results; + #ifdef LOCAL_DEBUG f_dump_line(out, 0); #endif diff --git a/filter/f-inst.c b/filter/f-inst.c index e75b5e01..5b8310c3 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -64,7 +64,7 @@ * m4_dnl ARG_ANY(num); argument with no type check accessible by v1, v2, v3 * m4_dnl ARG_TYPE(num, type); just declare the type of argument * m4_dnl VARARG; variable-length argument list; accessible by vv(i) and whati->varcount - * m4_dnl LINE(num, unused); this argument has to be converted to its own f_line + * m4_dnl LINE(num, out); this argument has to be converted to its own f_line * m4_dnl SYMBOL; symbol handed from config * m4_dnl STATIC_ATTR; static attribute definition * m4_dnl DYNAMIC_ATTR; dynamic attribute definition @@ -298,7 +298,7 @@ RESULT_TYPE(T_BOOL); if (v1.val.i) - LINE(2,0); + LINE(2,1); else RESULT_VAL(v1); } @@ -308,7 +308,7 @@ RESULT_TYPE(T_BOOL); if (!v1.val.i) - LINE(2,0); + LINE(2,1); else RESULT_VAL(v1); } @@ -536,7 +536,7 @@ if (v1.val.i) LINE(2,0); else - LINE(3,1); + LINE(3,0); } INST(FI_PRINT, 0, 0) { diff --git a/filter/f-inst.h b/filter/f-inst.h index 58563c79..e35f71c6 100644 --- a/filter/f-inst.h +++ b/filter/f-inst.h @@ -46,14 +46,15 @@ struct f_line { uint len; /* Line length */ u8 args; /* Function: Args required */ u8 vars; + u8 results; /* Results left on stack: cmd -> 0, term -> 1 */ struct f_arg *arg_list; struct f_line_item items[0]; /* The items themselves */ }; /* Convert the f_inst infix tree to the f_line structures */ -struct f_line *f_linearize_concat(const struct f_inst * const inst[], uint count); -static inline struct f_line *f_linearize(const struct f_inst *root) -{ return f_linearize_concat(&root, 1); } +struct f_line *f_linearize_concat(const struct f_inst * const inst[], uint count, uint results); +static inline struct f_line *f_linearize(const struct f_inst *root, uint results) +{ return f_linearize_concat(&root, 1, results); } void f_dump_line(const struct f_line *, uint indent); diff --git a/filter/f-util.c b/filter/f-util.c index 410999a6..fdb314b5 100644 --- a/filter/f-util.c +++ b/filter/f-util.c @@ -37,7 +37,7 @@ struct filter *f_new_where(struct f_inst *where) f_new_inst(FI_DIE, F_REJECT)); struct filter *f = cfg_allocz(sizeof(struct filter)); - f->root = f_linearize(cond); + f->root = f_linearize(cond, 0); return f; } diff --git a/filter/filter.c b/filter/filter.c index e505d570..20a380dc 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -215,8 +215,7 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) } /* End of current line. Drop local variables before exiting. */ - fstk->vcnt -= curline.line->vars; - fstk->vcnt -= curline.line->args; + fstk->vcnt = curline.ventry + curline.line->results; fstk->ecnt--; } diff --git a/nest/config.Y b/nest/config.Y index 17999422..ac599c09 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -165,7 +165,7 @@ rtrid: idval: NUM { $$ = $1; } - | '(' term ')' { $$ = f_eval_int(f_linearize($2)); } + | '(' term ')' { $$ = f_eval_int(f_linearize($2, 1)); } | IP4 { $$ = ip4_to_u32($1); } | CF_SYM_KNOWN { if ($1->class == (SYM_CONSTANT | T_INT) || $1->class == (SYM_CONSTANT | T_QUAD)) @@ -860,7 +860,7 @@ CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]]) { filters_dump_all(); cli_msg(0, ""); } ; CF_CLI(EVAL, term, , [[Evaluate an expression]]) -{ cmd_eval(f_linearize($2)); } ; +{ cmd_eval(f_linearize($2, 1)); } ; CF_CLI_HELP(ECHO, ..., [[Control echoing of log messages]]) CF_CLI(ECHO, echo_mask echo_size, (all | off | { debug|trace|info|remote|warning|error|auth [, ...] }) [], [[Control echoing of log messages]]) { diff --git a/proto/static/config.Y b/proto/static/config.Y index 41e10dbf..9d26ee82 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -40,7 +40,7 @@ static_route_finish(void) if (net_type_match(this_srt->net, NB_DEST) == !this_srt->dest) cf_error("Unexpected or missing nexthop/type"); - this_srt->cmds = f_linearize(this_srt_cmds); + this_srt->cmds = f_linearize(this_srt_cmds, 0); } CF_DECLS -- cgit v1.2.3 From 080cbd1219ba86dd44712d0d24ceae884b34ec4b Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 12 Jul 2022 10:36:10 +0200 Subject: Route refresh in tables uses a stale counter. Until now, we were marking routes as REF_STALE and REF_DISCARD to cleanup old routes after route refresh. This needed a synchronous route table walk at both beginning and the end of route refresh routine, marking the routes by the flags. We avoid these walks by using a stale counter. Every route contains: u8 stale_cycle; Every import hook contains: u8 stale_set; u8 stale_valid; u8 stale_pruned; u8 stale_pruning; In base_state, stale_set == stale_valid == stale_pruned == stale_pruning and all routes' stale_cycle also have the same value. The route refresh looks like follows: + ----------- + --------- + ----------- + ------------- + ------------ + | | stale_set | stale_valid | stale_pruning | stale_pruned | | Base | x | x | x | x | | Begin | x+1 | x | x | x | ... now routes are being inserted with stale_cycle == (x+1) | End | x+1 | x+1 | x | x | ... now table pruning routine is scheduled | Prune begin | x+1 | x+1 | x+1 | x | ... now routes with stale_cycle not between stale_set and stale_valid are deleted | Prune end | x+1 | x+1 | x+1 | x+1 | + ----------- + --------- + ----------- + ------------- + ------------ + The pruning routine is asynchronous and may have high latency in high-load environments. Therefore, multiple route refresh requests may happen before the pruning routine starts, leading to this situation: | Prune begin | x+k | x+k | x -> x+k | x | ... or even | Prune begin | x+k+1 | x+k | x -> x+k | x | ... if the prune event starts while another route refresh is running. In such a case, the pruning routine still deletes routes not fitting between stale_set and and stale_valid, effectively pruning the remnants of all unpruned route refreshes from before: | Prune end | x+k | x+k | x+k | x+k | In extremely rare cases, there may happen too many route refreshes before any route prune routine finishes. If the difference between stale_valid and stale_pruned becomes more than 128 when requesting for another route refresh, the routine walks the table synchronously and resets all the stale values to a base state, while logging a warning. --- lib/route.h | 3 +- nest/rt-show.c | 4 +++ nest/rt-table.c | 91 +++++++++++++++++++++++++++++++++++++--------------- nest/rt.h | 8 +++-- proto/bgp/bgp.c | 18 +++++------ proto/rpki/packets.c | 8 ++--- 6 files changed, 90 insertions(+), 42 deletions(-) (limited to 'proto') diff --git a/lib/route.h b/lib/route.h index 68596316..9f78ed00 100644 --- a/lib/route.h +++ b/lib/route.h @@ -29,11 +29,10 @@ typedef struct rte { u8 generation; /* If this route import is based on other previously exported route, this value should be 1 + MAX(generation of the parent routes). Otherwise the route is independent and this value is zero. */ + u8 stale_cycle; /* Auxiliary value for route refresh */ } rte; #define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_STALE 4 /* Route is stale in a refresh cycle */ -#define REF_DISCARD 8 /* Route is scheduled for discard */ #define REF_MODIFY 16 /* Route is scheduled for modify */ #define REF_PENDING 32 /* Route has not propagated completely yet */ diff --git a/nest/rt-show.c b/nest/rt-show.c index c3294518..dd0fe595 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -77,7 +77,11 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (d->verbose) + { ea_show_list(c, a); + cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS", + e->src->private_id, e->src->global_id, e->stale_cycle); + } else if (dest == RTD_UNICAST) ea_show_nexthop_list(c, nhad); else if (had) diff --git a/nest/rt-table.c b/nest/rt-table.c index d30573de..2ba28e33 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1323,7 +1323,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr { /* No changes, ignore the new route and refresh the old one */ - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + old->flags &= ~REF_MODIFY; + old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) { @@ -1639,6 +1640,9 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt nn = net_get(hook->table, n); new->net = nn->n.addr; new->sender = hook; + + /* Set the stale cycle */ + new->stale_cycle = hook->stale_set; } else if (!(nn = net_find(hook->table, n))) { @@ -1916,15 +1920,38 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct rt_import_request *req) +rt_refresh_begin(struct rt_import_request *req) { - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if (e->rte.sender == req->hook) - e->rte.flags |= REF_STALE; - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); + ASSERT_DIE(hook->stale_set == hook->stale_valid); + + /* If the pruning routine is too slow */ + if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) + || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) + { + log(L_WARN "Route refresh flood in table %s", hook->table->name); + FIB_WALK(&hook->table->fib, net, n) + { + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; + } + FIB_WALK_END; + hook->stale_set = 1; + hook->stale_valid = 0; + hook->stale_pruned = 0; + } + /* Setting a new value of the stale modifier */ + else if (!++hook->stale_set) + { + /* Let's reserve the stale_cycle zero value for always-invalid routes */ + hook->stale_set = 1; + hook->stale_valid = 0; + } + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); } /** @@ -1936,34 +1963,32 @@ rt_refresh_begin(rtable *t, struct rt_import_request *req) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct rt_import_request *req) +rt_refresh_end(struct rt_import_request *req) { - int prune = 0; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE)) - { - e->rte.flags |= REF_DISCARD; - prune = 1; - } - } - FIB_WALK_END; + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); - if (prune) - rt_schedule_prune(t); + rt_schedule_prune(hook->table); + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); } void rt_modify_stale(rtable *t, struct rt_import_request *req) { int prune = 0; + struct rt_import_hook *s = req->hook; FIB_WALK(&t->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) + if ((e->rte.sender == s) && + !(e->rte.flags & REF_FILTERED) && + (e->rte.stale_cycle + 1 == s->stale_set)) { e->rte.flags |= REF_MODIFY; prune = 1; @@ -2434,6 +2459,13 @@ rt_prune_table(rtable *tab) WALK_LIST2(ih, n, tab->imports, n) if (ih->import_state == TIS_STOP) rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; @@ -2459,7 +2491,10 @@ again: for (struct rte_storage *e=n->routes; e; e=e->next) { - if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD)) + struct rt_import_hook *s = e->rte.sender; + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { rte_discard(n, &e->rte); limit--; @@ -2544,6 +2579,12 @@ again: mb_free(ih); rt_unlock_table(tab); } + else if (ih->stale_pruning != ih->stale_pruned) + { + ih->stale_pruned = ih->stale_pruning; + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } } /** diff --git a/nest/rt.h b/nest/rt.h index 4b347170..b9ae7d10 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -201,6 +201,10 @@ struct rt_import_hook { btime last_state_change; /* Time of last state transition */ u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ }; @@ -381,8 +385,8 @@ net *net_get(rtable *tab, const net_addr *addr); net *net_route(rtable *tab, const net_addr *n); int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct rt_import_request *); -void rt_refresh_end(rtable *t, struct rt_import_request *); +void rt_refresh_begin(struct rt_import_request *); +void rt_refresh_end(struct rt_import_request *); void rt_modify_stale(rtable *t, struct rt_import_request *); void rt_schedule_prune(rtable *t); void rte_dump(struct rte_storage *); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 6ffe8824..d9008b9a 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -760,16 +760,16 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c.in_req); + rt_refresh_begin(&c->c.in_req); break; case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c.in_req); - rt_refresh_begin(c->c.table, &c->c.in_req); + rt_refresh_end(&c->c.in_req); + rt_refresh_begin(&c->c.in_req); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c.in_req); + rt_refresh_begin(&c->c.in_req); rt_modify_stale(c->c.table, &c->c.in_req); break; } @@ -777,8 +777,8 @@ bgp_handle_graceful_restart(struct bgp_proto *p) else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c.in_req); - rt_refresh_end(c->c.table, &c->c.in_req); + rt_refresh_begin(&c->c.in_req); + rt_refresh_end(&c->c.in_req); } /* Reset bucket and prefix tables */ @@ -819,7 +819,7 @@ bgp_graceful_restart_done(struct bgp_channel *c) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c.in_req); + rt_refresh_end(&c->c.in_req); } /** @@ -899,7 +899,7 @@ bgp_refresh_begin(struct bgp_channel *c) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c.in_req); + rt_refresh_begin(&c->c.in_req); } /** @@ -920,7 +920,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c.in_req); + rt_refresh_end(&c->c.in_req); } diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index 4a52b54b..108da61b 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -661,9 +661,9 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_ * a refresh cycle. */ if (cache->p->roa4_channel) - rt_refresh_begin(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); + rt_refresh_begin(&cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_begin(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); + rt_refresh_begin(&cache->p->roa6_channel->in_req); cache->p->refresh_channels = 1; } @@ -846,9 +846,9 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da { cache->p->refresh_channels = 0; if (cache->p->roa4_channel) - rt_refresh_end(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); + rt_refresh_end(&cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_end(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); + rt_refresh_end(&cache->p->roa6_channel->in_req); } cache->last_update = current_time(); -- cgit v1.2.3 From bc2ce4aaa8d1e4d56776ee35352c5e2caa09a0e5 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 12 Jul 2022 12:40:18 +0200 Subject: Removing the rte_modify API For BGP LLGR purposes, there was an API allowing a protocol to directly modify their stale routes in table before flushing them. This API was called by the table prune routine which violates the future locking requirements. Instead of this, BGP now requests a special route export and reimports these routes into the table, allowing for asynchronous execution without locking the table on export. --- lib/route.h | 1 - nest/proto.c | 1 - nest/protocol.h | 1 - nest/rt-table.c | 51 --------------------------------------------- nest/rt.h | 1 - proto/bgp/attrs.c | 62 +++++++++++++++++++++++++++++++++++++++++-------------- proto/bgp/bgp.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--- proto/bgp/bgp.h | 3 ++- 8 files changed, 100 insertions(+), 74 deletions(-) (limited to 'proto') diff --git a/lib/route.h b/lib/route.h index 9f78ed00..88a4373d 100644 --- a/lib/route.h +++ b/lib/route.h @@ -33,7 +33,6 @@ typedef struct rte { } rte; #define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_MODIFY 16 /* Route is scheduled for modify */ #define REF_PENDING 32 /* Route has not propagated completely yet */ /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ diff --git a/nest/proto.c b/nest/proto.c index 061205c1..72e479d7 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -438,7 +438,6 @@ channel_start_import(struct channel *c) .dump_req = channel_dump_import_req, .log_state_change = channel_import_log_state_change, .preimport = channel_preimport, - .rte_modify = c->proto->rte_modify, }; ASSERT(c->channel_state == CS_UP); diff --git a/nest/protocol.h b/nest/protocol.h index 3ccd364a..026d42ab 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -189,7 +189,6 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); - struct rte *(*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); u32 (*rte_igp_metric)(const struct rte *); diff --git a/nest/rt-table.c b/nest/rt-table.c index 2ba28e33..50ddc141 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -1322,8 +1322,6 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (new && rte_same(old, &new_stored->rte)) { /* No changes, ignore the new route and refresh the old one */ - - old->flags &= ~REF_MODIFY; old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) @@ -1673,24 +1671,6 @@ rte_discard(net *net, rte *old) /* Non-filtered route deletion, used during garb rte_update_unlock(); } -/* Modify existing route by protocol hook, used for long-lived graceful restart */ -static inline void -rte_modify(net *net, rte *old) -{ - rte_update_lock(); - - rte *new = old->sender->req->rte_modify(old, rte_update_pool); - if (new != old) - { - if (new) - new->flags = old->flags & ~REF_MODIFY; - - rte_recalculate(old->sender, net, new, old->src); - } - - rte_update_unlock(); -} - /* Check rtable for best route to given net whether it would be exported do p */ int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) @@ -1977,29 +1957,6 @@ rt_refresh_end(struct rt_import_request *req) log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); } -void -rt_modify_stale(rtable *t, struct rt_import_request *req) -{ - int prune = 0; - struct rt_import_hook *s = req->hook; - - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == s) && - !(e->rte.flags & REF_FILTERED) && - (e->rte.stale_cycle + 1 == s->stale_set)) - { - e->rte.flags |= REF_MODIFY; - prune = 1; - } - } - FIB_WALK_END; - - if (prune) - rt_schedule_prune(t); -} - /** * rte_dump - dump a route * @e: &rte to be dumped @@ -2499,14 +2456,6 @@ again: rte_discard(n, &e->rte); limit--; - goto rescan; - } - - if (e->rte.flags & REF_MODIFY) - { - rte_modify(n, &e->rte); - limit--; - goto rescan; } } diff --git a/nest/rt.h b/nest/rt.h index b9ae7d10..4a7a087f 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -181,7 +181,6 @@ struct rt_import_request { /* Preimport is called when the @new route is just-to-be inserted, replacing @old. * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); - struct rte *(*rte_modify)(struct rte *, struct linpool *); }; struct rt_import_hook { diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 084c9b63..28eb6fee 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -2546,27 +2546,59 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return !old_suppressed; } -rte * -bgp_rte_modify_stale(struct rte *r, struct linpool *pool) +void +bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY)); - const struct adata *ad = ea ? ea->u.ptr : NULL; - uint flags = ea ? ea->flags : BAF_PARTIAL; + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct rt_import_hook *irh = c->c.in_req.hook; - if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) - return NULL; + /* Find our routes among others */ + for (uint i=0; isender != irh) + continue; - _Thread_local static rte e0; - e0 = *r; + /* A new route, do not mark as stale */ + if (r->stale_cycle == irh->stale_set) + continue; - bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, - int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); - e0.pflags |= BGP_REF_STALE; + eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY)); + const struct adata *ad = ea ? ea->u.ptr : NULL; + uint flags = ea ? ea->flags : BAF_PARTIAL; + + /* LLGR not allowed, withdraw the route */ + if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + { + rte_import(&c->c.in_req, n, NULL, r->src); + continue; + } - return &e0; + /* Route already marked as LLGR, do nothing */ + if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + continue; + + /* Store the tmp_linpool state to aggresively save memory */ + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + + /* Mark the route as LLGR */ + rte e0 = *r; + bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE)); + e0.pflags &= ~BGP_REF_NOT_STALE; + e0.pflags |= BGP_REF_STALE; + + /* We need to update the route but keep it stale. */ + ASSERT_DIE(irh->stale_set == irh->stale_valid + 1); + irh->stale_set--; + rte_import(&c->c.in_req, n, &e0, r->src); + irh->stale_set++; + + /* Restore the memory state */ + lp_restore(tmp_linpool, &tmpp); + } } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index d9008b9a..fb8fa529 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -139,6 +139,9 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_graceful_restart_feed(struct bgp_channel *c); + + /** * bgp_open - open a BGP instance * @p: BGP instance @@ -770,7 +773,7 @@ bgp_handle_graceful_restart(struct bgp_proto *p) case BGP_GRS_LLGR: rt_refresh_begin(&c->c.in_req); - rt_modify_stale(c->c.table, &c->c.in_req); + bgp_graceful_restart_feed(c); break; } } @@ -796,6 +799,52 @@ bgp_handle_graceful_restart(struct bgp_proto *p) tm_start(p->gr_timer, p->conn->remote_caps->gr_time S); } +static void +bgp_graceful_restart_feed_done(struct rt_export_request *req) +{ + req->hook = NULL; +} + +static void +bgp_graceful_restart_feed_dump_req(struct rt_export_request *req) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + debug(" BGP-GR %s.%s export request %p\n", c->c.proto->name, c->c.name, req); +} + +static void +bgp_graceful_restart_feed_log_state_change(struct rt_export_request *req, u8 state) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct bgp_proto *p = (void *) c->c.proto; + BGP_TRACE(D_EVENTS, "Long-lived graceful restart export state changed to %s", rt_export_state_name(state)); + + if (state == TES_READY) + rt_stop_export(req, bgp_graceful_restart_feed_done); +} + +static void +bgp_graceful_restart_drop_export(struct rt_export_request *req UNUSED, const net_addr *n UNUSED, struct rt_pending_export *rpe UNUSED) +{ /* Nothing to do */ } + +static void +bgp_graceful_restart_feed(struct bgp_channel *c) +{ + c->stale_feed = (struct rt_export_request) { + .name = "BGP-GR", + .trace_routes = c->c.debug | c->c.proto->debug, + .dump_req = bgp_graceful_restart_feed_dump_req, + .log_state_change = bgp_graceful_restart_feed_log_state_change, + .export_bulk = bgp_rte_modify_stale, + .export_one = bgp_graceful_restart_drop_export, + }; + + rt_request_export(&c->c.table->exporter, &c->stale_feed); +} + + + + /** * bgp_graceful_restart_done - finish active BGP graceful restart * @c: BGP channel @@ -861,7 +910,7 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c.in_req); + bgp_graceful_restart_feed(c); } } else @@ -1672,7 +1721,6 @@ bgp_init(struct proto_config *CF) P->rte_better = bgp_rte_better; P->rte_mergable = bgp_rte_mergable; P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; - P->rte_modify = bgp_rte_modify_stale; P->rte_igp_metric = bgp_rte_igp_metric; p->cf = cf; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 003893e0..2e7615ea 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -371,6 +371,7 @@ struct bgp_channel { timer *stale_timer; /* Long-lived stale timer for LLGR */ u32 stale_time; /* Stored LLGR stale time from last session */ + struct rt_export_request stale_feed; /* Feeder request for stale route modification */ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ @@ -576,7 +577,7 @@ void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bu int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); -struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); +void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count); u32 bgp_rte_igp_metric(const rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); int bgp_preexport(struct channel *, struct rte *); -- cgit v1.2.3 From 1c2851ecfa94f3d0b732a267c6c2db8b817c37f4 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 14 Jul 2022 11:09:23 +0200 Subject: Fixed invalid routes handling The invalid routes were filtered out before they could ever get exported, yet some of the routines need them available, e.g. for display or import reload. Now the invalid routes are properly exported and dropped in channel export routines instead. --- nest/rt-table.c | 51 ++++++++++++++++++++++++++------------------------- nest/rt.h | 6 ++++-- proto/bgp/attrs.c | 10 ++++------ 3 files changed, 34 insertions(+), 33 deletions(-) (limited to 'proto') diff --git a/nest/rt-table.c b/nest/rt-table.c index e7281b2e..5e07c129 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -726,8 +726,8 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) - count++; + count++; + return count; } @@ -736,11 +736,11 @@ rte_feed_obtain(net *n, struct rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; } + ASSERT_DIE(i == count); } @@ -1059,10 +1059,15 @@ void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte n0; if (rpe->new_best != rpe->old_best) - rt_notify_basic(c, net, RTE_COPY(rpe->new_best, &n0), RTE_OR_NULL(rpe->old_best)); + { + rte n0 = RTE_COPY_VALID(rpe->new_best); + rte *o = RTE_VALID_OR_NULL(rpe->old_best); + + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); + } /* Drop the old stored rejection if applicable. * new->id == old->id happens when updating hostentries. */ @@ -1074,10 +1079,14 @@ void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte n0; if (rpe->new != rpe->old) - rt_notify_basic(c, net, RTE_COPY(rpe->new, &n0), RTE_OR_NULL(rpe->old)); + { + rte n0 = RTE_COPY_VALID(rpe->new); + rte *o = RTE_VALID_OR_NULL(rpe->old); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); + } /* Drop the old stored rejection if applicable. * new->id == old->id happens when updating hostentries. */ @@ -1091,10 +1100,11 @@ rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pendin struct channel *c = SKIP_BACK(struct channel, out_req, req); for (uint i=0; irte.sender->stats.pref++; - if (old_best) + if (old_best_valid) old_best->rte.sender->stats.pref--; if (tab->hostcache) diff --git a/nest/rt.h b/nest/rt.h index b13c06be..bdbea05b 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -167,8 +167,10 @@ struct rte_storage { struct rte rte; /* Route data */ }; -#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) -#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {}) +#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {}) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL) /* Table-channel connections */ diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 1ca77fd5..883a9746 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -2557,12 +2557,10 @@ bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt { rte *r = feed[i]; - /* Not our route */ - if (r->sender != irh) - continue; - - /* A new route, do not mark as stale */ - if (r->stale_cycle == irh->stale_set) + if ( + !rte_is_valid(r) || /* Not a valid route */ + (r->sender != irh) || /* Not our route */ + (r->stale_cycle == irh->stale_set)) /* A new route, do not mark as stale */ continue; eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY)); -- cgit v1.2.3 From 449cd471adfc214c4ec0f0d574818d469b13deec Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 2 Aug 2022 12:54:11 +0200 Subject: BGP: respecting table cork --- proto/bgp/bgp.c | 4 ++++ proto/bgp/bgp.h | 2 ++ proto/bgp/packets.c | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'proto') diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 3b624a49..33849b0b 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -376,6 +376,7 @@ bgp_close_conn(struct bgp_conn *conn) conn->keepalive_timer = NULL; rfree(conn->hold_timer); conn->hold_timer = NULL; + rfree(conn->tx_ev); conn->tx_ev = NULL; rfree(conn->sk); @@ -514,6 +515,7 @@ void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) { proto_notify_state(&p->p, PS_STOP); + p->uncork_ev->data = NULL; bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len); ev_schedule(p->event); @@ -1576,6 +1578,8 @@ bgp_start(struct proto *P) p->last_rx_update = 0; p->event = ev_new_init(p->p.pool, bgp_decision, p); + p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p); + p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 2e7615ea..469f0cb9 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -319,6 +319,7 @@ struct bgp_proto { struct bgp_socket *sock; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ + event *uncork_ev; /* Uncork event in case of congestion */ struct bgp_stats stats; /* BGP statistics */ btime last_established; /* Last time of enter/leave of established state */ btime last_rx_update; /* Last time of RX update */ @@ -610,6 +611,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, uint size); +void bgp_uncork(void *vp); const char * bgp_error_dsc(unsigned code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 4d4ae3eb..de976588 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -3175,6 +3175,21 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len) } } +void +bgp_uncork(void *vp) +{ + struct bgp_proto *p = vp; + + if (p && p->conn && (p->conn->state == BS_ESTABLISHED) && !p->conn->sk->rx_hook) + { + struct birdsock *sk = p->conn->sk; + ASSERT_DIE(sk->rpos > sk->rbuf); + sk->rx_hook = bgp_rx; + bgp_rx(sk, sk->rpos - sk->rbuf); + BGP_TRACE(D_PACKETS, "Uncorked"); + } +} + /** * bgp_rx - handle received data * @sk: socket @@ -3189,6 +3204,7 @@ int bgp_rx(sock *sk, uint size) { struct bgp_conn *conn = sk->data; + struct bgp_proto *p = conn->bgp; byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; uint i, len; @@ -3198,6 +3214,12 @@ bgp_rx(sock *sk, uint size) { if ((conn->state == BS_CLOSE) || (conn->sk != sk)) return 0; + if ((conn->state == BS_ESTABLISHED) && rt_cork_check(conn->bgp->uncork_ev)) + { + sk->rx_hook = NULL; + BGP_TRACE(D_PACKETS, "Corked"); + return 0; + } for(i=0; i<16; i++) if (pkt_start[i] != 0xff) { -- cgit v1.2.3