diff options
author | Maria Matejka <mq@ucw.cz> | 2022-10-04 15:40:52 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-10-04 15:40:52 +0200 |
commit | becca314e2546d6005a23398ce2d3012d4b396cb (patch) | |
tree | bdd2f55e81d42e6a1108593840c9273106676e09 /nest | |
parent | 61c127c021ac34eba25d3245ccf8f9eb9dd352f5 (diff) | |
parent | 0072d11f3431165240656edf6ade473554b8747e (diff) |
Merge commit '0072d11f' into tmp-learn
Diffstat (limited to 'nest')
-rw-r--r-- | nest/Makefile | 6 | ||||
-rw-r--r-- | nest/config.Y | 15 | ||||
-rw-r--r-- | nest/proto.c | 160 | ||||
-rw-r--r-- | nest/protocol.h | 8 | ||||
-rw-r--r-- | nest/rt-show.c | 29 | ||||
-rw-r--r-- | nest/rt-table.c | 1159 | ||||
-rw-r--r-- | nest/rt.h | 108 |
7 files changed, 1121 insertions, 364 deletions
diff --git a/nest/Makefile b/nest/Makefile index 0350c3b6..39617350 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -6,7 +6,11 @@ $(call proto-build,dev_build) $(proto-build-c): $(lastword $(MAKEFILE_LIST)) $(E)echo GEN $@ - $(Q)echo "#include \"lib/birdlib.h\"\n$(patsubst %,void %(void);\n,$(PROTO_BUILD)) void protos_build_gen(void) { $(patsubst %, %();\n,$(PROTO_BUILD))}" > $@ + $(Q)echo "#include \"lib/birdlib.h\"" > $@ + $(Q)$(patsubst %,echo 'void %(void);' >> $@;,$(PROTO_BUILD)) + $(Q)echo "void protos_build_gen(void) {" >> $@ + $(Q)$(patsubst %,echo ' %();'>>$@;,$(PROTO_BUILD)) + $(Q)echo "}" >> $@ tests_src := tests_targets := $(tests_targets) $(tests-target-files) diff --git a/nest/config.Y b/nest/config.Y index edddfc2b..91147a29 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -125,7 +125,7 @@ CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) -CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME) +CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -165,7 +165,7 @@ rtrid: idval: NUM { $$ = $1; } - | '(' term ')' { $$ = f_eval_int(f_linearize($2)); } + | '(' term ')' { $$ = f_eval_int(f_linearize($2, 1)); } | IP4 { $$ = ip4_to_u32($1); } | CF_SYM_KNOWN { if ($1->class == (SYM_CONSTANT | T_INT) || $1->class == (SYM_CONSTANT | T_QUAD)) @@ -229,6 +229,12 @@ table_opt: } | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } + | GC THRESHOLD expr { this_table->gc_threshold = $3; } + | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } + | CORK THRESHOLD expr expr { + if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); + this_table->cork_threshold.low = $3; + this_table->cork_threshold.high = $4; } ; table_opts: @@ -384,7 +390,7 @@ debug_default: DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } - | DEBUG PIPE bool { new_config->pipe_debug = $3; } + | DEBUG TABLES bool { new_config->table_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -470,6 +476,7 @@ proto: dev_proto '}' ; dev_proto_start: proto_start DIRECT { this_proto = proto_config_new(&proto_device, $1); init_list(&DIRECT_CFG->iface_list); + this_proto->late_if_feed = 1; } ; @@ -875,7 +882,7 @@ CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]]) { filters_dump_all(); cli_msg(0, ""); } ; CF_CLI(EVAL, term, <expr>, [[Evaluate an expression]]) -{ cmd_eval(f_linearize($2)); } ; +{ cmd_eval(f_linearize($2, 1)); } ; CF_CLI_HELP(ECHO, ..., [[Control echoing of log messages]]) CF_CLI(ECHO, echo_mask echo_size, (all | off | { debug|trace|info|remote|warning|error|auth [, ...] }) [<buffer-size>], [[Control echoing of log messages]]) { diff --git a/nest/proto.c b/nest/proto.c index 061205c1..1d480ba2 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -57,7 +57,28 @@ static void channel_feed_end(struct channel *c); static void channel_export_stopped(struct rt_export_request *req); static inline int proto_is_done(struct proto *p) -{ return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } +{ return (p->proto_state == PS_DOWN) && proto_is_inactive(p); } + +static inline event_list *proto_event_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_event_list : birdloop_event_list(p->loop); } + +static inline event_list *proto_work_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_work_list : birdloop_event_list(p->loop); } + +static inline void proto_send_event(struct proto *p) +{ ev_send(proto_event_list(p), p->event); } + +#define PROTO_ENTER_FROM_MAIN(p) ({ \ + ASSERT_DIE(birdloop_inside(&main_birdloop)); \ + struct birdloop *_loop = (p)->loop; \ + if (_loop != &main_birdloop) birdloop_enter(_loop); \ + _loop; \ + }) + +#define PROTO_LEAVE_FROM_MAIN(loop) ({ if (loop != &main_birdloop) birdloop_leave(loop); }) + +#define PROTO_LOCKED_FROM_MAIN(p) for (struct birdloop *_proto_loop = PROTO_ENTER_FROM_MAIN(p); _proto_loop; PROTO_LEAVE_FROM_MAIN(_proto_loop), (_proto_loop = NULL)) + static inline int channel_is_active(struct channel *c) { return (c->channel_state != CS_DOWN); } @@ -438,7 +459,6 @@ channel_start_import(struct channel *c) .dump_req = channel_dump_import_req, .log_state_change = channel_import_log_state_change, .preimport = channel_preimport, - .rte_modify = c->proto->rte_modify, }; ASSERT(c->channel_state == CS_UP); @@ -468,6 +488,7 @@ channel_start_export(struct channel *c) c->out_req = (struct rt_export_request) { .name = rn, + .list = proto_work_list(c->proto), .addr = c->out_subprefix, .addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE, .trace_routes = c->debug | c->proto->debug, @@ -514,7 +535,7 @@ channel_check_stopped(struct channel *c) return; channel_set_state(c, CS_DOWN); - ev_schedule(c->proto->event); + proto_send_event(c->proto); break; case CS_PAUSE: @@ -634,6 +655,7 @@ channel_setup_in_table(struct channel *c) { c->reload_req = (struct rt_export_request) { .name = mb_sprintf(c->proto->pool, "%s.%s.import", c->proto->name, c->name), + .list = proto_work_list(c->proto), .trace_routes = c->debug | c->proto->debug, .export_bulk = channel_reload_export_bulk, .dump_req = channel_reload_dump_req, @@ -698,8 +720,6 @@ channel_do_stop(struct channel *c) CALL(c->channel->shutdown, c); - /* This have to be done in here, as channel pool is freed before channel_do_down() */ - c->out_table = NULL; } static void @@ -721,7 +741,7 @@ channel_do_down(struct channel *c) /* Schedule protocol shutddown */ if (proto_is_done(c->proto)) - ev_schedule(c->proto->event); + proto_send_event(c->proto); } void @@ -1032,17 +1052,34 @@ proto_configure_channel(struct proto *p, struct channel **pc, struct channel_con return 1; } +static void +proto_cleanup(struct proto *p) +{ + rfree(p->pool); + p->pool = NULL; + + p->active = 0; + proto_log_state_change(p); + proto_rethink_goal(p); +} static void -proto_event(void *ptr) +proto_loop_stopped(void *ptr) { struct proto *p = ptr; - if (p->do_start) - { - if_feed_baby(p); - p->do_start = 0; - } + birdloop_enter(&main_birdloop); + + p->loop = &main_birdloop; + proto_cleanup(p); + + birdloop_leave(&main_birdloop); +} + +static void +proto_event(void *ptr) +{ + struct proto *p = ptr; if (p->do_stop) { @@ -1052,14 +1089,10 @@ proto_event(void *ptr) } if (proto_is_done(p)) - { - rfree(p->pool); - p->pool = NULL; - - p->active = 0; - proto_log_state_change(p); - proto_rethink_goal(p); - } + if (p->loop != &main_birdloop) + birdloop_stop_self(p->loop, proto_loop_stopped, p); + else + proto_cleanup(p); } @@ -1100,6 +1133,7 @@ proto_init(struct proto_config *c, node *n) struct protocol *pr = c->protocol; struct proto *p = pr->init(c); + p->loop = &main_birdloop; p->proto_state = PS_DOWN; p->last_state_change = current_time(); p->vrf = c->vrf; @@ -1116,11 +1150,19 @@ proto_init(struct proto_config *c, node *n) static void proto_start(struct proto *p) { - /* Here we cannot use p->cf->name since it won't survive reconfiguration */ - p->pool = rp_new(proto_pool, p->proto->name); + DBG("Kicking %s up\n", p->name); + PD(p, "Starting"); + + p->pool = rp_newf(proto_pool, "Protocol %s", p->cf->name); if (graceful_restart_state == GRS_INIT) p->gr_recovery = 1; + + if (p->cf->loop_order != DOMAIN_ORDER(the_bird)) + p->loop = birdloop_new(p->pool, p->cf->loop_order, p->pool->name); + + PROTO_LOCKED_FROM_MAIN(p) + proto_notify_state(p, (p->proto->start ? p->proto->start(p) : PS_UP)); } @@ -1156,6 +1198,7 @@ proto_config_new(struct protocol *pr, int class) cf->class = class; cf->debug = new_config->proto_default_debug; cf->mrtdump = new_config->proto_default_mrtdump; + cf->loop_order = DOMAIN_ORDER(the_bird); init_list(&cf->channels); @@ -1445,11 +1488,20 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty } static void -proto_rethink_goal(struct proto *p) +proto_shutdown(struct proto *p) { - struct protocol *q; - byte goal; + if (p->proto_state == PS_START || p->proto_state == PS_UP) + { + /* Going down */ + DBG("Kicking %s down\n", p->name); + PD(p, "Shutting down"); + proto_notify_state(p, (p->proto->shutdown ? p->proto->shutdown(p) : PS_DOWN)); + } +} +static void +proto_rethink_goal(struct proto *p) +{ if (p->reconfiguring && !p->active) { struct proto_config *nc = p->cf_new; @@ -1469,32 +1521,12 @@ proto_rethink_goal(struct proto *p) /* Determine what state we want to reach */ if (p->disabled || p->reconfiguring) - goal = PS_DOWN; - else - goal = PS_UP; - - q = p->proto; - if (goal == PS_UP) - { - if (!p->active) - { - /* Going up */ - DBG("Kicking %s up\n", p->name); - PD(p, "Starting"); - proto_start(p); - proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); - } - } - else { - if (p->proto_state == PS_START || p->proto_state == PS_UP) - { - /* Going down */ - DBG("Kicking %s down\n", p->name); - PD(p, "Shutting down"); - proto_notify_state(p, (q->shutdown ? q->shutdown(p) : PS_DOWN)); - } + PROTO_LOCKED_FROM_MAIN(p) + proto_shutdown(p); } + else if (!p->active) + proto_start(p); } struct proto * @@ -1699,7 +1731,7 @@ protos_dump_all(void) #define DPF(x) (p->x ? " " #x : "") debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n", p->name, p, p_states[p->proto_state], p->active_channels, - DPF(disabled), DPF(active), DPF(do_start), DPF(do_stop), DPF(reconfiguring)); + DPF(disabled), DPF(active), DPF(do_stop), DPF(reconfiguring)); #undef DPF struct channel *c; @@ -1935,8 +1967,8 @@ static inline void proto_do_start(struct proto *p) { p->active = 1; - p->do_start = 1; - ev_schedule(p->event); + if (!p->cf->late_if_feed) + if_feed_baby(p); } static void @@ -1949,6 +1981,9 @@ proto_do_up(struct proto *p) } proto_start_channels(p); + + if (p->cf->late_if_feed) + if_feed_baby(p); } static inline void @@ -1963,9 +1998,6 @@ proto_do_stop(struct proto *p) p->down_sched = 0; p->gr_recovery = 0; - p->do_stop = 1; - ev_schedule(p->event); - if (p->main_source) { rt_unlock_source(p->main_source); @@ -1973,6 +2005,9 @@ proto_do_stop(struct proto *p) } proto_stop_channels(p); + + p->do_stop = 1; + proto_send_event(p); } static void @@ -1983,7 +2018,7 @@ proto_do_down(struct proto *p) /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) - ev_schedule(p->event); + proto_send_event(p); } @@ -2222,7 +2257,7 @@ proto_cmd_disable(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_DISABLE; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); cli_msg(-9, "%s: disabled", p->name); } @@ -2255,9 +2290,9 @@ proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_RESTART; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); p->disabled = 0; - proto_rethink_goal(p); + /* After the protocol shuts down, proto_rethink_goal() is run from proto_event. */ cli_msg(-12, "%s: restarted", p->name); } @@ -2332,7 +2367,9 @@ proto_apply_cmd_symbol(const struct symbol *s, void (* cmd)(struct proto *, uint if (s->proto->proto) { - cmd(s->proto->proto, arg, 0); + struct proto *p = s->proto->proto; + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, 0); cli_msg(0, ""); } else @@ -2347,7 +2384,8 @@ proto_apply_cmd_patt(const char *patt, void (* cmd)(struct proto *, uintptr_t, i WALK_LIST(p, proto_list) if (!patt || patmatch(patt, p->name)) - cmd(p, arg, cnt++); + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, cnt++); if (!cnt) cli_msg(8003, "No protocols match"); diff --git a/nest/protocol.h b/nest/protocol.h index 3ccd364a..3c823ae1 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -102,8 +102,10 @@ struct proto_config { u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ u8 disabled; /* Protocol enabled/disabled by default */ u8 vrf_set; /* Related VRF instance (below) is defined */ + u8 late_if_feed; /* Delay interface feed after channels are up */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ u32 router_id; /* Protocol specific router ID */ + uint loop_order; /* Launch a birdloop on this locking level; use DOMAIN_ORDER(the_bird) for mainloop */ list channels; /* List of channel configs (struct channel_config) */ struct iface *vrf; /* Related VRF instance, NULL if global */ @@ -121,6 +123,7 @@ struct proto { struct proto_config *cf_new; /* Configuration we want to switch to after shutdown (NULL=delete) */ pool *pool; /* Pool containing local objects */ event *event; /* Protocol event */ + struct birdloop *loop; /* BIRDloop running this protocol */ list channels; /* List of channels to rtables (struct channel) */ struct channel *main_channel; /* Primary channel */ @@ -131,12 +134,12 @@ struct proto { u32 debug; /* Debugging flags */ u32 mrtdump; /* MRTDump flags */ uint active_channels; /* Number of active channels */ + uint active_loops; /* Number of active IO loops */ byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ byte vrf_set; /* Related VRF instance (above) is defined */ byte proto_state; /* Protocol state machine (PS_*, see below) */ byte active; /* From PS_START to cleanup after PS_STOP */ - byte do_start; /* Start actions are scheduled */ byte do_stop; /* Stop actions are scheduled */ byte reconfiguring; /* We're shutting down due to reconfiguration */ byte gr_recovery; /* Protocol should participate in graceful restart recovery */ @@ -189,7 +192,6 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); - struct rte *(*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); u32 (*rte_igp_metric)(const struct rte *); @@ -339,6 +341,8 @@ void proto_notify_state(struct proto *p, unsigned state); * as a result of received ROUTE-REFRESH request). */ +static inline int proto_is_inactive(struct proto *p) +{ return (p->active_channels == 0) && (p->active_loops == 0); } /* diff --git a/nest/rt-show.c b/nest/rt-show.c index c3294518..b784bf83 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -77,7 +77,11 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (d->verbose) + { ea_show_list(c, a); + cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS id %u", + e->src->private_id, e->src->global_id, e->stale_cycle, e->id); + } else if (dest == RTD_UNICAST) ea_show_nexthop_list(c, nhad); else if (had) @@ -256,6 +260,18 @@ rt_show_dump_req(struct rt_export_request *req) } static void +rt_show_done(struct rt_show_data *d) +{ + /* No more action */ + d->cli->cleanup = NULL; + d->cli->cont = NULL; + d->cli->rover = NULL; + + /* Write pending messages */ + cli_write_trigger(d->cli); +} + +static void rt_show_cont(struct rt_show_data *d) { struct cli *c = d->cli; @@ -263,18 +279,13 @@ rt_show_cont(struct rt_show_data *d) if (d->running_on_config && (d->running_on_config != config)) { cli_printf(c, 8004, "Stopped due to reconfiguration"); - - /* No more action */ - c->cleanup = NULL; - c->cont = NULL; - c->rover = NULL; - cli_write_trigger(c); - return; + return rt_show_done(d); } d->req = (struct rt_export_request) { .addr = d->addr, .name = "CLI Show Route", + .list = &global_work_list, .export_bulk = rt_show_net_export_bulk, .dump_req = rt_show_dump_req, .log_state_change = rt_show_log_state_change, @@ -316,7 +327,6 @@ rt_show_export_stopped(struct rt_export_request *req) if (NODE_VALID(d->tab)) return rt_show_cont(d); - /* Printout total stats */ if (d->stats && (d->table_counter > 1)) { @@ -329,7 +339,8 @@ rt_show_export_stopped(struct rt_export_request *req) else cli_printf(d->cli, 0, ""); - cli_write_trigger(d->cli); + /* No more route showing */ + rt_show_done(d); } struct rt_show_data_rtable * diff --git a/nest/rt-table.c b/nest/rt-table.c index 29690414..15dbc371 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -110,6 +110,8 @@ #include "proto/bgp/bgp.h" #endif +#include <stdatomic.h> + pool *rt_table_pool; static linpool *rte_update_pool; @@ -117,6 +119,18 @@ static linpool *rte_update_pool; list routing_tables; list deleted_routing_tables; +struct rt_cork rt_cork; + +/* Data structures for export journal */ +#define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) + +struct rt_export_block { + node n; + _Atomic u32 end; + _Atomic _Bool not_last; + struct rt_pending_export export[]; +}; + static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); @@ -126,11 +140,23 @@ static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); +static void rt_kick_prune_timer(rtable *tab); static void rt_feed_by_fib(void *); static void rt_feed_by_trie(void *); static void rt_feed_equal(void *); static void rt_feed_for(void *); static uint rt_feed_net(struct rt_export_hook *c, net *n); +static void rt_check_cork_low(rtable *tab); +static void rt_check_cork_high(rtable *tab); +static void rt_cork_release_hook(void *); + +static inline void rt_export_used(struct rt_exporter *); +static void rt_export_cleanup(rtable *tab); + +static inline void rte_update_lock(void); +static inline void rte_update_unlock(void); + +static int rte_same(rte *x, rte *y); const char *rt_import_state_name_array[TIS_MAX] = { [TIS_DOWN] = "DOWN", @@ -684,8 +710,9 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s %c %s %N %uL %uG %s", - name, dir, msg, e->net, e->src->private_id, e->src->global_id, + log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s", + name, dir, msg, e->net, + e->src->private_id, e->src->global_id, e->stale_cycle, e->id, rta_dest_name(rte_dest(e))); } @@ -725,8 +752,8 @@ rte_feed_count(net *n) { uint count = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) - count++; + count++; + return count; } @@ -735,11 +762,11 @@ rte_feed_obtain(net *n, struct rte **feed, uint count) { uint i = 0; for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) { ASSERT_DIE(i < count); feed[i++] = &e->rte; } + ASSERT_DIE(i == count); } @@ -847,6 +874,16 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) static void rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { + if (new && old && rte_same(new, old)) + { + if ((new->id != old->id) && bmap_test(&c->export_map, old->id)) + { + bmap_set(&c->export_map, new->id); + bmap_clear(&c->export_map, old->id); + } + return; + } + if (new) new = export_filter(c, new, 0); @@ -859,6 +896,16 @@ rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) do_rt_notify(c, net, new, old); } +static void +channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + rpe_mark_seen(req->hook, rpe); + if (rpe->old) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} + void rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, struct rte **feed, uint count) @@ -902,38 +949,30 @@ rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_p } } +done: /* Check obsolete routes for previously exported */ - if (!old_best) - if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) - old_best = &rpe->old->rte; - -/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + if (rpe->old) { - if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + if (bmap_test(&c->export_map, rpe->old->rte.id)) { - old_best = &rpe->old.rte; - break; + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; } - - if (rpe == rpe_last) - break; } - */ + rpe = rpe_next(rpe, NULL); + } /* Nothing to export */ if (!new_best && !old_best) { DBG("rt_notify_accepted: nothing to export\n"); - goto done; + return; } do_rt_notify(c, n, new_best, old_best); - -done: - /* Drop the old stored rejection if applicable. - * new->id == old->id happens when updating hostentries. */ - if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) - bmap_clear(&c->export_reject_map, rpe->old->rte.id); } rte * @@ -1025,63 +1064,74 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen } /* Check obsolete routes for previously exported */ - if (!old_best) - if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) - old_best = &rpe->old->rte; - -/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + if (rpe->old) { - if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + if (bmap_test(&c->export_map, rpe->old->rte.id)) { - old_best = &rpe->old.rte; - break; + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; } - - if (rpe == rpe_last) - break; } - */ + rpe = rpe_next(rpe, NULL); + } /* Prepare new merged route */ rte *new_merged = count ? rt_export_merged(c, feed, count, rte_update_pool, 0) : NULL; if (new_merged || old_best) do_rt_notify(c, n, new_merged, old_best); - - /* Drop the old stored rejection if applicable. - * new->id == old->id happens when updating hostentries. */ - if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) - bmap_clear(&c->export_reject_map, rpe->old->rte.id); } void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte n0; - if (rpe->new_best != rpe->old_best) - rt_notify_basic(c, net, RTE_COPY(rpe->new_best, &n0), RTE_OR_NULL(rpe->old_best)); + rte *o = RTE_VALID_OR_NULL(rpe->old_best); + struct rte_storage *new_best = rpe->new_best; - /* Drop the old stored rejection if applicable. - * new->id == old->id happens when updating hostentries. */ - if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) - bmap_clear(&c->export_reject_map, rpe->old->rte.id); + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_best = rpe->new_best; + rpe = rpe_next(rpe, NULL); + } + + rte n0 = RTE_COPY_VALID(new_best); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); } void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte n0; - if (rpe->new != rpe->old) - rt_notify_basic(c, net, RTE_COPY(rpe->new, &n0), RTE_OR_NULL(rpe->old)); + rte *n = RTE_VALID_OR_NULL(rpe->new); + rte *o = RTE_VALID_OR_NULL(rpe->old); - /* Drop the old stored rejection if applicable. - * new->id == old->id happens when updating hostentries. */ - if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) - bmap_clear(&c->export_reject_map, rpe->old->rte.id); + if (!n && !o) + { + channel_rpe_mark_seen(req, rpe); + return; + } + + struct rte_src *src = n ? n->src : o->src; + struct rte_storage *new_latest = rpe->new; + + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_latest = rpe->new; + rpe = rpe_next(rpe, src); + } + + rte n0 = RTE_COPY_VALID(new_latest); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); } void @@ -1090,10 +1140,103 @@ rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pendin struct channel *c = SKIP_BACK(struct channel, out_req, req); for (uint i=0; i<count; i++) + if (rte_is_valid(feed[i])) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } +} + +void +rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + bmap_set(&hook->seq_map, rpe->seq); +} + +struct rt_pending_export * +rpe_next(struct rt_pending_export *rpe, struct rte_src *src) +{ + struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + if (!next) + return NULL; + + if (!src) + return next; + + while (rpe = next) + if (src == (rpe->new ? rpe->new->rte.src : rpe->old->rte.src)) + return rpe; + else + next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + return NULL; +} + +static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); +static void +rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + if (bmap_test(&hook->seq_map, rpe->seq)) + goto ignore; /* Seen already */ + + const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net; + + switch (hook->req->addr_mode) + { + case TE_ADDR_NONE: + break; + + case TE_ADDR_IN: + if (!net_in_netX(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_EQUAL: + if (!net_equal(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_FOR: + bug("Continuos export of best prefix match not implemented yet."); + + default: + bug("Strange table export address mode: %d", hook->req->addr_mode); + } + + if (rpe->new) + hook->stats.updates_received++; + else + hook->stats.withdraws_received++; + + if (hook->req->export_one) + hook->req->export_one(hook->req, n, rpe); + else if (hook->req->export_bulk) { - rte n0 = *feed[i]; - rt_notify_basic(c, net, &n0, NULL); + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) + { + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } + hook->req->export_bulk(hook->req, n, rpe, feed, count); } + else + bug("Export request must always provide an export method"); + +ignore: + /* Get the next export if exists */ + hook->rpe_next = rt_next_export_fast(rpe); + + /* The last block may be available to free */ + if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) + CALL(hook->table->used, hook->table); + + /* Releasing this export for cleanup routine */ + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); + atomic_store_explicit(&hook->last_export, rpe, memory_order_release); } /** @@ -1131,26 +1274,17 @@ static void rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { - if (!rte_is_valid(RTE_OR_NULL(new))) - new = NULL; - - if (!rte_is_valid(RTE_OR_NULL(old))) - old = NULL; + int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); + int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best)); - if (!rte_is_valid(RTE_OR_NULL(new_best))) - new_best = NULL; - - if (!rte_is_valid(RTE_OR_NULL(old_best))) - old_best = NULL; - - if (!new && !old && !new_best && !old_best) + if ((new == old) && (new_best == old_best)) return; - if (new_best != old_best) + if (new_best_valid || old_best_valid) { - if (new_best) + if (new_best_valid) new_best->rte.sender->stats.pref++; - if (old_best) + if (old_best_valid) old_best->rte.sender->stats.pref--; if (tab->hostcache) @@ -1162,57 +1296,206 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage rt_schedule_notify(tab); - struct rt_pending_export rpe = { .new = new, .old = old, .new_best = new_best, .old_best = old_best }; - uint count = rte_feed_count(net); - rte **feed = NULL; - if (count) + if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) { - feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(net, feed, count); + /* No export hook and no pending exports to cleanup. We may free the route immediately. */ + if (!old) + return; + + hmap_clear(&tab->id_map, old->rte.id); + rte_free(old); + return; } - struct rt_export_hook *eh; - WALK_LIST(eh, tab->exporter.hooks) + /* Get the pending export structure */ + struct rt_export_block *rpeb = NULL, *rpebsnl = NULL; + u32 end = 0; + + if (!EMPTY_LIST(tab->exporter.pending)) + { + rpeb = TAIL(tab->exporter.pending); + end = atomic_load_explicit(&rpeb->end, memory_order_relaxed); + if (end >= RT_PENDING_EXPORT_ITEMS) + { + ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS); + rpebsnl = rpeb; + + rpeb = NULL; + end = 0; + } + } + + if (!rpeb) + { + rpeb = alloc_page(); + *rpeb = (struct rt_export_block) {}; + add_tail(&tab->exporter.pending, &rpeb->n); + } + + /* Fill the pending export */ + struct rt_pending_export *rpe = &rpeb->export[rpeb->end]; + *rpe = (struct rt_pending_export) { + .new = new, + .new_best = new_best, + .old = old, + .old_best = old_best, + .seq = tab->exporter.next_seq++, + }; + + DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu", + tab->name, net->n.addr, + new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL, + old, old ? old->rte.id : 0, old ? old->rte.sender->req->name : NULL, + new_best, old_best, rpe->seq); + + ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end); + + if (rpebsnl) + { + _Bool f = 0; + ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1, + memory_order_release, memory_order_relaxed)); + } + + /* Append to the same-network squasher list */ + if (net->last) + { + struct rt_pending_export *rpenull = NULL; + ASSERT_DIE(atomic_compare_exchange_strong_explicit( + &net->last->next, &rpenull, rpe, + memory_order_relaxed, + memory_order_relaxed)); + + } + + net->last = rpe; + + if (!net->first) + net->first = rpe; + + if (tab->exporter.first == NULL) + tab->exporter.first = rpe; + + rt_check_cork_high(tab); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); +} + +static struct rt_pending_export * +rt_next_export_fast(struct rt_pending_export *last) +{ + /* Get the whole export block and find our position in there. */ + struct rt_export_block *rpeb = PAGE_HEAD(last); + u32 pos = (last - &rpeb->export[0]); + u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire); + ASSERT_DIE(pos < end); + + /* Next is in the same block. */ + if (++pos < end) + return &rpeb->export[pos]; + + /* There is another block. */ + if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire)) { - if (eh->export_state == TES_STOP) + /* This is OK to do non-atomically because of the not_last flag. */ + rpeb = NODE_NEXT(rpeb); + return &rpeb->export[0]; + } + + /* There is nothing more. */ + return NULL; +} + +static struct rt_pending_export * +rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) +{ + /* As the table is locked, it is safe to reload the last export pointer */ + struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); + + /* It is still valid, let's reuse it */ + if (last) + return rt_next_export_fast(last); + + /* No, therefore we must process the table's first pending export */ + else + return tab->first; +} + +static inline void +rt_send_export_event(struct rt_export_hook *hook) +{ + ev_send(hook->req->list, hook->event); +} + +static void +rt_announce_exports(timer *tm) +{ + rtable *tab = tm->data; + + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) continue; - switch (eh->req->addr_mode) - { - case TE_ADDR_NONE: - break; + rt_send_export_event(c); + } +} - case TE_ADDR_IN: - if (!net_in_netX(net->n.addr, eh->req->addr)) - continue; - break; +static struct rt_pending_export * +rt_last_export(struct rt_exporter *tab) +{ + struct rt_pending_export *rpe = NULL; - case TE_ADDR_EQUAL: - if (!net_equal(net->n.addr, eh->req->addr)) - continue; - break; + if (!EMPTY_LIST(tab->pending)) + { + /* We'll continue processing exports from this export on */ + struct rt_export_block *reb = TAIL(tab->pending); + ASSERT_DIE(reb->end); + rpe = &reb->export[reb->end - 1]; + } - case TE_ADDR_FOR: - bug("Continuos export of best prefix match not implemented yet."); + return rpe; +} - default: - bug("Strange table export address mode: %d", eh->req->addr_mode); +#define RT_EXPORT_BULK 1024 + +static void +rt_export_hook(void *_data) +{ + struct rt_export_hook *c = _data; + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + + if (!c->rpe_next) + { + c->rpe_next = rt_next_export(c, c->table); + + if (!c->rpe_next) + { + CALL(c->table->used, c->table); + return; } + } - if (new) - eh->stats.updates_received++; - else - eh->stats.withdraws_received++; + /* Process the export */ + for (uint i=0; i<RT_EXPORT_BULK; i++) + { + rte_update_lock(); - if (eh->req->export_one) - eh->req->export_one(eh->req, net->n.addr, &rpe); - else if (eh->req->export_bulk) - eh->req->export_bulk(eh->req, net->n.addr, &rpe, feed, count); - else - bug("Export request must always provide an export method"); + rte_export(c, c->rpe_next); + + if (!c->rpe_next) + break; + + rte_update_unlock(); } + + rt_send_export_event(c); } + static inline int rte_validate(struct channel *ch, rte *e) { @@ -1322,8 +1605,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (new && rte_same(old, &new_stored->rte)) { /* No changes, ignore the new route and refresh the old one */ - - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) { @@ -1458,14 +1740,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (new_stored) { new_stored->rte.lastmod = current_time(); - - if (!old) - { - new_stored->rte.id = hmap_first_zero(&table->id_map); - hmap_set(&table->id_map, new_stored->rte.id); - } - else - new_stored->rte.id = old->id; + new_stored->rte.id = hmap_first_zero(&table->id_map); + hmap_set(&table->id_map, new_stored->rte.id); } /* Log the route change */ @@ -1486,9 +1762,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr net->routes, old_best_stored); if (!net->routes && - (table->gc_counter++ >= table->config->gc_max_ops) && - (table->gc_time + table->config->gc_min_time <= current_time())) - rt_schedule_prune(table); + (table->gc_counter++ >= table->config->gc_threshold)) + rt_kick_prune_timer(table); #if 0 /* Enable and reimplement these callbacks if anybody wants to use them */ @@ -1498,13 +1773,6 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr p->rte_insert(net, &new_stored->rte); #endif - if (old) - { - if (!new_stored) - hmap_clear(&table->id_map, old->id); - - rte_free(old_stored); - } } static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */ @@ -1639,6 +1907,9 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt nn = net_get(hook->table, n); new->net = nn->n.addr; new->sender = hook; + + /* Set the stale cycle */ + new->stale_cycle = hook->stale_set; } else if (!(nn = net_find(hook->table, n))) { @@ -1669,24 +1940,6 @@ rte_discard(net *net, rte *old) /* Non-filtered route deletion, used during garb rte_update_unlock(); } -/* Modify existing route by protocol hook, used for long-lived graceful restart */ -static inline void -rte_modify(net *net, rte *old) -{ - rte_update_lock(); - - rte *new = old->sender->req->rte_modify(old, rte_update_pool); - if (new != old) - { - if (new) - new->flags = old->flags & ~REF_MODIFY; - - rte_recalculate(old->sender, net, new, old->src); - } - - rte_update_unlock(); -} - /* Check rtable for best route to given net whether it would be exported do p */ int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) @@ -1726,16 +1979,19 @@ rt_export_stopped(void *data) struct rt_export_hook *hook = data; struct rt_exporter *tab = hook->table; + /* Drop pending exports */ + CALL(tab->used, tab); + /* Unlist */ rem_node(&hook->n); - /* Reporting the channel as stopped. */ + /* Report the channel as stopped. */ hook->stopped(hook->req); /* Reporting the hook as finished. */ CALL(tab->done, hook); - /* Freeing the hook together with its coroutine. */ + /* Free the hook. */ rfree(hook->pool); } @@ -1753,7 +2009,7 @@ void rt_set_export_state(struct rt_export_hook *hook, u8 state) { hook->last_state_change = current_time(); - hook->export_state = state; + atomic_store_explicit(&hook->export_state, state, memory_order_release); if (hook->req->log_state_change) hook->req->log_state_change(hook->req, state); @@ -1799,7 +2055,6 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) pool *p = rp_new(tab->rp, "Export hook"); struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); hook->pool = p; - hook->lp = lp_new_default(p); /* stats zeroed by mb_allocz */ switch (req->addr_mode) @@ -1844,12 +2099,18 @@ rt_request_export(struct rt_exporter *re, struct rt_export_request *req) hook->req = req; hook->table = re; + bmap_init(&hook->seq_map, hook->pool, 1024); + + struct rt_pending_export *rpe = rt_last_export(hook->table); + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); + atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); + hook->n = (node) {}; add_tail(&re->hooks, &hook->n); /* Regular export */ rt_set_export_state(hook, TES_FEEDING); - ev_schedule_work(hook->event); + rt_send_export_event(hook); } static void @@ -1857,7 +2118,7 @@ rt_table_export_stop(struct rt_export_hook *hook) { rtable *tab = SKIP_BACK(rtable, exporter, hook->table); - if (hook->export_state != TES_FEEDING) + if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) return; switch (hook->req->addr_mode) @@ -1898,7 +2159,7 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r rt_set_export_state(hook, TES_STOP); /* Run the stopped event */ - ev_schedule(hook->event); + rt_send_export_event(hook); } /** @@ -1916,15 +2177,38 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct rt_import_request *req) +rt_refresh_begin(struct rt_import_request *req) { - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if (e->rte.sender == req->hook) - e->rte.flags |= REF_STALE; - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); + ASSERT_DIE(hook->stale_set == hook->stale_valid); + + /* If the pruning routine is too slow */ + if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) + || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) + { + log(L_WARN "Route refresh flood in table %s", hook->table->name); + FIB_WALK(&hook->table->fib, net, n) + { + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; + } + FIB_WALK_END; + hook->stale_set = 1; + hook->stale_valid = 0; + hook->stale_pruned = 0; + } + /* Setting a new value of the stale modifier */ + else if (!++hook->stale_set) + { + /* Let's reserve the stale_cycle zero value for always-invalid routes */ + hook->stale_set = 1; + hook->stale_valid = 0; + } + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); } /** @@ -1936,43 +2220,18 @@ rt_refresh_begin(rtable *t, struct rt_import_request *req) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct rt_import_request *req) +rt_refresh_end(struct rt_import_request *req) { - int prune = 0; - - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE)) - { - e->rte.flags |= REF_DISCARD; - prune = 1; - } - } - FIB_WALK_END; - - if (prune) - rt_schedule_prune(t); -} + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); -void -rt_modify_stale(rtable *t, struct rt_import_request *req) -{ - int prune = 0; + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); - FIB_WALK(&t->fib, net, n) - { - for (struct rte_storage *e = n->routes; e; e = e->next) - if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) - { - e->rte.flags |= REF_MODIFY; - prune = 1; - } - } - FIB_WALK_END; + rt_schedule_prune(hook->table); - if (prune) - rt_schedule_prune(t); + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); } /** @@ -2054,8 +2313,8 @@ rt_dump_hooks(rtable *tab) { eh->req->dump_req(eh->req); debug(" Export hook %p requested by %p:" - " refeed_pending=%u last_state_change=%t export_state=%u stopped=%p\n", - eh, eh->req, eh->refeed_pending, eh->last_state_change, eh->export_state, eh->stopped); + " refeed_pending=%u last_state_change=%t export_state=%u\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); } debug("\n"); } @@ -2108,6 +2367,20 @@ rt_schedule_prune(rtable *tab) tab->prune_state |= 1; } +static void +rt_export_used(struct rt_exporter *e) +{ + rtable *tab = SKIP_BACK(rtable, exporter, e); + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup requested", tab->name); + + if (tab->export_used) + return; + + tab->export_used = 1; + ev_schedule(tab->rt_event); +} static void rt_event(void *ptr) @@ -2116,6 +2389,25 @@ rt_event(void *ptr) rt_lock_table(tab); + if (tab->export_used) + rt_export_cleanup(tab); + + if ( + tab->hcu_corked || + tab->nhu_corked || + (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event) + ) + { + if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug) + log(L_TRACE "%s: Auxiliary routines corked", tab->name); + + tab->hcu_corked |= tab->hcu_scheduled; + tab->hcu_scheduled = 0; + + tab->nhu_corked |= tab->nhu_state; + tab->nhu_state = 0; + } + if (tab->hcu_scheduled) rt_update_hostcache(tab); @@ -2128,6 +2420,45 @@ rt_event(void *ptr) rt_unlock_table(tab); } +static void +rt_uncork_event(void *ptr) +{ + rtable *tab = ptr; + + tab->hcu_scheduled |= tab->hcu_corked; + tab->hcu_corked = 0; + + tab->nhu_state |= tab->nhu_corked; + tab->nhu_corked = 0; + + if (config->table_debug) + log(L_TRACE "%s: Auxiliary routines uncorked", tab->name); + + ev_schedule(tab->rt_event); +} + +static void +rt_prune_timer(timer *t) +{ + rtable *tab = t->data; + + if (tab->gc_counter >= tab->config->gc_threshold) + rt_schedule_prune(tab); +} + +static void +rt_kick_prune_timer(rtable *tab) +{ + /* Return if prune is already scheduled */ + if (tm_active(tab->prune_timer) || (tab->prune_state & 1)) + return; + + /* Randomize GC period to +/- 50% */ + btime gc_period = tab->config->gc_period; + gc_period = (gc_period / 2) + (random_u32() % (uint) gc_period); + tm_start(tab->prune_timer, gc_period); +} + static inline btime rt_settled_time(rtable *tab) @@ -2293,9 +2624,6 @@ rt_free(resource *_r) DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - if (r->internal) - return; - r->config->table = NULL; rem_node(&r->n); @@ -2359,28 +2687,34 @@ rt_setup(pool *pp, struct rtable_config *cf) .start = rt_table_export_start, .stop = rt_table_export_stop, .done = rt_table_export_done, + .used = rt_export_used, }; + init_list(&t->exporter.hooks); + init_list(&t->exporter.pending); - if (!(t->internal = cf->internal)) - { - init_list(&t->imports); + init_list(&t->imports); - hmap_init(&t->id_map, p, 1024); - hmap_set(&t->id_map, 0); + hmap_init(&t->id_map, p, 1024); + hmap_set(&t->id_map, 0); - init_list(&t->subscribers); + init_list(&t->subscribers); - t->rt_event = ev_new_init(p, rt_event, t); - t->last_rt_change = t->gc_time = current_time(); + t->rt_event = ev_new_init(p, rt_event, t); + t->uncork_event = ev_new_init(p, rt_uncork_event, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); + t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); + t->last_rt_change = t->gc_time = current_time(); + t->exporter.next_seq = 1; - t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + t->cork_threshold = cf->cork_threshold; - if (rt_is_flow(t)) - { - t->flowspec_trie = f_new_trie(lp_new_default(p), 0); - t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); - } + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + if (rt_is_flow(t)) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } return t; @@ -2400,6 +2734,8 @@ rt_init(void) rte_update_pool = lp_new_default(rt_table_pool); init_list(&routing_tables); init_list(&deleted_routing_tables); + ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); + rt_cork.run = (event) { .hook = rt_cork_release_hook }; } @@ -2440,10 +2776,20 @@ rt_prune_table(rtable *tab) WALK_LIST2(ih, n, tab->imports, n) if (ih->import_state == TIS_STOP) rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; + tab->gc_counter = 0; + tab->gc_time = current_time(); + if (tab->prune_trie) { /* Init prefix trie pruning */ @@ -2465,24 +2811,19 @@ again: for (struct rte_storage *e=n->routes; e; e=e->next) { - if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD)) + struct rt_import_hook *s = e->rte.sender; + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { rte_discard(n, &e->rte); limit--; goto rescan; } - - if (e->rte.flags & REF_MODIFY) - { - rte_modify(n, &e->rte); - limit--; - - goto rescan; - } } - if (!n->routes) /* Orphaned FIB entry */ + if (!n->routes && !n->first) /* Orphaned FIB entry */ { FIB_ITERATE_PUT(fit); fib_delete(&tab->fib, n); @@ -2501,9 +2842,6 @@ again: fib_check(&tab->fib); #endif - tab->gc_counter = 0; - tab->gc_time = current_time(); - /* state change 2->0, 3->1 */ tab->prune_state &= 1; @@ -2540,16 +2878,212 @@ again: rt_prune_sources(); + uint flushed_channels = 0; + /* Close flushed channels */ WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) if (ih->import_state == TIS_FLUSHING) { - rt_set_import_state(ih, TIS_CLEARED); - ih->stopped(ih->req); - rem_node(&ih->n); - mb_free(ih); - rt_unlock_table(tab); + ih->flush_seq = tab->exporter.next_seq; + rt_set_import_state(ih, TIS_WAITING); + flushed_channels++; + } + else if (ih->stale_pruning != ih->stale_pruned) + { + ih->stale_pruned = ih->stale_pruning; + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } + + /* In some cases, we may want to directly proceed to export cleanup */ + if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) + rt_export_cleanup(tab); +} + +static void +rt_export_cleanup(rtable *tab) +{ + tab->export_used = 0; + + u64 min_seq = ~((u64) 0); + struct rt_pending_export *last_export_to_free = NULL; + struct rt_pending_export *first = tab->exporter.first; + + struct rt_export_hook *eh; + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + { + case TES_DOWN: + continue; + + case TES_READY: + { + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (!last) + /* No last export means that the channel has exported nothing since last cleanup */ + goto done; + + else if (min_seq > last->seq) + { + min_seq = last->seq; + last_export_to_free = last; + } + continue; + } + + default: + /* It's only safe to cleanup when the export state is idle or regular. No feeding or stopping allowed. */ + goto done; + } + } + + tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", + tab->name, + first ? first->seq : 0, + tab->exporter.first ? tab->exporter.first->seq : 0, + min_seq); + + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (last == last_export_to_free) + { + /* This may fail when the channel managed to export more inbetween. This is OK. */ + atomic_compare_exchange_strong_explicit( + &eh->last_export, &last, NULL, + memory_order_release, + memory_order_relaxed); + + DBG("store hook=%p last_export=NULL\n", eh); + } + } + + while (first && (first->seq <= min_seq)) + { + ASSERT_DIE(first->new || first->old); + + const net_addr *n = first->new ? + first->new->rte.net : + first->old->rte.net; + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + + ASSERT_DIE(net->first == first); + + if (first == net->last) + /* The only export here */ + net->last = net->first = NULL; + else + /* First is now the next one */ + net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + + /* For now, the old route may be finally freed */ + if (first->old) + { + rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed"); + hmap_clear(&tab->id_map, first->old->rte.id); + rte_free(first->old); } + +#ifdef LOCAL_DEBUG + memset(first, 0xbd, sizeof(struct rt_pending_export)); +#endif + + struct rt_export_block *reb = HEAD(tab->exporter.pending); + ASSERT_DIE(reb == PAGE_HEAD(first)); + + u32 pos = (first - &reb->export[0]); + u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); + ASSERT_DIE(pos < end); + + struct rt_pending_export *next = NULL; + + if (++pos < end) + next = &reb->export[pos]; + else + { + rem_node(&reb->n); + +#ifdef LOCAL_DEBUG + memset(reb, 0xbe, page_size); +#endif + + free_page(reb); + + if (EMPTY_LIST(tab->exporter.pending)) + { + if (config->table_debug) + log(L_TRACE "%s: Resetting export seq", tab->name); + + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); + bmap_reset(&eh->seq_map, 1024); + } + + tab->exporter.next_seq = 1; + } + else + { + reb = HEAD(tab->exporter.pending); + next = &reb->export[0]; + } + } + + first = next; + } + + rt_check_cork_low(tab); + +done:; + struct rt_import_hook *ih; node *x; + _Bool imports_stopped = 0; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_WAITING) + if (!first || (first->seq >= ih->flush_seq)) + { + ih->import_state = TIS_CLEARED; + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + imports_stopped = 1; + } + + if (tab->export_used) + ev_schedule(tab->rt_event); + + if (imports_stopped) + { + if (config->table_debug) + log(L_TRACE "%s: Sources pruning routine requested", tab->name); + + rt_prune_sources(); + } + + if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) + tm_stop(tab->exporter.export_timer); +} + +static void +rt_cork_release_hook(void *data UNUSED) +{ + do synchronize_rcu(); + while ( + !atomic_load_explicit(&rt_cork.active, memory_order_acquire) && + ev_run_list(&rt_cork.queue) + ); } /** @@ -2627,6 +3161,20 @@ rt_preconfig(struct config *c) rt_new_table(cf_get_symbol("master6"), NET_IP6); } +void +rt_postconfig(struct config *c) +{ + uint num_tables = list_length(&c->tables); + btime def_gc_period = 400 MS * num_tables; + def_gc_period = MAX(def_gc_period, 10 S); + def_gc_period = MIN(def_gc_period, 600 S); + + struct rtable_config *rc; + WALK_LIST(rc, c->tables) + if (rc->gc_period == (uint) -1) + rc->gc_period = (uint) def_gc_period; +} + /* * Some functions for handing internal next hop updates @@ -3017,6 +3565,11 @@ rt_next_hop_update_net(rtable *tab, net *n) /* Replace the route in the list */ new->next = e->next; *k = e = new; + + /* Get a new ID for the route */ + new->rte.lastmod = current_time(); + new->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, new->rte.id); } ASSERT_DIE(pos <= count); @@ -3043,14 +3596,14 @@ rt_next_hop_update_net(rtable *tab, net *n) for (int i=0; i<count; i++) { _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); - const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; + const char *best_indicator[2][2] = { + { "autoupdated", "autoupdated [-best]" }, + { "autoupdated [+best]", "autoupdated [best]" } + }; rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); rte_announce_i(tab, n, updates[i].new, updates[i].old, new, old_best); } - for (int i=0; i<count; i++) - rte_free(updates[i].old); - return count; } @@ -3109,10 +3662,12 @@ rt_new_table(struct symbol *s, uint addr_type) cf_define_symbol(s, SYM_TABLE, table, c); c->name = s->name; c->addr_type = addr_type; - c->gc_max_ops = 1000; - c->gc_min_time = 5; + c->gc_threshold = 1000; + c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->cork_threshold.low = 128; + c->cork_threshold.high = 512; add_tail(&new_config->tables, &c->n); @@ -3158,6 +3713,36 @@ rt_unlock_table(rtable *r) } } +static void +rt_check_cork_low(rtable *tab) +{ + if (!tab->cork_active) + return; + + if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq)) + { + tab->cork_active = 0; + rt_cork_release(); + + if (config->table_debug) + log(L_TRACE "%s: Uncorked", tab->name); + } +} + +static void +rt_check_cork_high(rtable *tab) +{ + if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) + { + tab->cork_active = 1; + rt_cork_acquire(); + + if (config->table_debug) + log(L_TRACE "%s: Corked", tab->name); + } +} + + static int rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) { @@ -3171,6 +3756,14 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old tab->name = new->name; tab->config = new; + tab->cork_threshold = new->cork_threshold; + + if (new->cork_threshold.high != old->cork_threshold.high) + rt_check_cork_high(tab); + + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + return 1; } @@ -3229,6 +3822,16 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } +static void +rt_feed_done(struct rt_export_hook *c) +{ + c->event->hook = rt_export_hook; + + rt_set_export_state(c, TES_READY); + + rt_send_export_event(c); +} + /** * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed @@ -3246,7 +3849,7 @@ rt_feed_by_fib(void *data) struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == TES_FEEDING); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); rtable *tab = SKIP_BACK(rtable, exporter, c->table); @@ -3255,18 +3858,19 @@ rt_feed_by_fib(void *data) if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule_work(c->event); + rt_send_export_event(c); return; } - ASSERT(c->export_state == TES_FEEDING); + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) max_feed -= rt_feed_net(c, n); } FIB_ITERATE_END; - rt_set_export_state(c, TES_READY); + rt_feed_done(c); } static void @@ -3280,7 +3884,7 @@ rt_feed_by_trie(void *data) int max_feed = 256; - ASSERT_DIE(c->export_state == TES_FEEDING); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); net_addr addr; while (trie_walk_next(ws, &addr)) @@ -3292,7 +3896,8 @@ rt_feed_by_trie(void *data) if ((max_feed -= rt_feed_net(c, n)) <= 0) return; - ASSERT_DIE(c->export_state == TES_FEEDING); + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; } rt_unlock_trie(tab, c->walk_lock); @@ -3301,7 +3906,7 @@ rt_feed_by_trie(void *data) mb_free(c->walk_state); c->walk_state = NULL; - rt_set_export_state(c, TES_READY); + rt_feed_done(c); } static void @@ -3310,14 +3915,14 @@ rt_feed_equal(void *data) struct rt_export_hook *c = data; rtable *tab = SKIP_BACK(rtable, exporter, c->table); - ASSERT_DIE(c->export_state == TES_FEEDING); + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); net *n = net_find(tab, c->req->addr); if (n) rt_feed_net(c, n); - rt_set_export_state(c, TES_READY); + rt_feed_done(c); } static void @@ -3326,52 +3931,53 @@ rt_feed_for(void *data) struct rt_export_hook *c = data; rtable *tab = SKIP_BACK(rtable, exporter, c->table); - ASSERT_DIE(c->export_state == TES_FEEDING); + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); net *n = net_route(tab, c->req->addr); if (n) rt_feed_net(c, n); - rt_set_export_state(c, TES_READY); + rt_feed_done(c); } static uint rt_feed_net(struct rt_export_hook *c, net *n) { - if (c->req->export_bulk) - { - uint count = rte_feed_count(n); - if (count) - { - rte_update_lock(); - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - struct rt_pending_export rpe = { .new_best = n->routes }; - c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); - rte_update_unlock(); - } - return count; - } + uint count = 0; - if (n->routes && rte_is_valid(&n->routes->rte)) - { - rte_update_lock(); - struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - rte_update_unlock(); - return 1; - } + if (c->req->export_bulk) + { + count = rte_feed_count(n); + if (count) + { + rte_update_lock(); + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); + rte_update_unlock(); + } + } - return 0; -} + else if (n->routes) + { + rte_update_lock(); + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + rte_update_unlock(); + count = 1; + } + for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_mark_seen(c, rpe); + + return count; +} /* * Import table */ - void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { struct channel *c = SKIP_BACK(struct channel, reload_req, req); @@ -3589,9 +4195,12 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) { struct rte_storage *e = n->routes; ea_list *a = e->rte.attrs; - pxlen = n->n.addr->pxlen; + u32 pref = rt_get_preference(&e->rte); - if (ea_find(a, &ea_gen_hostentry)) + for (struct rte_storage *ee = n->routes; ee; ee = ee->next) + if (rte_is_valid(&ee->rte) && + (rt_get_preference(&ee->rte) >= pref) && + ea_find(ee->rte.attrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3599,6 +4208,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } + pxlen = n->n.addr->pxlen; + eattr *nhea = ea_find(a, &ea_gen_nexthop); ASSERT_DIE(nhea); struct nexthop_adata *nhad = (void *) nhea->u.ptr; @@ -17,6 +17,10 @@ #include "lib/type.h" #include "lib/fib.h" #include "lib/route.h" +#include "lib/event.h" +#include "lib/rcu.h" + +#include <stdatomic.h> struct ea_list; struct protocol; @@ -30,6 +34,10 @@ struct f_trie; struct f_trie_walk_state; struct cli; +struct rt_cork_threshold { + u64 low, high; +}; + /* * Master Routing Tables. Generally speaking, each of them contains a FIB * with each entry pointing to a list of route entries representing routes @@ -47,13 +55,14 @@ struct rtable_config { struct rtable *table; struct proto_config *krt_attached; /* Kernel syncer attached to this table */ uint addr_type; /* Type of address data stored in table (NET_*) */ - int gc_max_ops; /* Maximum number of operations before GC is run */ - int gc_min_time; /* Minimum time between two consecutive GC runs */ + uint gc_threshold; /* Maximum number of operations before GC is run */ + uint gc_period; /* Approximate time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ byte trie_used; /* Rtable has attached trie */ btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ + btime export_settle_time; /* Delay before exports are announced */ + struct rt_cork_threshold cork_threshold; /* Cork threshold values */ }; struct rt_export_hook; @@ -62,9 +71,17 @@ struct rt_export_request; struct rt_exporter { list hooks; /* Registered route export hooks */ uint addr_type; /* Type of address data exported (NET_*) */ + struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); void (*stop)(struct rt_export_hook *); void (*done)(struct rt_export_hook *); + void (*used)(struct rt_exporter *); + + list pending; /* List of packed struct rt_pending_export */ + struct timer *export_timer; + + struct rt_pending_export *first; /* First export to announce */ + u64 next_seq; /* The next export will have this ID */ }; typedef struct rtable { @@ -90,15 +107,21 @@ typedef struct rtable { * obstacle from this routing table. */ struct event *rt_event; /* Routing table event */ + struct event *uncork_event; /* Called when uncork happens */ + struct timer *prune_timer; /* Timer for periodic pruning / GC */ btime last_rt_change; /* Last time when route changed */ btime base_settle_time; /* Start time of rtable settling interval */ btime gc_time; /* Time of last GC */ - int gc_counter; /* Number of operations since last GC */ + uint gc_counter; /* Number of operations since last GC */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte prune_trie; /* Prune prefix trie during next table prune */ byte hcu_scheduled; /* Hostcache update is scheduled */ + byte hcu_corked; /* Hostcache update is corked with this state */ byte nhu_state; /* Next Hop Update state */ - byte internal; /* This table is internal for some other object */ + byte nhu_corked; /* Next Hop Update is corked with this state */ + byte export_used; /* Pending Export pruning is scheduled */ + byte cork_active; /* Cork has been activated */ + struct rt_cork_threshold cork_threshold; /* Threshold for table cork */ struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ struct f_trie *trie_new; /* New prefix trie defined during pruning */ @@ -127,13 +150,48 @@ struct rt_flowspec_link { u32 uc; }; +extern struct rt_cork { + _Atomic uint active; + event_list queue; + event run; +} rt_cork; + +static inline void rt_cork_acquire(void) +{ + atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel); +} + +static inline void rt_cork_release(void) +{ + if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1) + { + synchronize_rcu(); + ev_schedule_work(&rt_cork.run); + } +} + +static inline int rt_cork_check(event *e) +{ + rcu_read_lock(); + + int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0); + if (corked) + ev_send(&rt_cork.queue, e); + + rcu_read_unlock(); + + return corked; +} + + #define NHU_CLEAN 0 #define NHU_SCHEDULED 1 #define NHU_RUNNING 2 #define NHU_DIRTY 3 typedef struct network { - struct rte_storage *routes; /* Available routes for this network */ + struct rte_storage *routes; /* Available routes for this network */ + struct rt_pending_export *first, *last; struct fib_node n; /* FIB flags reserved for kernel syncer */ } net; @@ -168,8 +226,10 @@ struct rte_storage { struct rte rte; /* Route data */ }; -#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) -#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {}) +#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {}) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL) /* Table-channel connections */ @@ -183,7 +243,6 @@ struct rt_import_request { /* Preimport is called when the @new route is just-to-be inserted, replacing @old. * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); - struct rte *(*rte_modify)(struct rte *, struct linpool *); }; struct rt_import_hook { @@ -200,15 +259,22 @@ struct rt_import_hook { u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ } stats; + u64 flush_seq; /* Table export seq when the channel announced flushing */ btime last_state_change; /* Time of last state transition */ u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ }; struct rt_pending_export { + struct rt_pending_export * _Atomic next; /* Next export for the same destination */ struct rte_storage *new, *new_best, *old, *old_best; + u64 seq; /* Sequential ID (table-local) of the pending export */ }; struct rt_export_request { @@ -218,6 +284,8 @@ struct rt_export_request { u8 trace_routes; u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */ + event_list *list; /* Where to schedule export events */ + /* There are two methods of export. You can either request feeding every single change * or feeding the whole route feed. In case of regular export, &export_one is preferred. * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. @@ -237,7 +305,6 @@ struct rt_export_hook { struct rt_exporter *table; /* The connected table */ pool *pool; - linpool *lp; struct rt_export_request *req; /* The requestor */ @@ -256,10 +323,15 @@ struct rt_export_hook { u32 hash_iter; /* Iterator over hash */ }; + struct bmap seq_map; /* Keep track which exports were already procesed */ + + struct rt_pending_export * _Atomic last_export;/* Last export processed */ + struct rt_pending_export *rpe_next; /* Next pending export to process */ + btime last_state_change; /* Time of last state transition */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ - u8 export_state; /* Route export state (TES_*, see below) */ + _Atomic u8 export_state; /* Route export state (TES_*, see below) */ u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ struct event *event; /* Event running all the export operations */ @@ -310,6 +382,15 @@ void rt_set_export_state(struct rt_export_hook *hook, u8 state); void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); +/* Get next rpe. If src is given, it must match. */ +struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); + +/* Mark the pending export processed */ +void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + +/* Get pending export seen status */ +int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + /* Types of route announcement, also used as flags */ #define RA_UNDEF 0 /* Undefined RA type */ #define RA_OPTIMAL 1 /* Announcement of optimal route change */ @@ -363,6 +444,7 @@ struct config; void rt_init(void); void rt_preconfig(struct config *); +void rt_postconfig(struct config *); void rt_commit(struct config *new, struct config *old); void rt_lock_table(rtable *); void rt_unlock_table(rtable *); @@ -383,8 +465,8 @@ net *net_get(rtable *tab, const net_addr *addr); net *net_route(rtable *tab, const net_addr *n); int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct rt_import_request *); -void rt_refresh_end(rtable *t, struct rt_import_request *); +void rt_refresh_begin(struct rt_import_request *); +void rt_refresh_end(struct rt_import_request *); void rt_modify_stale(rtable *t, struct rt_import_request *); void rt_schedule_prune(rtable *t); void rte_dump(struct rte_storage *); |