diff options
author | Maria Matejka <mq@ucw.cz> | 2022-10-04 16:09:41 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-10-04 16:09:41 +0200 |
commit | f69ba3921a1842f9cac9b9fbd0a32800615da02e (patch) | |
tree | 25d3b5b4ef7ffc4fb9937d6f48806934f4da2b62 | |
parent | a414ba6b975a1187a59cac1f58bc68e5e4b7d37d (diff) | |
parent | fb7fb6744582b2bb74b3b1e32696bd5534e93054 (diff) |
Merge commit 'fb7fb674' into HEAD
-rw-r--r-- | conf/conf.h | 2 | ||||
-rw-r--r-- | doc/bird.sgml | 39 | ||||
-rw-r--r-- | filter/f-inst.c | 2 | ||||
-rw-r--r-- | lib/birdlib.h | 3 | ||||
-rw-r--r-- | lib/route.h | 4 | ||||
-rw-r--r-- | nest/config.Y | 17 | ||||
-rw-r--r-- | nest/proto.c | 144 | ||||
-rw-r--r-- | nest/protocol.h | 15 | ||||
-rw-r--r-- | nest/rt-show.c | 17 | ||||
-rw-r--r-- | nest/rt-table.c | 1696 | ||||
-rw-r--r-- | nest/rt.h | 264 | ||||
-rw-r--r-- | proto/bfd/bfd.c | 1 | ||||
-rw-r--r-- | proto/bgp/attrs.c | 70 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 6 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 2 | ||||
-rw-r--r-- | proto/mrt/mrt.c | 39 | ||||
-rw-r--r-- | proto/mrt/mrt.h | 6 | ||||
-rw-r--r-- | proto/perf/perf.c | 4 | ||||
-rw-r--r-- | proto/rip/rip.c | 18 | ||||
-rw-r--r-- | proto/static/static.c | 4 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 21 |
21 files changed, 1474 insertions, 900 deletions
diff --git a/conf/conf.h b/conf/conf.h index 50e01bf5..ffefa519 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -27,7 +27,7 @@ struct config { int mrtdump_file; /* Configured MRTDump file (sysdep, fd in unix) */ const char *syslog_name; /* Name used for syslog (NULL -> no syslog) */ - struct rtable_config *def_tables[NET_MAX]; /* Default routing tables for each network */ + struct symbol *def_tables[NET_MAX]; /* Default routing tables for each network */ struct iface_patt *router_id_from; /* Configured list of router ID iface patterns */ u32 router_id; /* Our Router ID */ diff --git a/doc/bird.sgml b/doc/bird.sgml index c29353fc..2bce3d57 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -505,6 +505,11 @@ include "tablename.conf";; See <ref id="channel-debug" name="debug"> in the channel section. Default: off. + <tag><label id="opt-debug-tables">debug tables all|off|{ states|routes|filters|events [, <m/.../] }</tag> + Set global defaults of table debugging options. + See <ref id="rtable-debug" name="debug"> in the table section. + Default: off. + <tag><label id="opt-debug-commands">debug commands <m/number/</tag> Control logging of client connections (0 for no logging, 1 for logging of connects and disconnects, 2 and higher for logging of all client @@ -670,21 +675,6 @@ to set options. disadvantage is that trie-enabled routing tables require more memory, which may be an issue especially in multi-table setups. Default: off. - <tag><label id="rtable-min-settle-time">min settle time <m/time/</tag> - Specify a minimum value of the settle time. When a ROA table changes, - automatic <ref id="proto-rpki-reload" name="RPKI reload"> may be - triggered, after a short settle time. Minimum settle time is a delay - from the last ROA table change to wait for more updates. Default: 1 s. - - - <tag><label id="rtable-max-settle-time">max settle time <m/time/</tag> - Specify a maximum value of the settle time. When a ROA table changes, - automatic <ref id="proto-rpki-reload" name="RPKI reload"> may be - triggered, after a short settle time. Maximum settle time is an upper - limit to the settle time from the initial ROA table change even if - there are consecutive updates gradually renewing the settle time. - Default: 20 s. - <tag><label id="rtable-gc-threshold">gc threshold <m/number/</tag> Specify a minimum amount of removed networks that triggers a garbage collection (GC) cycle. Default: 1000. @@ -708,6 +698,16 @@ to set options. second one is the high threshold (when to pause). The higher is the threshold, the more memory can get used. In most cases, the defaults should work for you. Default: 128, 512. + + <tag><label id="rtable-debug">debug all|off|{ states|routes|events [, <m/.../] }</tag> + Set table debugging options. Each table can write some trace messages + into log with category <cf/trace/. You can request <cf/all/ trace messages + or select some types: <cf/states/ for table state changes and auxiliary + processes, <cf/routes/ for auxiliary route notifications (next hop update, + flowspec revalidation) and <cf/events/ for more detailed auxiliary routine + debug. See also <ref id="channel-debug" name="channel debugging option">. + Default: off. + </descrip> @@ -965,6 +965,15 @@ inherited from templates can be updated by new definitions. <ref id="bgp-export-table" name="export table"> (for respective direction). Default: on. + <tag><label id="rtable-min-settle-time">min settle time <m/time/</tag> + Minimum settle time is a delay from the last ROA table change to wait + for more updates before triggering automatic reload. Default: 1 s. + + <tag><label id="rtable-min-settle-time">min settle time <m/time/</tag> + Maximum settle time is an upper limit to the settle time from the + initial ROA table change even if there are consecutive updates gradually + renewing the settle time. Default: 20 s. + <tag><label id="proto-import-limit">import limit [<m/number/ | off ] [action warn | block | restart | disable]</tag> Specify an import route limit (a maximum number of routes imported from the protocol) and optionally the action to be taken when the limit is diff --git a/filter/f-inst.c b/filter/f-inst.c index 60042476..426b598f 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -1467,7 +1467,7 @@ ARG(1, T_NET); ARG(2, T_INT); RTC(3); - struct rtable *table = rtc->table; + rtable *table = rtc->table; u32 as = v2.val.i; diff --git a/lib/birdlib.h b/lib/birdlib.h index d55b1a44..d743ecdf 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -14,8 +14,9 @@ /* Ugly structure offset handling macros */ +#define SAME_TYPE(a, b) ({ int _ = ((a) != (b)); !_; }) #define OFFSETOF(s, i) ((size_t) &((s *)0)->i) -#define SKIP_BACK(s, i, p) ({ s *_ptr = ((s *)((char *)p - OFFSETOF(s, i))); ASSERT_DIE(&_ptr->i == p); _ptr; }) +#define SKIP_BACK(s, i, p) ({ s *_ptr = ((s *)((char *)p - OFFSETOF(s, i))); SAME_TYPE(&_ptr->i, p); _ptr; }) #define BIRD_ALIGN(s, a) (((s)+a-1)&~(a-1)) #define CPU_STRUCT_ALIGN (MAX_(_Alignof(void*), _Alignof(u64))) #define BIRD_CPU_ALIGN(s) BIRD_ALIGN((s), CPU_STRUCT_ALIGN) diff --git a/lib/route.h b/lib/route.h index f5456396..eae251e7 100644 --- a/lib/route.h +++ b/lib/route.h @@ -20,7 +20,7 @@ struct network; struct proto; struct cli; -struct rtable; +struct rtable_private; typedef struct rte { struct ea_list *attrs; /* Attributes of this route */ @@ -63,7 +63,7 @@ struct rte_owner_class { struct rte_owner { struct rte_owner_class *class; - int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte *, struct rte *, struct rte *); HASH(struct rte_src) hash; const char *name; u32 hash_key; diff --git a/nest/config.Y b/nest/config.Y index 91147a29..84c76ae9 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -227,14 +227,13 @@ table_opt: cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]); this_table->trie_used = $2; } - | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } - | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } | GC THRESHOLD expr { this_table->gc_threshold = $3; } | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } | CORK THRESHOLD expr expr { if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); this_table->cork_threshold.low = $3; this_table->cork_threshold.high = $4; } + | DEBUG bool { this_table->debug = $2; } ; table_opts: @@ -322,6 +321,8 @@ channel_item_: | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } + | MIN SETTLE TIME expr_us { this_channel->min_settle_time = $4; } + | MAX SETTLE TIME expr_us { this_channel->max_settle_time = $4; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } | IMPORT KEEP FILTERED bool { if ($4) @@ -361,7 +362,11 @@ channel_end: proto_channel: channel_start channel_opt_list channel_end; -rtable: CF_SYM_KNOWN { cf_assert_symbol($1, SYM_TABLE); $$ = $1->table; } ; +rtable: CF_SYM_KNOWN { + cf_assert_symbol($1, SYM_TABLE); + if (!$1->table) rt_new_default_table($1); + $$ = $1->table; +} ; imexport: FILTER filter { $$ = $2; } @@ -390,7 +395,7 @@ debug_default: DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } - | DEBUG TABLES bool { new_config->table_debug = $3; } + | DEBUG TABLES debug_mask { new_config->table_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -683,6 +688,7 @@ r_args: } | r_args TABLE symbol_known { cf_assert_symbol($3, SYM_TABLE); + if (!$3->table) cf_error("Table %s not configured", $3->name); $$ = $1; rt_show_add_table($$, $3->table->table); $$->tables_defined_by = RSD_TDB_DIRECT; @@ -696,7 +702,8 @@ r_args: } | r_args IMPORT TABLE channel_arg { if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_exporter($$, &$4->table->exporter, "import")->prefilter = $4; + RT_LOCKED($4->table, tab) + rt_show_add_exporter($$, &tab->exporter.e, "import")->prefilter = $4; $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { diff --git a/nest/proto.c b/nest/proto.c index 853b1cf9..783a936c 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -55,6 +55,7 @@ static void channel_update_limit(struct channel *c, struct limit *l, int dir, st static void channel_reset_limit(struct channel *c, struct limit *l, int dir); static void channel_feed_end(struct channel *c); static void channel_export_stopped(struct rt_export_request *req); +static void channel_check_stopped(struct channel *c); static inline int proto_is_done(struct proto *p) { return (p->proto_state == PS_DOWN) && proto_is_inactive(p); } @@ -168,7 +169,7 @@ proto_cf_find_channel(struct proto_config *pc, uint net_type) * Returns pointer to channel or NULL */ struct channel * -proto_find_channel_by_table(struct proto *p, struct rtable *t) +proto_find_channel_by_table(struct proto *p, rtable *t) { struct channel *c; @@ -312,10 +313,19 @@ proto_remove_channels(struct proto *p) proto_remove_channel(p, c); } +struct roa_subscription { + node roa_node; + timer t; + btime base_settle_time; /* Start of settling interval */ + struct channel *c; + struct rt_export_request req; +}; + static void -channel_roa_in_changed(void *_data) +channel_roa_in_changed(struct timer *t) { - struct channel *c = _data; + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t); + struct channel *c = s->c; int active = !!c->reload_req.hook; CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); @@ -327,9 +337,11 @@ channel_roa_in_changed(void *_data) } static void -channel_roa_out_changed(void *_data) +channel_roa_out_changed(struct timer *t) { - struct channel *c = _data; + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, t, t); + struct channel *c = s->c; + CD(c, "Feeding triggered by RPKI change"); c->refeed_pending = 1; @@ -338,29 +350,57 @@ channel_roa_out_changed(void *_data) rt_stop_export(&c->out_req, channel_export_stopped); } -/* Temporary code, subscriptions should be changed to resources */ -struct roa_subscription { - struct rt_subscription s; - node roa_node; -}; +static void +channel_export_one_roa(struct rt_export_request *req, const net_addr *net UNUSED, struct rt_pending_export *first) +{ + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req); + + /* TODO: use the information about what roa has changed */ + + if (!tm_active(&s->t)) + { + s->base_settle_time = current_time(); + tm_start(&s->t, s->base_settle_time + s->c->min_settle_time); + } + else + tm_set(&s->t, + MIN(s->base_settle_time + s->c->max_settle_time, + current_time() + s->c->min_settle_time)); + + + rpe_mark_seen_all(req->hook, first, NULL); +} + +static void +channel_dump_roa_req(struct rt_export_request *req) +{ + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req); + struct channel *c = s->c; + struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter.e, req->hook->table); + + debug(" Channel %s.%s ROA %s change notifier from table %s request %p\n", + c->proto->name, c->name, + (s->t.hook == channel_roa_in_changed) ? "import" : "export", + tab->name, req); +} static int channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir) { - void (*hook)(void *) = + void (*hook)(struct timer *) = dir ? channel_roa_in_changed : channel_roa_out_changed; struct roa_subscription *s; node *n; WALK_LIST2(s, n, c->roa_subscriptions, roa_node) - if ((s->s.tab == tab) && (s->s.event->hook == hook)) + if ((tab == SKIP_BACK(rtable, priv.exporter.e, s->req.hook->table)) + && (s->t.hook == hook)) return 1; return 0; } - static void channel_roa_subscribe(struct channel *c, rtable *tab, int dir) { @@ -368,28 +408,47 @@ channel_roa_subscribe(struct channel *c, rtable *tab, int dir) return; struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription)); - s->s.event = ev_new_init(c->proto->pool, dir ? channel_roa_in_changed : channel_roa_out_changed, c); - s->s.list = proto_work_list(c->proto); - rt_subscribe(tab, &s->s); + *s = (struct roa_subscription) { + .t = { .hook = dir ? channel_roa_in_changed : channel_roa_out_changed, }, + .c = c, + .req = { + .name = mb_sprintf(c->proto->pool, "%s.%s.roa-%s.%s", + c->proto->name, c->name, dir ? "in" : "out", tab->name), + .list = proto_work_list(c->proto), + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_roa_req, + .export_one = channel_export_one_roa, + }, + }; add_tail(&c->roa_subscriptions, &s->roa_node); + rt_request_export(tab, &s->req); } static void -channel_roa_unsubscribe(struct roa_subscription *s) +channel_roa_unsubscribed(struct rt_export_request *req) { - rt_unsubscribe(&s->s); + struct roa_subscription *s = SKIP_BACK(struct roa_subscription, req, req); + struct channel *c = s->c; + rem_node(&s->roa_node); - rfree(s->s.event); mb_free(s); + + channel_check_stopped(c); +} + +static void +channel_roa_unsubscribe(struct roa_subscription *s) +{ + rt_stop_export(&s->req, channel_roa_unsubscribed); } static void channel_roa_subscribe_filter(struct channel *c, int dir) { const struct filter *f = dir ? c->in_filter : c->out_filter; - struct rtable *tab; + rtable *tab; int valid = 1, found = 0; if ((f == FILTER_ACCEPT) || (f == FILTER_REJECT)) @@ -450,13 +509,10 @@ channel_start_import(struct channel *c) return; } - int nlen = strlen(c->name) + strlen(c->proto->name) + 2; - char *rn = mb_allocz(c->proto->pool, nlen); - bsprintf(rn, "%s.%s", c->proto->name, c->name); - c->in_req = (struct rt_import_request) { - .name = rn, + .name = mb_sprintf(c->proto->pool, "%s.%s", c->proto->name, c->name), .trace_routes = c->debug | c->proto->debug, + .list = proto_work_list(c->proto), .dump_req = channel_dump_import_req, .log_state_change = channel_import_log_state_change, .preimport = channel_preimport, @@ -483,12 +539,9 @@ channel_start_export(struct channel *c) } ASSERT(c->channel_state == CS_UP); - int nlen = strlen(c->name) + strlen(c->proto->name) + 2; - char *rn = mb_allocz(c->proto->pool, nlen); - bsprintf(rn, "%s.%s", c->proto->name, c->name); c->out_req = (struct rt_export_request) { - .name = rn, + .name = mb_sprintf(c->proto->pool, "%s.%s", c->proto->name, c->name), .list = proto_work_list(c->proto), .addr = c->out_subprefix, .addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE, @@ -523,7 +576,7 @@ channel_start_export(struct channel *c) } DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); - rt_request_export(&c->table->exporter, &c->out_req); + rt_request_export(c->table, &c->out_req); } static void @@ -532,7 +585,7 @@ channel_check_stopped(struct channel *c) switch (c->channel_state) { case CS_STOP: - if (c->out_req.hook || c->in_req.hook) + if (!EMPTY_LIST(c->roa_subscriptions) || c->out_req.hook || c->in_req.hook) return; channel_set_state(c, CS_DOWN); @@ -540,7 +593,7 @@ channel_check_stopped(struct channel *c) break; case CS_PAUSE: - if (c->out_req.hook) + if (!EMPTY_LIST(c->roa_subscriptions) || c->out_req.hook) return; channel_set_state(c, CS_START); @@ -557,8 +610,6 @@ channel_import_stopped(struct rt_import_request *req) { struct channel *c = SKIP_BACK(struct channel, in_req, req); - req->hook = NULL; - mb_free(c->in_req.name); c->in_req.name = NULL; @@ -577,7 +628,7 @@ channel_export_stopped(struct rt_export_request *req) { c->refeeding = 1; c->refeed_pending = 0; - rt_request_export(&c->table->exporter, req); + rt_request_export(c->table, req); return; } @@ -621,7 +672,7 @@ channel_schedule_reload(struct channel *c) { ASSERT(c->in_req.hook); - rt_request_export(&c->table->exporter, &c->reload_req); + rt_request_export(c->table, &c->reload_req); } static void @@ -864,7 +915,7 @@ channel_config_new(const struct channel_class *cc, const char *name, uint net_ty if (proto->net_type && (net_type != proto->net_type)) cf_error("Different channel type"); - tab = new_config->def_tables[net_type]; + tab = rt_get_default_table(new_config, net_type); } if (!cc) @@ -883,6 +934,9 @@ channel_config_new(const struct channel_class *cc, const char *name, uint net_ty cf->debug = new_config->channel_default_debug; cf->rpki_reload = 1; + cf->min_settle_time = 1 S; + cf->max_settle_time = 20 S; + add_tail(&proto->channels, &cf->n); return cf; @@ -963,6 +1017,22 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->rpki_reload = cf->rpki_reload; + if ( (c->min_settle_time != cf->min_settle_time) + || (c->max_settle_time != cf->max_settle_time)) + { + c->min_settle_time = cf->min_settle_time; + c->max_settle_time = cf->max_settle_time; + + struct roa_subscription *s; + node *n; + + WALK_LIST2(s, n, c->roa_subscriptions, roa_node) + if (tm_active(&s->t)) + tm_set(&s->t, + MIN(s->base_settle_time + c->max_settle_time, + current_time() + c->min_settle_time)); + } + /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; diff --git a/nest/protocol.h b/nest/protocol.h index b4730126..c88598cc 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -18,7 +18,6 @@ struct iface; struct ifa; -struct rtable; struct rte; struct neighbor; struct rta; @@ -187,7 +186,7 @@ struct proto { * rte_remove Called whenever a rte is removed from the routing table. */ - int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + int (*rte_recalculate)(struct rtable_private *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); @@ -460,6 +459,9 @@ struct channel_config { struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ + btime min_settle_time; /* Minimum settle time for ROA-induced reload */ + btime max_settle_time; /* Maximum settle time for ROA-induced reload */ + u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ u8 ra_mode; /* Mode of received route advertisements (RA_*) */ u16 preference; /* Default route preference */ @@ -476,7 +478,7 @@ struct channel { const struct channel_class *channel; struct proto *proto; - struct rtable *table; + rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ const net_addr *out_subprefix; /* Export only subprefixes of this net */ @@ -487,6 +489,9 @@ struct channel { struct limit in_limit; /* Input limit */ struct limit out_limit; /* Output limit */ + btime min_settle_time; /* Minimum settle time for ROA-induced reload */ + btime max_settle_time; /* Maximum settle time for ROA-induced reload */ + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ u8 limit_active; /* Flags for active limits */ @@ -540,7 +545,7 @@ struct channel { struct rt_exporter *out_table; /* Internal table for exported routes */ - list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ + list roa_subscriptions; /* List of active ROA table subscriptions based on filters' roa_check() calls */ }; #define RIK_REJECTED 1 /* Routes rejected in import filter are kept */ @@ -604,7 +609,7 @@ struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_ty static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) { return proto_cf_find_channel(pc, pc->net_type); } -struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t); +struct channel *proto_find_channel_by_table(struct proto *p, rtable *t); struct channel *proto_find_channel_by_name(struct proto *p, const char *n); struct channel *proto_add_channel(struct proto *p, struct channel_config *cf); int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf); diff --git a/nest/rt-show.c b/nest/rt-show.c index 17400029..dc88047a 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -301,7 +301,7 @@ rt_show_cont(struct rt_show_data *d) if (d->tables_defined_by & RSD_TDB_SET) rt_show_table(d); - rt_request_export(d->tab->table, &d->req); + rt_request_export_other(d->tab->table, &d->req); } static void @@ -354,9 +354,11 @@ rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char * } struct rt_show_data_rtable * -rt_show_add_table(struct rt_show_data *d, struct rtable *t) +rt_show_add_table(struct rt_show_data *d, rtable *t) { - struct rt_show_data_rtable *rsdr = rt_show_add_exporter(d, &t->exporter, t->name); + struct rt_show_data_rtable *rsdr; + RT_LOCKED(t, tp) + rsdr = rt_show_add_exporter(d, &tp->exporter.e, t->name); struct proto_config *krt = t->config->krt_attached; if (krt) @@ -400,8 +402,8 @@ rt_show_get_default_tables(struct rt_show_data *d) } for (int i=1; i<NET_MAX; i++) - if (config->def_tables[i] && config->def_tables[i]->table) - rt_show_add_table(d, config->def_tables[i]->table); + if (config->def_tables[i] && config->def_tables[i]->table && config->def_tables[i]->table->table) + rt_show_add_table(d, config->def_tables[i]->table->table); } static inline void @@ -418,12 +420,13 @@ rt_show_prepare_tables(struct rt_show_data *d) /* Ensure there is defined export_channel for each table */ if (d->export_mode) { + rtable *rt = SKIP_BACK(rtable, priv.exporter.e, tab->table); if (!tab->export_channel && d->export_channel && - (tab->table == &d->export_channel->table->exporter)) + (rt == d->export_channel->table)) tab->export_channel = d->export_channel; if (!tab->export_channel && d->export_protocol) - tab->export_channel = proto_find_channel_by_table(d->export_protocol, SKIP_BACK(rtable, exporter, tab->table)); + tab->export_channel = proto_find_channel_by_table(d->export_protocol, rt); if (!tab->export_channel) { diff --git a/nest/rt-table.c b/nest/rt-table.c index 3ade4237..95248635 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -43,10 +43,10 @@ * all prefixes that may influence resolving of tracked next hops. * * When a best route changes in the src table, the hostcache is notified using - * rt_notify_hostcache(), which immediately checks using the trie whether the + * an auxiliary export request, which checks using the trie whether the * change is relevant and if it is, then it schedules asynchronous hostcache * recomputation. The recomputation is done by rt_update_hostcache() (called - * from rt_event() of src table), it walks through all hostentries and resolves + * as an event of src table), it walks through all hostentries and resolves * them (by rt_update_hostentry()). It also updates the trie. If a change in * hostentry resolution was found, then it schedules asynchronous nexthop * recomputation of associated dst table. That is done by rt_next_hop_update() @@ -60,15 +60,14 @@ * routes depends of resolving their network prefixes in IP routing tables. This * is similar to the recursive next hop mechanism, but simpler as there are no * intermediate hostcache and hostentries (because flows are less likely to - * share common net prefix than routes sharing a common next hop). In src table, - * there is a list of dst tables (list flowspec_links), this list is updated by - * flowpsec channels (by rt_flowspec_link() and rt_flowspec_unlink() during - * channel start/stop). Each dst table has its own trie of prefixes that may - * influence validation of flowspec routes in it (flowspec_trie). + * share common net prefix than routes sharing a common next hop). Every dst + * table has its own export request in every src table. Each dst table has its + * own trie of prefixes that may influence validation of flowspec routes in it + * (flowspec_trie). * - * When a best route changes in the src table, rt_flowspec_notify() immediately - * checks all dst tables from the list using their tries to see whether the - * change is relevant for them. If it is, then an asynchronous re-validation of + * When a best route changes in the src table, the notification mechanism is + * invoked by the export request which checks its dst table's trie to see + * whether the change is relevant, and if so, an asynchronous re-validation of * flowspec routes in the dst table is scheduled. That is also done by function * rt_next_hop_update(), like nexthop recomputation above. It iterates over all * flowspec routes and re-validates them. It also recalculates the trie. @@ -83,9 +82,8 @@ * will be re-validated later in this round anyway. * * The third mechanism is used for RPKI re-validation of IP routes and it is the - * simplest. It is just a list of subscribers in src table, who are notified - * when any change happened, but only after a settle time. Also, in RPKI case - * the dst is not a table, but a channel, who refeeds routes through a filter. + * simplest. It is also an auxiliary export request belonging to the + * appropriate channel, triggering its reload/refeed timer after a settle time. */ #undef LOCAL_DEBUG @@ -105,6 +103,7 @@ #include "lib/string.h" #include "lib/alloca.h" #include "lib/flowspec.h" +#include "lib/idm.h" #ifdef CONFIG_BGP #include "proto/bgp/bgp.h" @@ -129,27 +128,24 @@ struct rt_export_block { struct rt_pending_export export[]; }; -static void rt_free_hostcache(rtable *tab); -static void rt_notify_hostcache(rtable *tab, net *net); -static void rt_update_hostcache(rtable *tab); -static void rt_next_hop_update(rtable *tab); +static void rt_free_hostcache(struct rtable_private *tab); +static void rt_update_hostcache(void *tab); +static void rt_next_hop_update(void *tab); static inline void rt_next_hop_resolve_rte(rte *r); static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); -static inline void rt_prune_table(rtable *tab); -static inline void rt_schedule_notify(rtable *tab); -static void rt_flowspec_notify(rtable *tab, net *net); -static void rt_kick_prune_timer(rtable *tab); +static inline void rt_prune_table(struct rtable_private *tab); +static void rt_kick_prune_timer(struct rtable_private *tab); static void rt_feed_by_fib(void *); static void rt_feed_by_trie(void *); static void rt_feed_equal(void *); static void rt_feed_for(void *); -static uint rt_feed_net(struct rt_export_hook *c, net *n); -static void rt_check_cork_low(rtable *tab); -static void rt_check_cork_high(rtable *tab); +static void rt_check_cork_low(struct rtable_private *tab); +static void rt_check_cork_high(struct rtable_private *tab); static void rt_cork_release_hook(void *); +static void rt_delete(void *); -static inline void rt_export_used(struct rt_exporter *); -static void rt_export_cleanup(rtable *tab); +static void rt_export_used(struct rt_table_exporter *); +static void rt_export_cleanup(struct rtable_private *tab); static int rte_same(rte *x, rte *y); @@ -185,13 +181,18 @@ const char *rt_export_state_name(u8 state) return rt_export_state_name_array[state]; } -static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); -static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); +static struct hostentry *rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep); + +#define rt_trace(tab, level, fmt, args...) do {\ + struct rtable_private *t = (tab); \ + if (t->config->debug & (level)) \ + log(L_TRACE "%s: " fmt, t->name, ##args); \ +} while (0) static void net_init_with_trie(struct fib *f, void *N) { - rtable *tab = SKIP_BACK(rtable, fib, f); + struct rtable_private *tab = SKIP_BACK(struct rtable_private, fib, f); net *n = N; if (tab->trie) @@ -202,7 +203,7 @@ net_init_with_trie(struct fib *f, void *N) } static inline net * -net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) +net_route_ip4_trie(struct rtable_private *t, const net_addr_ip4 *n0) { TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n) { @@ -216,7 +217,7 @@ net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) } static inline net * -net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) +net_route_vpn4_trie(struct rtable_private *t, const net_addr_vpn4 *n0) { TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px) { @@ -232,7 +233,7 @@ net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) } static inline net * -net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) +net_route_ip6_trie(struct rtable_private *t, const net_addr_ip6 *n0) { TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n) { @@ -246,7 +247,7 @@ net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) } static inline net * -net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) +net_route_vpn6_trie(struct rtable_private *t, const net_addr_vpn6 *n0) { TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) { @@ -262,7 +263,7 @@ net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) } static inline void * -net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) +net_route_ip6_sadr_trie(struct rtable_private *t, const net_addr_ip6_sadr *n0) { TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) { @@ -295,7 +296,7 @@ net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) } static inline net * -net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) +net_route_ip4_fib(struct rtable_private *t, const net_addr_ip4 *n0) { net_addr_ip4 n; net_copy_ip4(&n, n0); @@ -311,7 +312,7 @@ net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) } static inline net * -net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) +net_route_vpn4_fib(struct rtable_private *t, const net_addr_vpn4 *n0) { net_addr_vpn4 n; net_copy_vpn4(&n, n0); @@ -327,7 +328,7 @@ net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) } static inline net * -net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) +net_route_ip6_fib(struct rtable_private *t, const net_addr_ip6 *n0) { net_addr_ip6 n; net_copy_ip6(&n, n0); @@ -343,7 +344,7 @@ net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) } static inline net * -net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) +net_route_vpn6_fib(struct rtable_private *t, const net_addr_vpn6 *n0) { net_addr_vpn6 n; net_copy_vpn6(&n, n0); @@ -359,7 +360,7 @@ net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) } static inline void * -net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) +net_route_ip6_sadr_fib(struct rtable_private *t, const net_addr_ip6_sadr *n0) { net_addr_ip6_sadr n; net_copy_ip6_sadr(&n, n0); @@ -399,7 +400,7 @@ net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) } net * -net_route(rtable *tab, const net_addr *n) +net_route(struct rtable_private *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); @@ -442,7 +443,7 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_trie(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn) { int anything = 0; @@ -470,7 +471,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_fib(struct rtable_private *tab, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -502,7 +503,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_trie(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn) { int anything = 0; @@ -530,7 +531,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) } static int -net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_fib(struct rtable_private *tab, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -577,24 +578,30 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) * must have type NET_IP4 or NET_IP6, respectively. */ int -net_roa_check(rtable *tab, const net_addr *n, u32 asn) +net_roa_check(rtable *tp, const net_addr *n, u32 asn) { - if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) - { - if (tab->trie) - return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); - else - return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); - } - else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) + int out = ROA_UNKNOWN; + + RT_LOCKED(tp, tab) { - if (tab->trie) - return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) + { + if (tab->trie) + out = net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); + else + out = net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); + } + else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) + { + if (tab->trie) + out = net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + else + out = net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + } else - return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + out = ROA_UNKNOWN; /* Should not happen */ } - else - return ROA_UNKNOWN; /* Should not happen */ + return out; } /** @@ -618,7 +625,7 @@ rte_find(net *net, struct rte_src *src) struct rte_storage * -rte_store(const rte *r, net *net, rtable *tab) +rte_store(const rte *r, net *net, struct rtable_private *tab) { struct rte_storage *e = sl_alloc(tab->rte_slab); @@ -902,7 +909,7 @@ channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *r } void -rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rte **feed, uint count) { struct channel *c = SKIP_BACK(struct channel, out_req, req); @@ -946,7 +953,7 @@ rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_p done: /* Check obsolete routes for previously exported */ - while (rpe) + RPE_WALK(first, rpe, NULL) { channel_rpe_mark_seen(req, rpe); if (rpe->old) @@ -957,7 +964,6 @@ done: old_best = &rpe->old->rte; } } - rpe = rpe_next(rpe, NULL); } /* Nothing to export */ @@ -1030,7 +1036,7 @@ rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool } void -rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rte **feed, uint count) { struct channel *c = SKIP_BACK(struct channel, out_req, req); @@ -1056,7 +1062,7 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen } /* Check obsolete routes for previously exported */ - while (rpe) + RPE_WALK(first, rpe, NULL) { channel_rpe_mark_seen(req, rpe); if (rpe->old) @@ -1067,7 +1073,6 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen old_best = &rpe->old->rte; } } - rpe = rpe_next(rpe, NULL); } /* Prepare new merged route */ @@ -1078,17 +1083,16 @@ rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pen } void -rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte *o = RTE_VALID_OR_NULL(rpe->old_best); - struct rte_storage *new_best = rpe->new_best; + rte *o = RTE_VALID_OR_NULL(first->old_best); + struct rte_storage *new_best = first->new_best; - while (rpe) + RPE_WALK(first, rpe, NULL) { channel_rpe_mark_seen(req, rpe); new_best = rpe->new_best; - rpe = rpe_next(rpe, NULL); } rte n0 = RTE_COPY_VALID(new_best); @@ -1097,27 +1101,26 @@ rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_ } void -rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) { struct channel *c = SKIP_BACK(struct channel, out_req, req); - rte *n = RTE_VALID_OR_NULL(rpe->new); - rte *o = RTE_VALID_OR_NULL(rpe->old); + rte *n = RTE_VALID_OR_NULL(first->new); + rte *o = RTE_VALID_OR_NULL(first->old); if (!n && !o) { - channel_rpe_mark_seen(req, rpe); + channel_rpe_mark_seen(req, first); return; } struct rte_src *src = n ? n->src : o->src; - struct rte_storage *new_latest = rpe->new; + struct rte_storage *new_latest = first->new; - while (rpe) + RPE_WALK(first, rpe, src) { channel_rpe_mark_seen(req, rpe); new_latest = rpe->new; - rpe = rpe_next(rpe, src); } rte n0 = RTE_COPY_VALID(new_latest); @@ -1165,9 +1168,11 @@ rpe_next(struct rt_pending_export *rpe, struct rte_src *src) } static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); -static void -rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) +static int +rte_export(struct rt_table_export_hook *th, struct rt_pending_export *rpe) { + rtable *tab = RT_PUB(SKIP_BACK(struct rtable_private, exporter, th->table)); + struct rt_export_hook *hook = &th->h; if (bmap_test(&hook->seq_map, rpe->seq)) goto ignore; /* Seen already */ @@ -1205,6 +1210,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) else if (hook->req->export_bulk) { net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + RT_LOCK(tab); uint count = rte_feed_count(net); rte **feed = NULL; if (count) @@ -1212,6 +1218,7 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) feed = alloca(count * sizeof(rte *)); rte_feed_obtain(net, feed, count); } + RT_UNLOCK(tab); hook->req->export_bulk(hook->req, n, rpe, feed, count); } else @@ -1219,15 +1226,16 @@ rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) ignore: /* Get the next export if exists */ - hook->rpe_next = rt_next_export_fast(rpe); + th->rpe_next = rt_next_export_fast(rpe); /* The last block may be available to free */ - if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) - CALL(hook->table->used, hook->table); + int used = (PAGE_HEAD(th->rpe_next) != PAGE_HEAD(rpe)); /* Releasing this export for cleanup routine */ DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); - atomic_store_explicit(&hook->last_export, rpe, memory_order_release); + atomic_store_explicit(&th->last_export, rpe, memory_order_release); + + return used; } /** @@ -1262,7 +1270,7 @@ ignore: * done outside of scope of rte_announce(). */ static void -rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, +rte_announce(struct rtable_private *tab, net *net, struct rte_storage *new, struct rte_storage *old, struct rte_storage *new_best, struct rte_storage *old_best) { int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); @@ -1271,23 +1279,12 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage if ((new == old) && (new_best == old_best)) return; - if (new_best_valid || old_best_valid) - { - if (new_best_valid) - new_best->rte.sender->stats.pref++; - if (old_best_valid) - old_best->rte.sender->stats.pref--; - - if (tab->hostcache) - rt_notify_hostcache(tab, net); + if (new_best_valid) + new_best->rte.sender->stats.pref++; + if (old_best_valid) + old_best->rte.sender->stats.pref--; - if (!EMPTY_LIST(tab->flowspec_links)) - rt_flowspec_notify(tab, net); - } - - rt_schedule_notify(tab); - - if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) + if (EMPTY_LIST(tab->exporter.e.hooks) && EMPTY_LIST(tab->exporter.pending)) { /* No export hook and no pending exports to cleanup. We may free the route immediately. */ if (!old) @@ -1356,7 +1353,7 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage &net->last->next, &rpenull, rpe, memory_order_relaxed, memory_order_relaxed)); - + } net->last = rpe; @@ -1368,9 +1365,6 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage tab->exporter.first = rpe; rt_check_cork_high(tab); - - if (!tm_active(tab->exporter.export_timer)) - tm_start(tab->exporter.export_timer, tab->config->export_settle_time); } static struct rt_pending_export * @@ -1399,8 +1393,10 @@ rt_next_export_fast(struct rt_pending_export *last) } static struct rt_pending_export * -rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) +rt_next_export(struct rt_table_export_hook *hook, struct rt_table_exporter *tab) { + ASSERT_DIE(RT_IS_LOCKED(SKIP_BACK(struct rtable_private, exporter, tab))); + /* As the table is locked, it is safe to reload the last export pointer */ struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); @@ -1416,26 +1412,57 @@ rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) static inline void rt_send_export_event(struct rt_export_hook *hook) { - ev_send(hook->req->list, hook->event); + ev_send(hook->req->list, &hook->event); } static void rt_announce_exports(timer *tm) { - rtable *tab = tm->data; + RT_LOCKED((rtable *) tm->data, tab) + if (!EMPTY_LIST(tab->exporter.pending)) + { + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exporter.e.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) + continue; - struct rt_export_hook *c; node *n; - WALK_LIST2(c, n, tab->exporter.hooks, n) + rt_send_export_event(c); + } + } +} + +static void +rt_import_announce_exports(void *_hook) +{ + struct rt_import_hook *hook = _hook; + RT_LOCKED(hook->table, tab) { - if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) - continue; + if (hook->import_state == TIS_CLEARED) + { + void (*stopped)(struct rt_import_request *) = hook->stopped; + struct rt_import_request *req = hook->req; + req->hook = NULL; + + rt_trace(tab, D_EVENTS, "Hook %s stopped", req->name); + rem_node(&hook->n); + mb_free(hook); + rt_unlock_table(tab); + RT_UNLOCK(tab); - rt_send_export_event(c); + stopped(req); + return; + } + + rt_trace(tab, D_EVENTS, "Announcing exports after imports from %s", hook->req->name); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); } } static struct rt_pending_export * -rt_last_export(struct rt_exporter *tab) +rt_last_export(struct rt_table_exporter *tab) { struct rt_pending_export *rpe = NULL; @@ -1455,31 +1482,42 @@ rt_last_export(struct rt_exporter *tab) static void rt_export_hook(void *_data) { - struct rt_export_hook *c = _data; + struct rt_table_export_hook *c = _data; + rtable *tab = SKIP_BACK(rtable, priv.exporter, c->table); - ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_READY); if (!c->rpe_next) { + RT_LOCK(tab); c->rpe_next = rt_next_export(c, c->table); if (!c->rpe_next) { - CALL(c->table->used, c->table); + rt_export_used(c->table); + RT_UNLOCK(tab); return; } + + RT_UNLOCK(tab); } + int used = 0; + /* Process the export */ for (uint i=0; i<RT_EXPORT_BULK; i++) { - rte_export(c, c->rpe_next); + used += rte_export(c, c->rpe_next); if (!c->rpe_next) break; } - rt_send_export_event(c); + if (used) + RT_LOCKED(tab, _) + rt_export_used(c->table); + + rt_send_export_event(&c->h); } @@ -1549,11 +1587,10 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } -static void -rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) +static int +rte_recalculate(struct rtable_private *table, struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { struct rt_import_request *req = c->req; - struct rtable *table = c->table; struct rt_import_stats *stats = &c->stats; struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; @@ -1602,7 +1639,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr /* We need to free the already stored route here before returning */ rte_free(new_stored); - return; + return 0; } *before_old = (*before_old)->next; @@ -1612,7 +1649,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (!old && !new) { stats->withdraws_ignored++; - return; + return 0; } /* If rejected by import limit, we need to pretend there is no route */ @@ -1748,18 +1785,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr rte_announce(table, net, new_stored, old_stored, net->routes, old_best_stored); - if (!net->routes && - (table->gc_counter++ >= table->config->gc_threshold)) - rt_kick_prune_timer(table); - -#if 0 - /* Enable and reimplement these callbacks if anybody wants to use them */ - if (old_ok && p->rte_remove) - p->rte_remove(net, old); - if (new_ok && p->rte_insert) - p->rte_insert(net, &new_stored->rte); -#endif - + return 1; } int @@ -1870,39 +1896,47 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt if (!hook) return; - net *nn; - if (new) + RT_LOCKED(hook->table, tab) + { + net *nn; + if (new) { /* Use the actual struct network, not the dummy one */ - nn = net_get(hook->table, n); + nn = net_get(tab, n); new->net = nn->n.addr; new->sender = hook; /* Set the stale cycle */ new->stale_cycle = hook->stale_set; } - else if (!(nn = net_find(hook->table, n))) + else if (!(nn = net_find(tab, n))) { req->hook->stats.withdraws_ignored++; - return; + RT_RETURN(tab); } - /* And recalculate the best route */ - rte_recalculate(hook, nn, new, src); + /* Recalculate the best route */ + if (rte_recalculate(tab, hook, nn, new, src)) + ev_send(req->list, &hook->announce_event); + } } /* Check rtable for best route to given net whether it would be exported do p */ int -rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +rt_examine(rtable *tp, net_addr *a, struct channel *c, const struct filter *filter) { - net *n = net_find(t, a); + rte rt = {}; - if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) - return 0; + RT_LOCKED(tp, t) + { + net *n = net_find(t, a); + if (n) + rt = RTE_COPY_VALID(n->routes); + } - rte rt = n->routes->rte; + if (!rt.src) + return 0; - /* Rest is stripped down export_filter() */ int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; if (v == RIC_PROCESS) v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); @@ -1911,34 +1945,41 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte } static void -rt_table_export_done(struct rt_export_hook *hook) +rt_table_export_done(void *hh) { - struct rt_exporter *re = hook->table; - struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + struct rt_table_export_hook *hook = hh; + struct rt_export_request *req = hook->h.req; + void (*stopped)(struct rt_export_request *) = hook->h.stopped; + rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table); - rt_unlock_table(tab); - DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + RT_LOCKED(t, tab) + { + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); + + /* Drop pending exports */ + rt_export_used(&tab->exporter); + + /* Do the common code; this frees the hook */ + rt_export_stopped(&hook->h); + } + + /* Report the channel as stopped. */ + CALL(stopped, req); + + /* Unlock the table; this may free it */ + rt_unlock_table(t); } -static void -rt_export_stopped(void *data) +void +rt_export_stopped(struct rt_export_hook *hook) { - struct rt_export_hook *hook = data; - struct rt_exporter *tab = hook->table; - - /* Drop pending exports */ - CALL(tab->used, tab); + /* Unlink from the request */ + hook->req->hook = NULL; /* Unlist */ rem_node(&hook->n); - /* Report the channel as stopped. */ - hook->stopped(hook->req); - - /* Reporting the hook as finished. */ - CALL(tab->done, hook); - - /* Free the hook. */ + /* Free the hook itself together with its pool */ rfree(hook->pool); } @@ -1948,8 +1989,7 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state) hook->last_state_change = current_time(); hook->import_state = state; - if (hook->req->log_state_change) - hook->req->log_state_change(hook->req, state); + CALL(hook->req->log_state_change, hook->req, state); } void @@ -1958,26 +1998,28 @@ rt_set_export_state(struct rt_export_hook *hook, u8 state) hook->last_state_change = current_time(); atomic_store_explicit(&hook->export_state, state, memory_order_release); - if (hook->req->log_state_change) - hook->req->log_state_change(hook->req, state); + CALL(hook->req->log_state_change, hook->req, state); } void -rt_request_import(rtable *tab, struct rt_import_request *req) +rt_request_import(rtable *t, struct rt_import_request *req) { - rt_lock_table(tab); + RT_LOCKED(t, tab) + { + rt_lock_table(tab); - struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); - DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + hook->announce_event = (event) { .hook = rt_import_announce_exports, .data = hook }; - hook->req = req; - hook->table = tab; + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); - rt_set_import_state(hook, TIS_UP); + hook->req = req; + hook->table = t; - hook->n = (node) {}; - add_tail(&tab->imports, &hook->n); + rt_set_import_state(hook, TIS_UP); + add_tail(&tab->imports, &hook->n); + } } void @@ -1986,22 +2028,24 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r ASSERT_DIE(req->hook); struct rt_import_hook *hook = req->hook; - rt_schedule_prune(hook->table); - - rt_set_import_state(hook, TIS_STOP); - - hook->stopped = stopped; + RT_LOCKED(hook->table, tab) + { + rt_schedule_prune(tab); + rt_set_import_state(hook, TIS_STOP); + hook->stopped = stopped; + } } -static struct rt_export_hook * -rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +static void +rt_table_export_start_locked(struct rtable_private *tab, struct rt_export_request *req) { - rtable *tab = SKIP_BACK(rtable, exporter, re); + struct rt_exporter *re = &tab->exporter.e; rt_lock_table(tab); - pool *p = rp_new(tab->rp, "Export hook"); - struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); - hook->pool = p; + req->hook = rt_alloc_export(re, sizeof(struct rt_table_export_hook)); + req->hook->req = req; + + struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, req->hook); /* stats zeroed by mb_allocz */ switch (req->addr_mode) @@ -2009,24 +2053,25 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) case TE_ADDR_IN: if (tab->trie && net_val_match(tab->addr_type, NB_IP)) { - hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_state = mb_allocz(hook->h.pool, sizeof (struct f_trie_walk_state)); hook->walk_lock = rt_lock_trie(tab); trie_walk_init(hook->walk_state, tab->trie, req->addr); - hook->event = ev_new_init(p, rt_feed_by_trie, hook); + hook->h.event.hook = rt_feed_by_trie; + hook->walk_last.type = 0; break; } /* fall through */ case TE_ADDR_NONE: FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); - hook->event = ev_new_init(p, rt_feed_by_fib, hook); + hook->h.event.hook = rt_feed_by_fib; break; case TE_ADDR_EQUAL: - hook->event = ev_new_init(p, rt_feed_equal, hook); + hook->h.event.hook = rt_feed_equal; break; case TE_ADDR_FOR: - hook->event = ev_new_init(p, rt_feed_for, hook); + hook->h.event.hook = rt_feed_for; break; default: @@ -2035,22 +2080,50 @@ rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); - return hook; + struct rt_pending_export *rpe = rt_last_export(hook->table); + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); + atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); + + rt_init_export(re, req->hook); +} + +static void +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +{ + RT_LOCKED(SKIP_BACK(rtable, priv.exporter.e, re), tab) + rt_table_export_start_locked(tab, req); +} + +void rt_request_export(rtable *t, struct rt_export_request *req) +{ + RT_LOCKED(t, tab) + rt_table_export_start_locked(tab, req); /* Is locked inside */ } void -rt_request_export(struct rt_exporter *re, struct rt_export_request *req) +rt_request_export_other(struct rt_exporter *re, struct rt_export_request *req) +{ + return re->class->start(re, req); +} + +struct rt_export_hook * +rt_alloc_export(struct rt_exporter *re, uint size) { - struct rt_export_hook *hook = req->hook = re->start(re, req); + pool *p = rp_new(re->rp, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, size); - hook->req = req; + hook->pool = p; hook->table = re; - bmap_init(&hook->seq_map, hook->pool, 1024); + return hook; +} - struct rt_pending_export *rpe = rt_last_export(hook->table); - DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); - atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); +void +rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook) +{ + hook->event.data = hook; + + bmap_init(&hook->seq_map, hook->pool, 1024); hook->n = (node) {}; add_tail(&re->hooks, &hook->n); @@ -2061,45 +2134,57 @@ rt_request_export(struct rt_exporter *re, struct rt_export_request *req) } static void -rt_table_export_stop(struct rt_export_hook *hook) +rt_table_export_stop_locked(struct rt_export_hook *hh) { - rtable *tab = SKIP_BACK(rtable, exporter, hook->table); + struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh); + struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, hook->table); - if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) - return; - - switch (hook->req->addr_mode) - { - case TE_ADDR_IN: - if (hook->walk_lock) - { - rt_unlock_trie(tab, hook->walk_lock); - hook->walk_lock = NULL; - mb_free(hook->walk_state); - hook->walk_state = NULL; + if (atomic_load_explicit(&hh->export_state, memory_order_relaxed) == TES_FEEDING) + switch (hh->req->addr_mode) + { + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); break; - } - /* fall through */ - case TE_ADDR_NONE: - fit_get(&tab->fib, &hook->feed_fit); - break; - } + } +} + +static void +rt_table_export_stop(struct rt_export_hook *hh) +{ + struct rt_table_export_hook *hook = SKIP_BACK(struct rt_table_export_hook, h, hh); + rtable *t = SKIP_BACK(rtable, priv.exporter, hook->table); + if (RT_IS_LOCKED(t)) + rt_table_export_stop_locked(hh); + else + RT_LOCKED(t, tab) + rt_table_export_stop_locked(hh); } void rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) { + ASSERT_DIE(birdloop_inside(req->list->loop)); ASSERT_DIE(req->hook); struct rt_export_hook *hook = req->hook; /* Cancel the feeder event */ - ev_postpone(hook->event); + ev_postpone(&hook->event); /* Stop feeding from the exporter */ - CALL(hook->table->stop, hook); + CALL(hook->table->class->stop, hook); /* Reset the event as the stopped event */ - hook->event->hook = rt_export_stopped; + hook->event.hook = hook->table->class->done; hook->stopped = stopped; /* Update export state */ @@ -2130,12 +2215,15 @@ rt_refresh_begin(struct rt_import_request *req) ASSERT_DIE(hook); ASSERT_DIE(hook->stale_set == hook->stale_valid); + RT_LOCKED(hook->table, tab) + { + /* If the pruning routine is too slow */ if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) { log(L_WARN "Route refresh flood in table %s", hook->table->name); - FIB_WALK(&hook->table->fib, net, n) + FIB_WALK(&tab->fib, net, n) { for (struct rte_storage *e = n->routes; e; e = e->next) if (e->rte.sender == req->hook) @@ -2156,6 +2244,8 @@ rt_refresh_begin(struct rt_import_request *req) if (req->trace_routes & D_STATES) log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); + + } } /** @@ -2172,13 +2262,16 @@ rt_refresh_end(struct rt_import_request *req) struct rt_import_hook *hook = req->hook; ASSERT_DIE(hook); - hook->stale_valid++; - ASSERT_DIE(hook->stale_set == hook->stale_valid); + RT_LOCKED(hook->table, tab) + { + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); - rt_schedule_prune(hook->table); + rt_schedule_prune(tab); - if (req->trace_routes & D_STATES) - log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); + } } /** @@ -2203,8 +2296,11 @@ rte_dump(struct rte_storage *e) * This function dumps contents of a given routing table to debug output. */ void -rt_dump(rtable *t) +rt_dump(rtable *tp) { + RT_LOCKED(tp, t) + { + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); @@ -2216,6 +2312,8 @@ rt_dump(rtable *t) } FIB_WALK_END; debug("\n"); + + } } /** @@ -2237,11 +2335,14 @@ rt_dump_all(void) } void -rt_dump_hooks(rtable *tab) +rt_dump_hooks(rtable *tp) { + RT_LOCKED(tp, tab) + { + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); - debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", - tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" nhu_state=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->use_count, tab->rt_count); debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); @@ -2255,15 +2356,18 @@ rt_dump_hooks(rtable *tab) ih->last_state_change, ih->import_state, ih->stopped); } - struct rt_export_hook *eh; - WALK_LIST(eh, tab->exporter.hooks) + struct rt_table_export_hook *eh; + WALK_LIST(eh, tab->exporter.e.hooks) { - eh->req->dump_req(eh->req); + eh->h.req->dump_req(eh->h.req); debug(" Export hook %p requested by %p:" " refeed_pending=%u last_state_change=%t export_state=%u\n", - eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); + eh, eh->h.req, eh->refeed_pending, eh->h.last_state_change, + atomic_load_explicit(&eh->h.export_state, memory_order_relaxed)); } debug("\n"); + + } } void @@ -2282,30 +2386,32 @@ rt_dump_hooks_all(void) } static inline void -rt_schedule_hcu(rtable *tab) +rt_schedule_nhu(struct rtable_private *tab) { - if (tab->hcu_scheduled) - return; - - tab->hcu_scheduled = 1; - ev_schedule(tab->rt_event); -} - -static inline void -rt_schedule_nhu(rtable *tab) -{ - if (tab->nhu_state == NHU_CLEAN) - ev_schedule(tab->rt_event); - - /* state change: - * NHU_CLEAN -> NHU_SCHEDULED - * NHU_RUNNING -> NHU_DIRTY - */ - tab->nhu_state |= NHU_SCHEDULED; + if (tab->nhu_corked) + { + if (!(tab->nhu_corked & NHU_SCHEDULED)) + { + tab->nhu_corked |= NHU_SCHEDULED; + rt_lock_table(tab); + } + } + else if (!(tab->nhu_state & NHU_SCHEDULED)) + { + rt_trace(tab, D_EVENTS, "Scheduling NHU"); + rt_lock_table(tab); + + /* state change: + * NHU_CLEAN -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_DIRTY + */ + if ((tab->nhu_state |= NHU_SCHEDULED) == NHU_SCHEDULED) + ev_schedule(tab->nhu_event); + } } void -rt_schedule_prune(rtable *tab) +rt_schedule_prune(struct rtable_private *tab) { if (tab->prune_state == 0) ev_schedule(tab->rt_event); @@ -2315,12 +2421,12 @@ rt_schedule_prune(rtable *tab) } static void -rt_export_used(struct rt_exporter *e) +rt_export_used(struct rt_table_exporter *e) { - rtable *tab = SKIP_BACK(rtable, exporter, e); + struct rtable_private *tab = SKIP_BACK(struct rtable_private, exporter, e); + ASSERT_DIE(RT_IS_LOCKED(tab)); - if (config->table_debug) - log(L_TRACE "%s: Export cleanup requested", tab->name); + rt_trace(tab, D_EVENTS, "Export cleanup requested"); if (tab->export_used) return; @@ -2332,69 +2438,31 @@ rt_export_used(struct rt_exporter *e) static void rt_event(void *ptr) { - rtable *tab = ptr; + RT_LOCKED((rtable *) ptr, tab) + { rt_lock_table(tab); if (tab->export_used) rt_export_cleanup(tab); - if ( - tab->hcu_corked || - tab->nhu_corked || - (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event) - ) - { - if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug) - log(L_TRACE "%s: Auxiliary routines corked", tab->name); - - tab->hcu_corked |= tab->hcu_scheduled; - tab->hcu_scheduled = 0; - - tab->nhu_corked |= tab->nhu_state; - tab->nhu_state = 0; - } - - if (tab->hcu_scheduled) - rt_update_hostcache(tab); - - if (tab->nhu_state) - rt_next_hop_update(tab); - if (tab->prune_state) rt_prune_table(tab); rt_unlock_table(tab); -} - -static void -rt_uncork_event(void *ptr) -{ - rtable *tab = ptr; - - tab->hcu_scheduled |= tab->hcu_corked; - tab->hcu_corked = 0; - - tab->nhu_state |= tab->nhu_corked; - tab->nhu_corked = 0; - - if (config->table_debug) - log(L_TRACE "%s: Auxiliary routines uncorked", tab->name); - - ev_schedule(tab->rt_event); + } } static void rt_prune_timer(timer *t) { - rtable *tab = t->data; - - if (tab->gc_counter >= tab->config->gc_threshold) - rt_schedule_prune(tab); + RT_LOCKED((rtable *) t->data, tab) + if (tab->gc_counter >= tab->config->gc_threshold) + rt_schedule_prune(tab); } static void -rt_kick_prune_timer(rtable *tab) +rt_kick_prune_timer(struct rtable_private *tab) { /* Return if prune is already scheduled */ if (tm_active(tab->prune_timer) || (tab->prune_state & 1)) @@ -2407,153 +2475,132 @@ rt_kick_prune_timer(rtable *tab) } -static inline btime -rt_settled_time(rtable *tab) -{ - ASSUME(tab->base_settle_time != 0); - - return MIN(tab->last_rt_change + tab->config->min_settle_time, - tab->base_settle_time + tab->config->max_settle_time); -} - static void -rt_settle_timer(timer *t) +rt_flowspec_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) { - rtable *tab = t->data; - - if (!tab->base_settle_time) - return; + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + rtable *dst_pub = ln->dst; + ASSUME(rt_is_flow(dst_pub)); + struct rtable_private *dst = RT_LOCK(dst_pub); - btime settled_time = rt_settled_time(tab); - if (current_time() < settled_time) + /* No need to inspect it further if recalculation is already scheduled */ + if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY) + || !trie_match_net(dst->flowspec_trie, net)) { - tm_set(tab->settle_timer, settled_time); + RT_UNLOCK(dst_pub); + rpe_mark_seen_all(req->hook, first, NULL); return; } - /* Settled */ - tab->base_settle_time = 0; - - struct rt_subscription *s; - WALK_LIST(s, tab->subscribers) - ev_send(s->list, s->event); -} - -static void -rt_kick_settle_timer(rtable *tab) -{ - tab->base_settle_time = current_time(); - - if (!tab->settle_timer) - tab->settle_timer = tm_new_init(tab->rp, rt_settle_timer, tab, 0, 0); - - if (!tm_active(tab->settle_timer)) - tm_set(tab->settle_timer, rt_settled_time(tab)); -} - -static inline void -rt_schedule_notify(rtable *tab) -{ - if (EMPTY_LIST(tab->subscribers)) - return; + /* This net may affect some flowspecs, check the actual change */ + rte *o = RTE_VALID_OR_NULL(first->old_best); + struct rte_storage *new_best = first->new_best; - if (tab->base_settle_time) - return; + RPE_WALK(first, rpe, NULL) + { + rpe_mark_seen(req->hook, rpe); + new_best = rpe->new_best; + } - rt_kick_settle_timer(tab); -} + /* Yes, something has actually changed. Schedule the update. */ + if (o != RTE_VALID_OR_NULL(new_best)) + rt_schedule_nhu(dst); -void -rt_subscribe(rtable *tab, struct rt_subscription *s) -{ - s->tab = tab; - rt_lock_table(tab); - DBG("rt_subscribe(%s)\n", tab->name); - add_tail(&tab->subscribers, &s->n); + RT_UNLOCK(dst_pub); } -void -rt_unsubscribe(struct rt_subscription *s) +static void +rt_flowspec_dump_req(struct rt_export_request *req) { - rem_node(&s->n); - rt_unlock_table(s->tab); + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + debug(" Flowspec link for table %s (%p)\n", ln->dst->name, req); } static struct rt_flowspec_link * -rt_flowspec_find_link(rtable *src, rtable *dst) +rt_flowspec_find_link(struct rtable_private *src, rtable *dst) { - struct rt_flowspec_link *ln; - WALK_LIST(ln, src->flowspec_links) - if ((ln->src == src) && (ln->dst == dst)) - return ln; + struct rt_table_export_hook *hook; node *n; + WALK_LIST2(hook, n, src->exporter.e.hooks, h.n) + switch (atomic_load_explicit(&hook->h.export_state, memory_order_acquire)) + { + case TES_FEEDING: + case TES_READY: + if (hook->h.req->export_one == rt_flowspec_export_one) + { + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, hook->h.req); + if (ln->dst == dst) + return ln; + } + } return NULL; } void -rt_flowspec_link(rtable *src, rtable *dst) +rt_flowspec_link(rtable *src_pub, rtable *dst_pub) { - ASSERT(rt_is_ip(src)); - ASSERT(rt_is_flow(dst)); + ASSERT(rt_is_ip(src_pub)); + ASSERT(rt_is_flow(dst_pub)); - struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); + int lock_dst = 0; - if (!ln) + RT_LOCKED(src_pub, src) { - rt_lock_table(src); - rt_lock_table(dst); + struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst_pub); - ln = mb_allocz(src->rp, sizeof(struct rt_flowspec_link)); - ln->src = src; - ln->dst = dst; - add_tail(&src->flowspec_links, &ln->n); + if (!ln) + { + pool *p = src->rp; + ln = mb_allocz(p, sizeof(struct rt_flowspec_link)); + ln->src = src_pub; + ln->dst = dst_pub; + ln->req = (struct rt_export_request) { + .name = mb_sprintf(p, "%s.flowspec.notifier", dst_pub->name), + .list = &global_work_list, + .trace_routes = src->config->debug, + .dump_req = rt_flowspec_dump_req, + .export_one = rt_flowspec_export_one, + }; + + rt_table_export_start_locked(src, &ln->req); + + lock_dst = 1; + } + + ln->uc++; } - ln->uc++; + if (lock_dst) + rt_lock_table(dst_pub); } -void -rt_flowspec_unlink(rtable *src, rtable *dst) +static void +rt_flowspec_link_stopped(struct rt_export_request *req) { - struct rt_flowspec_link *ln = rt_flowspec_find_link(src, dst); - - ASSERT(ln && (ln->uc > 0)); - - ln->uc--; + struct rt_flowspec_link *ln = SKIP_BACK(struct rt_flowspec_link, req, req); + rtable *dst = ln->dst; - if (!ln->uc) - { - rem_node(&ln->n); - mb_free(ln); - - rt_unlock_table(src); - rt_unlock_table(dst); - } + mb_free(ln); + rt_unlock_table(dst); } -static void -rt_flowspec_notify(rtable *src, net *net) +void +rt_flowspec_unlink(rtable *src, rtable *dst) { - /* Only IP tables are src links */ - ASSERT(rt_is_ip(src)); - struct rt_flowspec_link *ln; - WALK_LIST(ln, src->flowspec_links) + RT_LOCKED(src, t) { - rtable *dst = ln->dst; - ASSERT(rt_is_flow(dst)); + ln = rt_flowspec_find_link(t, dst); - /* No need to inspect it further if recalculation is already active */ - if ((dst->nhu_state == NHU_SCHEDULED) || (dst->nhu_state == NHU_DIRTY)) - continue; + ASSERT(ln && (ln->uc > 0)); - if (trie_match_net(dst->flowspec_trie, net->n.addr)) - rt_schedule_nhu(dst); + if (!--ln->uc) + rt_stop_export(&ln->req, rt_flowspec_link_stopped); } } static void -rt_flowspec_reset_trie(rtable *tab) +rt_flowspec_reset_trie(struct rtable_private *tab) { linpool *lp = tab->flowspec_trie->lp; int ipv4 = tab->flowspec_trie->ipv4; @@ -2566,7 +2613,9 @@ rt_flowspec_reset_trie(rtable *tab) static void rt_free(resource *_r) { - rtable *r = (rtable *) _r; + struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r); + + DOMAIN_FREE(rtable, r->lock); DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); @@ -2581,7 +2630,6 @@ rt_free(resource *_r) fib_free(&r->fib); hmap_free(&r->id_map); rfree(r->rt_event); - rfree(r->settle_timer); mb_free(r); */ } @@ -2589,26 +2637,42 @@ rt_free(resource *_r) static void rt_res_dump(resource *_r) { - rtable *r = (rtable *) _r; + struct rtable_private *r = SKIP_BACK(struct rtable_private, r, _r); + debug("name \"%s\", addr_type=%s, rt_count=%u, use_count=%d\n", r->name, net_label[r->addr_type], r->rt_count, r->use_count); } static struct resclass rt_class = { .name = "Routing table", - .size = sizeof(struct rtable), + .size = sizeof(rtable), .free = rt_free, .dump = rt_res_dump, .lookup = NULL, .memsize = NULL, }; +static const struct rt_exporter_class rt_table_exporter_class = { + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, +}; + +void +rt_exporter_init(struct rt_exporter *e) +{ + init_list(&e->hooks); +} + +static struct idm rtable_idm; +uint rtable_max_id = 0; + rtable * rt_setup(pool *pp, struct rtable_config *cf) { pool *p = rp_newf(pp, "Routing table %s", cf->name); - rtable *t = ralloc(p, &rt_class); + struct rtable_private *t = ralloc(p, &rt_class); t->rp = p; t->rte_slab = sl_new(p, sizeof(struct rte_storage)); @@ -2616,6 +2680,11 @@ rt_setup(pool *pp, struct rtable_config *cf) t->name = cf->name; t->config = cf; t->addr_type = cf->addr_type; + t->id = idm_alloc(&rtable_idm); + if (t->id >= rtable_max_id) + rtable_max_id = t->id + 1; + + t->lock = DOMAIN_NEW(rtable, t->name); fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); @@ -2627,44 +2696,41 @@ rt_setup(pool *pp, struct rtable_config *cf) t->fib.init = net_init_with_trie; } - init_list(&t->flowspec_links); - - t->exporter = (struct rt_exporter) { - .addr_type = t->addr_type, - .start = rt_table_export_start, - .stop = rt_table_export_stop, - .done = rt_table_export_done, - .used = rt_export_used, - }; - - init_list(&t->exporter.hooks); - init_list(&t->exporter.pending); - init_list(&t->imports); hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); - init_list(&t->subscribers); - t->rt_event = ev_new_init(p, rt_event, t); - t->uncork_event = ev_new_init(p, rt_uncork_event, t); + t->nhu_event = ev_new_init(p, rt_next_hop_update, t); t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); - t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); t->last_rt_change = t->gc_time = current_time(); - t->exporter.next_seq = 1; + + t->exporter = (struct rt_table_exporter) { + .e = { + .class = &rt_table_exporter_class, + .addr_type = t->addr_type, + .rp = t->rp, + }, + .export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0), + .next_seq = 1, + }; + + rt_exporter_init(&t->exporter.e); + + init_list(&t->exporter.pending); t->cork_threshold = cf->cork_threshold; t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; - if (rt_is_flow(t)) + if (rt_is_flow(RT_PUB(t))) { t->flowspec_trie = f_new_trie(lp_new_default(p), 0); t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } - return t; + return RT_PUB(t); } /** @@ -2682,6 +2748,7 @@ rt_init(void) init_list(&deleted_routing_tables); ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); rt_cork.run = (event) { .hook = rt_cork_release_hook }; + idm_init(&rtable_idm, rt_table_pool, 256); } @@ -2700,7 +2767,7 @@ rt_init(void) * iteration. */ static void -rt_prune_table(rtable *tab) +rt_prune_table(struct rtable_private *tab) { struct fib_iterator *fit = &tab->prune_fit; int limit = 2000; @@ -2708,7 +2775,7 @@ rt_prune_table(rtable *tab) struct rt_import_hook *ih; node *n, *x; - DBG("Pruning route table %s\n", tab->name); + rt_trace(tab, D_STATES, "Pruning"); #ifdef DEBUGGING fib_check(&tab->fib); #endif @@ -2762,7 +2829,7 @@ again: (e->rte.stale_cycle < s->stale_valid) || (e->rte.stale_cycle > s->stale_set)) { - rte_recalculate(e->rte.sender, n, NULL, e->rte.src); + rte_recalculate(tab, e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; @@ -2784,6 +2851,10 @@ again: } FIB_ITERATE_END; + rt_trace(tab, D_EVENTS, "Prune done, scheduling export timer"); + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + #ifdef DEBUGGING fib_check(&tab->fib); #endif @@ -2840,24 +2911,25 @@ again: } /* In some cases, we may want to directly proceed to export cleanup */ - if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) + if (EMPTY_LIST(tab->exporter.e.hooks) && flushed_channels) rt_export_cleanup(tab); } static void -rt_export_cleanup(rtable *tab) +rt_export_cleanup(struct rtable_private *tab) { tab->export_used = 0; u64 min_seq = ~((u64) 0); struct rt_pending_export *last_export_to_free = NULL; struct rt_pending_export *first = tab->exporter.first; + int want_prune = 0; - struct rt_export_hook *eh; + struct rt_table_export_hook *eh; node *n; - WALK_LIST2(eh, n, tab->exporter.hooks, n) + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) { - switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + switch (atomic_load_explicit(&eh->h.export_state, memory_order_acquire)) { case TES_DOWN: continue; @@ -2885,16 +2957,14 @@ rt_export_cleanup(rtable *tab) tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; - if (config->table_debug) - log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", - tab->name, + rt_trace(tab, D_STATES, "Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", first ? first->seq : 0, tab->exporter.first ? tab->exporter.first->seq : 0, min_seq); - WALK_LIST2(eh, n, tab->exporter.hooks, n) + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) { - if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY) continue; struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); @@ -2920,7 +2990,7 @@ rt_export_cleanup(rtable *tab) net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); ASSERT_DIE(net->first == first); - + if (first == net->last) /* The only export here */ net->last = net->first = NULL; @@ -2928,6 +2998,8 @@ rt_export_cleanup(rtable *tab) /* First is now the next one */ net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + want_prune += !net->routes && !net->first; + /* For now, the old route may be finally freed */ if (first->old) { @@ -2948,7 +3020,7 @@ rt_export_cleanup(rtable *tab) ASSERT_DIE(pos < end); struct rt_pending_export *next = NULL; - + if (++pos < end) next = &reb->export[pos]; else @@ -2963,17 +3035,16 @@ rt_export_cleanup(rtable *tab) if (EMPTY_LIST(tab->exporter.pending)) { - if (config->table_debug) - log(L_TRACE "%s: Resetting export seq", tab->name); + rt_trace(tab, D_EVENTS, "Resetting export seq"); node *n; - WALK_LIST2(eh, n, tab->exporter.hooks, n) + WALK_LIST2(eh, n, tab->exporter.e.hooks, h.n) { - if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + if (atomic_load_explicit(&eh->h.export_state, memory_order_acquire) != TES_READY) continue; ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); - bmap_reset(&eh->seq_map, 1024); + bmap_reset(&eh->h.seq_map, 1024); } tab->exporter.next_seq = 1; @@ -2997,12 +3068,12 @@ done:; if (!first || (first->seq >= ih->flush_seq)) { ih->import_state = TIS_CLEARED; - ih->stopped(ih->req); - rem_node(&ih->n); - mb_free(ih); - rt_unlock_table(tab); + ev_send(ih->req->list, &ih->announce_event); } + if ((tab->gc_counter += want_prune) >= tab->config->gc_threshold) + rt_kick_prune_timer(tab); + if (tab->export_used) ev_schedule(tab->rt_event); @@ -3035,7 +3106,7 @@ rt_cork_release_hook(void *data UNUSED) * */ struct f_trie * -rt_lock_trie(rtable *tab) +rt_lock_trie(struct rtable_private *tab) { ASSERT(tab->trie); @@ -3052,7 +3123,7 @@ rt_lock_trie(rtable *tab) * It may free the trie and schedule next trie pruning. */ void -rt_unlock_trie(rtable *tab, struct f_trie *trie) +rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie) { ASSERT(trie); @@ -3092,8 +3163,8 @@ rt_preconfig(struct config *c) { init_list(&c->tables); - rt_new_table(cf_get_symbol("master4"), NET_IP4); - rt_new_table(cf_get_symbol("master6"), NET_IP6); + c->def_tables[NET_IP4] = cf_define_symbol(cf_get_symbol("master4"), SYM_TABLE, table, NULL); + c->def_tables[NET_IP6] = cf_define_symbol(cf_get_symbol("master6"), SYM_TABLE, table, NULL); } void @@ -3108,6 +3179,13 @@ rt_postconfig(struct config *c) WALK_LIST(rc, c->tables) if (rc->gc_period == (uint) -1) rc->gc_period = (uint) def_gc_period; + + for (uint net_type = 0; net_type < NET_MAX; net_type++) + if (c->def_tables[net_type] && !c->def_tables[net_type]->table) + { + c->def_tables[net_type]->class = SYM_VOID; + c->def_tables[net_type] = NULL; + } } @@ -3117,7 +3195,7 @@ rt_postconfig(struct config *c) */ void -ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) +ea_set_hostentry(ea_list **to, rtable *dep, rtable *src, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { struct { struct adata ad; @@ -3125,7 +3203,8 @@ ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr g u32 labels[lnum]; } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); - head->he = rt_get_hostentry(tab, gw, ll, dep); + RT_LOCKED(src, tab) + head->he = rt_get_hostentry(tab, gw, ll, dep); memcpy(head->labels, labels, lnum * sizeof(u32)); ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( @@ -3254,17 +3333,16 @@ rta_next_hop_outdated(ea_list *a) ? head : NULL; } -static inline struct rte_storage * -rt_next_hop_update_rte(rtable *tab, net *n, rte *old) +static inline int +rt_next_hop_update_rte(rte *old, rte *new) { struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); if (!head) - return NULL; - - rte e0 = *old; - rta_apply_hostentry(&e0.attrs, head); + return 0; - return rte_store(&e0, n, tab); + *new = *old; + rta_apply_hostentry(&new->attrs, head); + return 1; } static inline void @@ -3322,7 +3400,6 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * { ASSERT(rt_is_ip(tab_ip)); ASSERT(rt_is_flow(tab_flow)); - ASSERT(tab_ip->trie); /* RFC 8955 6. a) Flowspec has defined dst prefix */ if (!net_flow_has_dst_prefix(n)) @@ -3342,32 +3419,45 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * else net_fill_ip6(&dst, net6_prefix(n), net6_pxlen(n)); - /* Find best-match BGP unicast route for flowspec dst prefix */ - net *nb = net_route(tab_ip, &dst); - const rte *rb = nb ? &nb->routes->rte : NULL; + rte rb = {}; + net_addr_union nau; + RT_LOCKED(tab_ip, tip) + { + ASSERT(tip->trie); + /* Find best-match BGP unicast route for flowspec dst prefix */ + net *nb = net_route(tip, &dst); + if (nb) + { + rb = RTE_COPY_VALID(nb->routes); + rta_clone(rb.attrs); + net_copy(&nau.n, nb->n.addr); + rb.net = &nau.n; + } + } /* Register prefix to trie for tracking further changes */ int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; - trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen); + RT_LOCKED(tab_flow, tfl) + trie_add_prefix(tfl->flowspec_trie, &dst, (rb.net ? rb.net->pxlen : 0), max_pxlen); /* No best-match BGP route -> no flowspec */ - if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + if (!rb.attrs || (rt_get_source_attr(&rb) != RTS_BGP)) return FLOWSPEC_INVALID; /* Find ORIGINATOR_ID values */ u32 orig_a = ea_get_int(a, "bgp_originator_id", 0); - u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb.attrs, "bgp_originator_id", 0); /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( ea_get_ip(a, &ea_gen_from, IPA_NONE), - ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE) + ea_get_ip(rb.attrs, &ea_gen_from, IPA_NONE) ))) return FLOWSPEC_INVALID; /* Find ASN of the best-match route, for use in next checks */ - u32 asn_b = rta_get_first_asn(rb->attrs); + u32 asn_b = rta_get_first_asn(rb.attrs); if (!asn_b) return FLOWSPEC_INVALID; @@ -3376,51 +3466,53 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list * return FLOWSPEC_INVALID; /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ - TRIE_WALK(tab_ip->trie, subnet, &dst) + RT_LOCKED(tab_ip, tip) { - net *nc = net_find_valid(tab_ip, &subnet); - if (!nc) - continue; + TRIE_WALK(tip->trie, subnet, &dst) + { + net *nc = net_find_valid(tip, &subnet); + if (!nc) + continue; - const rte *rc = &nc->routes->rte; - if (rt_get_source_attr(rc) != RTS_BGP) - return FLOWSPEC_INVALID; + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + RT_RETURN(tip, FLOWSPEC_INVALID); - if (rta_get_first_asn(rc->attrs) != asn_b) - return FLOWSPEC_INVALID; + if (rta_get_first_asn(rc->attrs) != asn_b) + RT_RETURN(tip, FLOWSPEC_INVALID); + } + TRIE_WALK_END; } - TRIE_WALK_END; return FLOWSPEC_VALID; } #endif /* CONFIG_BGP */ -static struct rte_storage * -rt_flowspec_update_rte(rtable *tab, net *n, rte *r) +static int +rt_flowspec_update_rte(rtable *tab, rte *r, rte *new) { #ifdef CONFIG_BGP if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) - return NULL; + return 0; struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); if (!bc->base_table) - return NULL; + return 0; struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); enum flowspec_valid old = rt_get_flowspec_valid(r), - valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + valid = rt_flowspec_check(bc->base_table, tab, r->net, r->attrs, p->is_interior); if (old == valid) - return NULL; - - rte new = *r; - ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid); + return 0; - return rte_store(&new, n, tab); + *new = *r; + ea_set_attr_u32(&new->attrs, &ea_gen_flowspec_valid, 0, valid); + return 1; #else - return NULL; + return 0; #endif } @@ -3455,10 +3547,9 @@ rt_flowspec_resolve_rte(rte *r, struct channel *c) } static inline int -rt_next_hop_update_net(rtable *tab, net *n) +rt_next_hop_update_net(struct rtable_private *tab, net *n) { - struct rte_storage *new; - int count = 0; + uint count = 0; int is_flow = net_is_flow(n->n.addr); struct rte_storage *old_best = n->routes; @@ -3466,49 +3557,90 @@ rt_next_hop_update_net(rtable *tab, net *n) return 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (is_flow || rta_next_hop_outdated(e->rte.attrs)) - count++; + count++; if (!count) return 0; struct rte_multiupdate { - struct rte_storage *old, *new; - } *updates = alloca(sizeof(struct rte_multiupdate) * count); + struct rte_storage *old, *new_stored; + rte new; + } *updates = tmp_allocz(sizeof(struct rte_multiupdate) * (count+1)); - int pos = 0; + struct rt_pending_export *last_pending = n->last; + + uint pos = 0; for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) - if (is_flow || rta_next_hop_outdated(e->rte.attrs)) - { - struct rte_storage *new = is_flow - ? rt_flowspec_update_rte(tab, n, &e->rte) - : rt_next_hop_update_rte(tab, n, &e->rte); + updates[pos++].old = e; + + /* This is an exceptional place where table can be unlocked while keeping its data: + * the reason why this is safe is that NHU must be always run from the same + * thread as cleanup routines, therefore the only real problem may arise when + * some importer does a change on this particular net (destination) while NHU + * is being computed. Statistically, this should almost never happen. In such + * case, we just drop all the computed changes and do it once again. + * */ + RT_UNLOCK(tab); + + uint mod = 0; + if (is_flow) + for (uint i = 0; i < pos; i++) + mod += rt_flowspec_update_rte(RT_PUB(tab), &updates[i].old->rte, &updates[i].new); - if (!new) - continue; + else + for (uint i = 0; i < pos; i++) + mod += rt_next_hop_update_rte(&updates[i].old->rte, &updates[i].new); - /* Call a pre-comparison hook */ - /* Not really an efficient way to compute this */ - if (e->rte.src->owner->rte_recalculate) - e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + RT_LOCK(RT_PUB(tab)); - updates[pos++] = (struct rte_multiupdate) { - .old = e, - .new = new, - }; + if (!mod) + return 0; + + /* Something has changed inbetween, retry NHU. */ + if (last_pending != n->last) + return rt_next_hop_update_net(tab, n); + + /* Now we reconstruct the original linked list */ + struct rte_storage **nptr = &n->routes; + for (uint i = 0; i < pos; i++) + { + updates[i].old->next = NULL; - /* Replace the route in the list */ - new->next = e->next; - *k = e = new; + struct rte_storage *put; + if (updates[i].new.attrs) + put = updates[i].new_stored = rte_store(&updates[i].new, n, tab); + else + put = updates[i].old; + + *nptr = put; + nptr = &put->next; + } + *nptr = NULL; + /* Call the pre-comparison hooks */ + for (uint i = 0; i < pos; i++) + if (updates[i].new_stored) + { /* Get a new ID for the route */ - new->rte.lastmod = current_time(); - new->rte.id = hmap_first_zero(&tab->id_map); - hmap_set(&tab->id_map, new->rte.id); + updates[i].new_stored->rte.lastmod = current_time(); + updates[i].new_stored->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, updates[i].new_stored->rte.id); + + /* Call a pre-comparison hook */ + /* Not really an efficient way to compute this */ + if (updates[i].old->rte.src->owner->rte_recalculate) + updates[i].old->rte.src->owner->rte_recalculate(tab, n, &updates[i].new_stored->rte, &updates[i].old->rte, &old_best->rte); } - ASSERT_DIE(pos <= count); - count = pos; +#if DEBUGGING + { + uint t = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + t++; + ASSERT_DIE(t == pos); + ASSERT_DIE(pos == count); + } +#endif /* Find the new best route */ struct rte_storage **new_best = NULL; @@ -3519,7 +3651,7 @@ rt_next_hop_update_net(rtable *tab, net *n) } /* Relink the new best route to the first position */ - new = *new_best; + struct rte_storage *new = *new_best; if (new != n->routes) { *new_best = new->next; @@ -3527,88 +3659,155 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } + uint total = 0; /* Announce the changes */ - for (int i=0; i<count; i++) + for (uint i=0; i<count; i++) { - _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + if (!updates[i].new_stored) + continue; + + _Bool nb = (new->rte.src == updates[i].new.src), ob = (i == 0); const char *best_indicator[2][2] = { { "autoupdated", "autoupdated [-best]" }, { "autoupdated [+best]", "autoupdated [best]" } }; - rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); - rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best); + rt_rte_trace_in(D_ROUTES, updates[i].new.sender->req, &updates[i].new, best_indicator[nb][ob]); + rte_announce(tab, n, updates[i].new_stored, updates[i].old, new, old_best); + + total++; } - return count; + return total; } static void -rt_next_hop_update(rtable *tab) +rt_next_hop_update(void *_tab) { + RT_LOCKED((rtable *) _tab, tab) + { + + /* If called from an uncork hook, reset the state */ + if (tab->nhu_corked) + { + ASSERT_DIE(tab->nhu_state == 0); + tab->nhu_state = tab->nhu_corked; + tab->nhu_corked = 0; + rt_trace(tab, D_STATES, "Next hop updater uncorked"); + } + + if (!tab->nhu_state) + bug("Called NHU event for no reason in table %s", tab->name); + + /* Check corkedness */ + if (rt_cork_check(tab->nhu_event)) + { + rt_trace(tab, D_STATES, "Next hop updater corked"); + if ((tab->nhu_state & NHU_RUNNING) + && !EMPTY_LIST(tab->exporter.pending) + && !tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + + tab->nhu_corked = tab->nhu_state; + tab->nhu_state = 0; + RT_RETURN(tab); + } + struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == NHU_CLEAN) - return; - + /* Initialize a new run */ if (tab->nhu_state == NHU_SCHEDULED) - { - FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = NHU_RUNNING; + { + FIB_ITERATE_INIT(fit, &tab->fib); + tab->nhu_state = NHU_RUNNING; - if (tab->flowspec_trie) - rt_flowspec_reset_trie(tab); - } + if (tab->flowspec_trie) + rt_flowspec_reset_trie(tab); + } + /* Walk the fib one net after another */ FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - ev_schedule(tab->rt_event); - return; + ev_schedule(tab->nhu_event); + RT_RETURN(tab); } + lp_state lps; + lp_save(tmp_linpool, &lps); max_feed -= rt_next_hop_update_net(tab, n); + lp_restore(tmp_linpool, &lps); } FIB_ITERATE_END; + /* Finished NHU, cleanup */ + rt_trace(tab, D_EVENTS, "NHU done, scheduling export timer"); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); + /* State change: * NHU_DIRTY -> NHU_SCHEDULED * NHU_RUNNING -> NHU_CLEAN */ - tab->nhu_state &= 1; + if ((tab->nhu_state &= NHU_SCHEDULED) == NHU_SCHEDULED) + ev_schedule(tab->nhu_event); - if (tab->nhu_state != NHU_CLEAN) - ev_schedule(tab->rt_event); + rt_unlock_table(tab); + + } } +void +rt_new_default_table(struct symbol *s) +{ + for (uint addr_type = 0; addr_type < NET_MAX; addr_type++) + if (s == new_config->def_tables[addr_type]) + { + s->table = rt_new_table(s, addr_type); + return; + } + + bug("Requested an unknown new default table: %s", s->name); +} struct rtable_config * -rt_new_table(struct symbol *s, uint addr_type) +rt_get_default_table(struct config *cf, uint addr_type) { - /* Hack that allows to 'redefine' the master table */ - if ((s->class == SYM_TABLE) && - (s->table == new_config->def_tables[addr_type]) && - ((addr_type == NET_IP4) || (addr_type == NET_IP6))) - return s->table; + struct symbol *ts = cf->def_tables[addr_type]; + if (!ts) + return NULL; + if (!ts->table) + rt_new_default_table(ts); + + return ts->table; +} + +struct rtable_config * +rt_new_table(struct symbol *s, uint addr_type) +{ struct rtable_config *c = cfg_allocz(sizeof(struct rtable_config)); - cf_define_symbol(s, SYM_TABLE, table, c); + if (s == new_config->def_tables[addr_type]) + s->table = c; + else + cf_define_symbol(s, SYM_TABLE, table, c); + c->name = s->name; c->addr_type = addr_type; c->gc_threshold = 1000; c->gc_period = (uint) -1; /* set in rt_postconfig() */ - c->min_settle_time = 1 S; - c->max_settle_time = 20 S; c->cork_threshold.low = 128; c->cork_threshold.high = 512; + c->debug = new_config->table_debug; add_tail(&new_config->tables, &c->n); /* First table of each type is kept as default */ if (! new_config->def_tables[addr_type]) - new_config->def_tables[addr_type] = c; + new_config->def_tables[addr_type] = s; return c; } @@ -3622,8 +3821,9 @@ rt_new_table(struct symbol *s, uint addr_type) * configuration. */ void -rt_lock_table(rtable *r) +rt_lock_table_priv(struct rtable_private *r, const char *file, uint line) { + rt_trace(r, D_STATES, "Locked at %s:%d", file, line); r->use_count++; } @@ -3636,20 +3836,32 @@ rt_lock_table(rtable *r) * for deletion by configuration changes. */ void -rt_unlock_table(rtable *r) +rt_unlock_table_priv(struct rtable_private *r, const char *file, uint line) { + rt_trace(r, D_STATES, "Unlocked at %s:%d", file, line); if (!--r->use_count && r->deleted) - { - struct config *conf = r->deleted; + /* Schedule the delete event to finish this up */ + ev_send(&global_event_list, ev_new_init(r->rp, rt_delete, r)); +} - /* Delete the routing table by freeing its pool */ - rt_shutdown(r); - config_del_obstacle(conf); - } +static void +rt_delete(void *tab_) +{ + /* We assume that nobody holds the table reference now as use_count is zero. + * Anyway the last holder may still hold the lock. Therefore we lock and + * unlock it the last time to be sure that nobody is there. */ + struct rtable_private *tab = RT_LOCK((rtable *) tab_); + struct config *conf = tab->deleted; + + RT_UNLOCK(RT_PUB(tab)); + + rfree(tab->rp); + config_del_obstacle(conf); } + static void -rt_check_cork_low(rtable *tab) +rt_check_cork_low(struct rtable_private *tab) { if (!tab->cork_active) return; @@ -3659,27 +3871,25 @@ rt_check_cork_low(rtable *tab) tab->cork_active = 0; rt_cork_release(); - if (config->table_debug) - log(L_TRACE "%s: Uncorked", tab->name); + rt_trace(tab, D_STATES, "Uncorked"); } } static void -rt_check_cork_high(rtable *tab) +rt_check_cork_high(struct rtable_private *tab) { if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) { tab->cork_active = 1; rt_cork_acquire(); - if (config->table_debug) - log(L_TRACE "%s: Corked", tab->name); + rt_trace(tab, D_STATES, "Corked"); } } static int -rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) +rt_reconfigure(struct rtable_private *tab, struct rtable_config *new, struct rtable_config *old) { if ((new->addr_type != old->addr_type) || (new->sorted != old->sorted) || @@ -3687,10 +3897,18 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old return 0; DBG("\t%s: same\n", new->name); - new->table = tab; + new->table = RT_PUB(tab); tab->name = new->name; tab->config = new; + if (tab->hostcache) + tab->hostcache->req.trace_routes = new->debug; + + struct rt_table_export_hook *hook; node *n; + WALK_LIST2(hook, n, tab->exporter.e.hooks, h.n) + if (hook->h.req->export_one == rt_flowspec_export_one) + hook->h.req->trace_routes = new->debug; + tab->cork_threshold = new->cork_threshold; if (new->cork_threshold.high != old->cork_threshold.high) @@ -3731,19 +3949,32 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *tab = o->table; + struct rtable_private *tab = RT_LOCK(o->table); + if (tab->deleted) + { + RT_UNLOCK(tab); continue; + } r = rt_find_table_config(new, o->name); if (r && !new->shutdown && rt_reconfigure(tab, r, o)) + { + RT_UNLOCK(tab); continue; + } DBG("\t%s: deleted\n", o->name); tab->deleted = old; config_add_obstacle(old); rt_lock_table(tab); + + if (tab->hostcache) + rt_stop_export(&tab->hostcache->req, NULL); + rt_unlock_table(tab); + + RT_UNLOCK(tab); } } @@ -3760,13 +3991,81 @@ rt_commit(struct config *new, struct config *old) static void rt_feed_done(struct rt_export_hook *c) { - c->event->hook = rt_export_hook; + c->event.hook = rt_export_hook; rt_set_export_state(c, TES_READY); rt_send_export_event(c); } +#define MAX_FEED_BLOCK 1024 +typedef struct { + uint cnt, pos; + union { + struct rt_pending_export *rpe; + struct { + rte **feed; + uint *start; + }; + }; +} rt_feed_block; + +static int +rt_prepare_feed(struct rt_table_export_hook *c, net *n, rt_feed_block *b) +{ + if (n->routes) + { + if (c->h.req->export_bulk) + { + uint cnt = rte_feed_count(n); + if (b->cnt && (b->cnt + cnt > MAX_FEED_BLOCK)) + return 0; + + if (!b->cnt) + { + b->feed = tmp_alloc(sizeof(rte *) * MAX(MAX_FEED_BLOCK, cnt)); + b->start = tmp_alloc(sizeof(uint) * ((cnt >= MAX_FEED_BLOCK) ? 2 : (MAX_FEED_BLOCK + 2 - cnt))); + } + + rte_feed_obtain(n, &b->feed[b->cnt], cnt); + b->start[b->pos++] = b->cnt; + b->cnt += cnt; + } + else if (b->pos == MAX_FEED_BLOCK) + return 0; + else + { + if (!b->pos) + b->rpe = tmp_alloc(sizeof(struct rt_pending_export) * MAX_FEED_BLOCK); + + b->rpe[b->pos++] = (struct rt_pending_export) { .new = n->routes, .new_best = n->routes }; + } + } + + rpe_mark_seen_all(&c->h, n->first, NULL); + return 1; +} + +static void +rt_process_feed(struct rt_table_export_hook *c, rt_feed_block *b) +{ + if (!b->pos) + return; + + if (c->h.req->export_bulk) + { + b->start[b->pos] = b->cnt; + for (uint p = 0; p < b->pos; p++) + { + rte **feed = &b->feed[b->start[p]]; + c->h.req->export_bulk(c->h.req, feed[0]->net, NULL, feed, b->start[p+1] - b->start[p]); + } + } + else + for (uint p = 0; p < b->pos; p++) + c->h.req->export_one(c->h.req, b->rpe[p].new->rte.net, &b->rpe[p]); +} + /** * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed @@ -3779,61 +4078,73 @@ rt_feed_done(struct rt_export_hook *c) static void rt_feed_by_fib(void *data) { - struct rt_export_hook *c = data; - + struct rt_table_export_hook *c = data; struct fib_iterator *fit = &c->feed_fit; - int max_feed = 256; + rt_feed_block block = {}; - ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { FIB_ITERATE_START(&tab->fib, fit, net, n) { - if (max_feed <= 0) + if ((c->h.req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->h.req->addr)) + { + if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING) + return; + + if (!rt_prepare_feed(c, n, &block)) { FIB_ITERATE_PUT(fit); - rt_send_export_event(c); + RT_UNLOCK(tab); + rt_process_feed(c, &block); + rt_send_export_event(&c->h); return; } - - if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) - return; - - if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) - max_feed -= rt_feed_net(c, n); + } } FIB_ITERATE_END; + } - rt_feed_done(c); + rt_process_feed(c, &block); + rt_feed_done(&c->h); } static void rt_feed_by_trie(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { ASSERT_DIE(c->walk_state); struct f_trie_walk_state *ws = c->walk_state; - int max_feed = 256; + ASSERT(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); - ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + do { + if (!c->walk_last.type) + continue; - net_addr addr; - while (trie_walk_next(ws, &addr)) - { - net *n = net_find(tab, &addr); + net *n = net_find(tab, &c->walk_last); if (!n) continue; - if ((max_feed -= rt_feed_net(c, n)) <= 0) - return; + if (atomic_load_explicit(&c->h.export_state, memory_order_acquire) != TES_FEEDING) + RT_RETURN(tab); - if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + if (!rt_prepare_feed(c, n, &block)) + { + RT_UNLOCK(tab); + rt_process_feed(c, &block); + rt_send_export_event(&c->h); return; + } } + while (trie_walk_next(ws, &c->walk_last)); rt_unlock_trie(tab, c->walk_lock); c->walk_lock = NULL; @@ -3841,70 +4152,59 @@ rt_feed_by_trie(void *data) mb_free(c->walk_state); c->walk_state = NULL; - rt_feed_done(c); + c->walk_last.type = 0; + + } + + rt_process_feed(c, &block); + rt_feed_done(&c->h); } static void rt_feed_equal(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + net *n; + + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) + { + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_EQUAL); - ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); + if (n = net_find(tab, c->h.req->addr)) + ASSERT_DIE(rt_prepare_feed(c, n, &block)); + } - net *n = net_find(tab, c->req->addr); if (n) - rt_feed_net(c, n); + rt_process_feed(c, &block); - rt_feed_done(c); + rt_feed_done(&c->h); } static void rt_feed_for(void *data) { - struct rt_export_hook *c = data; - rtable *tab = SKIP_BACK(rtable, exporter, c->table); - - ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); - - net *n = net_route(tab, c->req->addr); - if (n) - rt_feed_net(c, n); - - rt_feed_done(c); -} - -static uint -rt_feed_net(struct rt_export_hook *c, net *n) -{ - uint count = 0; + struct rt_table_export_hook *c = data; + rt_feed_block block = {}; + net *n; - if (c->req->export_bulk) + RT_LOCKED(RT_PUB(SKIP_BACK(struct rtable_private, exporter, c->table)), tab) { - count = rte_feed_count(n); - if (count) - { - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); - } - } + ASSERT_DIE(atomic_load_explicit(&c->h.export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->h.req->addr_mode == TE_ADDR_FOR); - else if (n->routes) - { - struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - count = 1; + if (n = net_route(tab, c->h.req->addr)) + ASSERT_DIE(rt_prepare_feed(c, n, &block)); } - for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) - rpe_mark_seen(c, rpe); + if (n) + rt_process_feed(c, &block); - return count; + rt_feed_done(&c->h); } + /* * Import table */ @@ -4030,7 +4330,41 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) } static void -rt_init_hostcache(rtable *tab) +hc_notify_dump_req(struct rt_export_request *req) +{ + debug(" Table %s (%p)\n", req->name, req); +} + +static void +hc_notify_export_one(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *first) +{ + struct hostcache *hc = SKIP_BACK(struct hostcache, req, req); + + /* No interest in this update, mark seen only */ + if (ev_active(&hc->update) || !trie_match_net(hc->trie, net)) + { + rpe_mark_seen_all(req->hook, first, NULL); + return; + } + + /* This net may affect some hostentries, check the actual change */ + rte *o = RTE_VALID_OR_NULL(first->old_best); + struct rte_storage *new_best = first->new_best; + + RPE_WALK(first, rpe, NULL) + { + rpe_mark_seen(req->hook, rpe); + new_best = rpe->new_best; + } + + /* Yes, something has actually changed. Do the hostcache update. */ + if (o != RTE_VALID_OR_NULL(new_best)) + ev_schedule_work(&hc->update); +} + + +static void +rt_init_hostcache(struct rtable_private *tab) { struct hostcache *hc = mb_allocz(tab->rp, sizeof(struct hostcache)); init_list(&hc->hostentries); @@ -4042,11 +4376,26 @@ rt_init_hostcache(rtable *tab) hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); + hc->update = (event) { + .hook = rt_update_hostcache, + .data = tab, + }; + + hc->req = (struct rt_export_request) { + .name = mb_sprintf(tab->rp, "%s.hcu.notifier", tab->name), + .list = &global_work_list, + .trace_routes = tab->config->debug, + .dump_req = hc_notify_dump_req, + .export_one = hc_notify_export_one, + }; + + rt_table_export_start_locked(tab, &hc->req); + tab->hostcache = hc; } static void -rt_free_hostcache(rtable *tab) +rt_free_hostcache(struct rtable_private *tab) { struct hostcache *hc = tab->hostcache; @@ -4068,16 +4417,6 @@ rt_free_hostcache(rtable *tab) */ } -static void -rt_notify_hostcache(rtable *tab, net *net) -{ - if (tab->hcu_scheduled) - return; - - if (trie_match_net(tab->hostcache->trie, net->n.addr)) - rt_schedule_hcu(tab); -} - static int if_local_addr(ip_addr a, struct iface *i) { @@ -4108,7 +4447,7 @@ rt_get_igp_metric(const rte *rt) } static int -rt_update_hostentry(rtable *tab, struct hostentry *he) +rt_update_hostentry(struct rtable_private *tab, struct hostentry *he) { ea_list *old_src = he->src; int direct = 0; @@ -4174,9 +4513,24 @@ done: } static void -rt_update_hostcache(rtable *tab) +rt_update_hostcache(void *data) { + rtable **nhu_pending; + + RT_LOCKED((rtable *) data, tab) + { + struct hostcache *hc = tab->hostcache; + + if (rt_cork_check(&hc->update)) + { + rt_trace(tab, D_STATES, "Hostcache update corked"); + RT_RETURN(tab); + } + + /* Destination schedule map */ + nhu_pending = tmp_allocz(sizeof(rtable *) * rtable_max_id); + struct hostentry *he; node *n, *x; @@ -4194,14 +4548,18 @@ rt_update_hostcache(rtable *tab) } if (rt_update_hostentry(tab, he)) - rt_schedule_nhu(he->tab); + nhu_pending[he->tab->id] = he->tab; } + } - tab->hcu_scheduled = 0; + for (uint i=0; i<rtable_max_id; i++) + if (nhu_pending[i]) + RT_LOCKED(nhu_pending[i], dst) + rt_schedule_nhu(dst); } static struct hostentry * -rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) +rt_get_hostentry(struct rtable_private *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; @@ -27,6 +27,7 @@ struct protocol; struct proto; struct channel; struct rte_src; +struct hostcache; struct symbol; struct timer; struct filter; @@ -52,31 +53,37 @@ struct rt_cork_threshold { struct rtable_config { node n; char *name; - struct rtable *table; + union rtable *table; struct proto_config *krt_attached; /* Kernel syncer attached to this table */ uint addr_type; /* Type of address data stored in table (NET_*) */ uint gc_threshold; /* Maximum number of operations before GC is run */ uint gc_period; /* Approximate time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ byte trie_used; /* Rtable has attached trie */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ + byte debug; /* Whether to log */ btime export_settle_time; /* Delay before exports are announced */ struct rt_cork_threshold cork_threshold; /* Cork threshold values */ }; struct rt_export_hook; struct rt_export_request; +struct rt_exporter; + +struct rt_exporter_class { + void (*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(void *_rt_export_hook); +}; struct rt_exporter { + const struct rt_exporter_class *class; + pool *rp; list hooks; /* Registered route export hooks */ uint addr_type; /* Type of address data exported (NET_*) */ +}; - struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); - void (*stop)(struct rt_export_hook *); - void (*done)(struct rt_export_hook *); - void (*used)(struct rt_exporter *); - +struct rt_table_exporter { + struct rt_exporter e; list pending; /* List of packed struct rt_pending_export */ struct timer *export_timer; @@ -84,39 +91,50 @@ struct rt_exporter { u64 next_seq; /* The next export will have this ID */ }; -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ +extern uint rtable_max_id; + +DEFINE_DOMAIN(rtable); + +/* The public part of rtable structure */ +#define RTABLE_PUBLIC \ + resource r; \ + node n; /* Node in list of all tables */ \ + char *name; /* Name of this table */ \ + uint addr_type; /* Type of address data stored in table (NET_*) */ \ + uint id; /* Integer table ID for fast lookup */ \ + DOMAIN(rtable) lock; /* Lock to take to access the private parts */ \ + struct rtable_config *config; /* Configuration of this table */ \ + +/* The complete rtable structure */ +struct rtable_private { + /* Once more the public part */ + RTABLE_PUBLIC; + + /* Here the private items not to be accessed without locking */ pool *rp; /* Resource pool to allocate everything from, including itself */ struct slab *rte_slab; /* Slab to allocate route objects */ struct fib fib; struct f_trie *trie; /* Trie of prefixes defined in fib */ - char *name; /* Name of this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ int use_count; /* Number of protocols using this table */ u32 rt_count; /* Number of routes in the table */ list imports; /* Registered route importers */ - struct rt_exporter exporter; /* Exporter API structure */ + struct rt_table_exporter exporter; /* Exporter API structure */ struct hmap id_map; struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ struct config *deleted; /* Table doesn't exist in current configuration, * delete as soon as use_count becomes 0 and remove * obstacle from this routing table. */ struct event *rt_event; /* Routing table event */ - struct event *uncork_event; /* Called when uncork happens */ + struct event *nhu_event; /* Specific event for next hop update */ struct timer *prune_timer; /* Timer for periodic pruning / GC */ btime last_rt_change; /* Last time when route changed */ - btime base_settle_time; /* Start time of rtable settling interval */ btime gc_time; /* Time of last GC */ uint gc_counter; /* Number of operations since last GC */ byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ byte prune_trie; /* Prune prefix trie during next table prune */ - byte hcu_scheduled; /* Hostcache update is scheduled */ - byte hcu_corked; /* Hostcache update is corked with this state */ byte nhu_state; /* Next Hop Update state */ byte nhu_corked; /* Next Hop Update is corked with this state */ byte export_used; /* Pending Export pruning is scheduled */ @@ -130,25 +148,28 @@ typedef struct rtable { u32 trie_old_lock_count; /* Old prefix trie locked by walks */ struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ - list subscribers; /* Subscribers for notifications */ - struct timer *settle_timer; /* Settle time for notifications */ - list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +}; + +/* The final union private-public rtable structure */ +typedef union rtable { + struct { + RTABLE_PUBLIC; + }; + struct rtable_private priv; } rtable; -struct rt_subscription { - node n; - rtable *tab; - event *event; - event_list *list; -}; +#define RT_IS_LOCKED(tab) DOMAIN_IS_LOCKED(rtable, (tab)->lock) -struct rt_flowspec_link { - node n; - rtable *src; - rtable *dst; - u32 uc; -}; +#define RT_LOCK(tab) ({ LOCK_DOMAIN(rtable, (tab)->lock); &(tab)->priv; }) +#define RT_UNLOCK(tab) UNLOCK_DOMAIN(rtable, (tab)->lock) +#define RT_PRIV(tab) ({ ASSERT_DIE(RT_IS_LOCKED((tab))); &(tab)->priv; }) +#define RT_PUB(tab) SKIP_BACK(rtable, priv, tab) + +#define RT_LOCKED(tpub, tpriv) for (struct rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL)) +#define RT_RETURN(tpriv, ...) do { RT_UNLOCK(tpriv); return __VA_ARGS__; } while (0) + +#define RT_PRIV_SAME(tpriv, tpub) (&(tpub)->priv == (tpriv)) extern struct rt_cork { _Atomic uint active; @@ -184,43 +205,12 @@ static inline int rt_cork_check(event *e) } -#define NHU_CLEAN 0 -#define NHU_SCHEDULED 1 -#define NHU_RUNNING 2 -#define NHU_DIRTY 3 - typedef struct network { struct rte_storage *routes; /* Available routes for this network */ struct rt_pending_export *first, *last; struct fib_node n; /* FIB flags reserved for kernel syncer */ } net; -struct hostcache { - slab *slab; /* Slab holding all hostentries */ - struct hostentry **hash_table; /* Hash table for hostentries */ - unsigned hash_order, hash_shift; - unsigned hash_max, hash_min; - unsigned hash_items; - linpool *lp; /* Linpool for trie */ - struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ - list hostentries; /* List of all hostentries */ - byte update_hostcache; -}; - -struct hostentry { - node ln; - ip_addr addr; /* IP address of host, part of key */ - ip_addr link; /* (link-local) IP address of host, used as gw - if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ - struct hostentry *next; /* Next in hash chain */ - unsigned hash_key; /* Hash key */ - unsigned uc; /* Use count */ - ea_list *src; /* Source attributes */ - byte nexthop_linkable; /* Nexthop list is completely non-device */ - u32 igp_metric; /* Chosen route IGP metric */ -}; - struct rte_storage { struct rte_storage *next; /* Next in chain */ struct rte rte; /* Route data */ @@ -238,6 +228,8 @@ struct rt_import_request { char *name; u8 trace_routes; + event_list *list; /* Where to schedule announce events */ + void (*dump_req)(struct rt_import_request *req); void (*log_state_change)(struct rt_import_request *req, u8 state); /* Preimport is called when the @new route is just-to-be inserted, replacing @old. @@ -269,6 +261,7 @@ struct rt_import_hook { u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ + event announce_event; /* This event announces table updates */ }; struct rt_pending_export { @@ -314,29 +307,44 @@ struct rt_export_hook { u32 withdraws_received; /* Number of route withdraws received */ } stats; + btime last_state_change; /* Time of last state transition */ + + _Atomic u8 export_state; /* Route export state (TES_*, see below) */ + struct event event; /* Event running all the export operations */ + + struct bmap seq_map; /* Keep track which exports were already procesed */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +struct rt_table_export_hook { + union { + struct rt_export_hook h; + struct { /* Overriding the parent structure beginning */ + node _n; + struct rt_table_exporter *table; + }; + }; + union { struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ struct { struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ struct f_trie *walk_lock; /* Locked trie for walking */ + union { /* Last net visited but not processed */ + net_addr walk_last; + net_addr_ip4 walk_last_ip4; + net_addr_ip6 walk_last_ip6; + }; }; - u32 hash_iter; /* Iterator over hash */ }; - struct bmap seq_map; /* Keep track which exports were already procesed */ - - struct rt_pending_export * _Atomic last_export;/* Last export processed */ + struct rt_pending_export *_Atomic last_export;/* Last export processed */ struct rt_pending_export *rpe_next; /* Next pending export to process */ - btime last_state_change; /* Time of last state transition */ - u8 refeed_pending; /* Refeeding and another refeed is scheduled */ - _Atomic u8 export_state; /* Route export state (TES_*, see below) */ u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ - struct event *event; /* Event running all the export operations */ - - void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ }; #define TIS_DOWN 0 @@ -365,7 +373,8 @@ struct rt_export_hook { #define TFT_HASH 3 void rt_request_import(rtable *tab, struct rt_import_request *req); -void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); +void rt_request_export(rtable *tab, struct rt_export_request *req); +void rt_request_export_other(struct rt_exporter *tab, struct rt_export_request *req); void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req); @@ -382,15 +391,35 @@ void rt_set_export_state(struct rt_export_hook *hook, u8 state); void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); +/* + * For table export processing + */ + /* Get next rpe. If src is given, it must match. */ struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); +/* Walk all rpe's */ +#define RPE_WALK(first, it, src) \ + for (struct rt_pending_export *it = (first); it; it = rpe_next(it, (src))) + /* Mark the pending export processed */ void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); +#define rpe_mark_seen_all(hook, first, src) \ + RPE_WALK((first), _rpe, (src)) rpe_mark_seen((hook), _rpe) + /* Get pending export seen status */ int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); +/* + * For rt_export_hook and rt_exporter inheritance + */ + +void rt_init_export(struct rt_exporter *re, struct rt_export_hook *hook); +struct rt_export_hook *rt_alloc_export(struct rt_exporter *re, uint size); +void rt_export_stopped(struct rt_export_hook *hook); +void rt_exporter_init(struct rt_exporter *re); + /* Types of route announcement, also used as flags */ #define RA_UNDEF 0 /* Undefined RA type */ #define RA_OPTIMAL 1 /* Announcement of optimal route change */ @@ -404,6 +433,49 @@ int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); #define RIC_REJECT -1 /* Rejected by protocol */ #define RIC_DROP -2 /* Silently dropped by protocol */ +/* + * Next hop update data structures + */ + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + ea_list *src; /* Source attributes */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for hostentries */ + unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + event update; + struct rt_export_request req; /* Notifier */ +}; + +struct rt_flowspec_link { + rtable *src; + rtable *dst; + u32 uc; + struct rt_export_request req; +}; + #define rte_update channel_rte_import /** * rte_update - enter a new update to a routing table @@ -446,32 +518,36 @@ void rt_init(void); void rt_preconfig(struct config *); void rt_postconfig(struct config *); void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); -struct f_trie * rt_lock_trie(rtable *tab); -void rt_unlock_trie(rtable *tab, struct f_trie *trie); -void rt_subscribe(rtable *tab, struct rt_subscription *s); -void rt_unsubscribe(struct rt_subscription *s); +void rt_lock_table_priv(struct rtable_private *, const char *file, uint line); +void rt_unlock_table_priv(struct rtable_private *, const char *file, uint line); +static inline void rt_lock_table_pub(rtable *t, const char *file, uint line) +{ RT_LOCKED(t, tt) rt_lock_table_priv(tt, file, line); } +static inline void rt_unlock_table_pub(rtable *t, const char *file, uint line) +{ RT_LOCKED(t, tt) rt_unlock_table_priv(tt, file, line); } + +#define rt_lock_table(t) _Generic((t), rtable *: rt_lock_table_pub, \ + struct rtable_private *: rt_lock_table_priv)((t), __FILE__, __LINE__) +#define rt_unlock_table(t) _Generic((t), rtable *: rt_unlock_table_pub, \ + struct rtable_private *: rt_unlock_table_priv)((t), __FILE__, __LINE__) + +struct f_trie * rt_lock_trie(struct rtable_private *tab); +void rt_unlock_trie(struct rtable_private *tab, struct f_trie *trie); void rt_flowspec_link(rtable *src, rtable *dst); void rt_flowspec_unlink(rtable *src, rtable *dst); rtable *rt_setup(pool *, struct rtable_config *); -static inline void rt_shutdown(rtable *r) { rfree(r->rp); } -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) +static inline net *net_find(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(struct rtable_private *tab, const net_addr *addr) { net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -net *net_get(rtable *tab, const net_addr *addr); -net *net_route(rtable *tab, const net_addr *n); +static inline net *net_get(struct rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +net *net_route(struct rtable_private *tab, const net_addr *n); int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); void rt_refresh_begin(struct rt_import_request *); void rt_refresh_end(struct rt_import_request *); void rt_modify_stale(rtable *t, struct rt_import_request *); -void rt_schedule_prune(rtable *t); +void rt_schedule_prune(struct rtable_private *t); void rte_dump(struct rte_storage *); -void rte_free(struct rte_storage *); -struct rte_storage *rte_store(const rte *, net *net, rtable *); void rt_dump(rtable *); void rt_dump_all(void); void rt_dump_hooks(rtable *); @@ -481,6 +557,8 @@ void rt_reload_channel_abort(struct channel *c); void rt_refeed_channel(struct channel *c); void rt_prune_sync(rtable *t, int all); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); +void rt_new_default_table(struct symbol *s); +struct rtable_config *rt_get_default_table(struct config *cf, uint addr_type); static inline int rt_is_ip(rtable *tab) { return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } @@ -530,7 +608,7 @@ struct rt_show_data { void rt_show(struct rt_show_data *); struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name); -struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); /* Value of table definition mode in struct rt_show_data */ #define RSD_TDB_DEFAULT 0 /* no table specified */ @@ -557,7 +635,7 @@ struct hostentry_adata { }; void -ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); +ea_set_hostentry(ea_list **to, rtable *dep, rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); void ea_show_hostentry(const struct adata *ad, byte *buf, uint size); void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad); diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index e7d27f74..331ba730 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -113,7 +113,6 @@ #define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2 #define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n) -DEFINE_DOMAIN(rtable); #define BFD_LOCK LOCK_DOMAIN(rtable, bfd_global.lock) #define BFD_UNLOCK UNLOCK_DOMAIN(rtable, bfd_global.lock) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 2543ee73..bfdd9ac5 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1867,25 +1867,30 @@ bgp_free_pending_tx(struct bgp_channel *c) * Prefix hash table exporter */ +struct bgp_out_export_hook { + struct rt_export_hook h; + u32 hash_iter; /* Iterator over hash */ +}; + static void bgp_out_table_feed(void *data) { - struct rt_export_hook *hook = data; - struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table); + struct bgp_out_export_hook *hook = data; + struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->h.table); struct bgp_pending_tx *c = bc->ptx; int max = 512; - const net_addr *neq = (hook->req->addr_mode == TE_ADDR_EQUAL) ? hook->req->addr : NULL; + const net_addr *neq = (hook->h.req->addr_mode == TE_ADDR_EQUAL) ? hook->h.req->addr : NULL; const net_addr *cand = NULL; do { HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter) { - switch (hook->req->addr_mode) + switch (hook->h.req->addr_mode) { case TE_ADDR_IN: - if (!net_in_netX(n->net, hook->req->addr)) + if (!net_in_netX(n->net, hook->h.req->addr)) continue; /* fall through */ case TE_ADDR_NONE: @@ -1897,7 +1902,7 @@ bgp_out_table_feed(void *data) case TE_ADDR_FOR: if (!neq) { - if (net_in_netX(hook->req->addr, n->net) && (!cand || (n->net->length > cand->length))) + if (net_in_netX(hook->h.req->addr, n->net) && (!cand || (n->net->length > cand->length))) cand = n->net; continue; } @@ -1942,13 +1947,13 @@ bgp_out_table_feed(void *data) .new = &es, .new_best = &es, }; - if (hook->req->export_bulk) + if (hook->h.req->export_bulk) { rte *feed = &es.rte; - hook->req->export_bulk(hook->req, n->net, &rpe, &feed, 1); + hook->h.req->export_bulk(hook->h.req, n->net, &rpe, &feed, 1); } - else if (hook->req->export_one) - hook->req->export_one(hook->req, n->net, &rpe); + else if (hook->h.req->export_one) + hook->h.req->export_one(hook->h.req, n->net, &rpe); else bug("No export method in export request"); } @@ -1959,36 +1964,51 @@ bgp_out_table_feed(void *data) } while (neq); if (hook->hash_iter) - ev_schedule_work(hook->event); + ev_schedule_work(&hook->h.event); else - rt_set_export_state(hook, TES_READY); + rt_set_export_state(&hook->h, TES_READY); } -static struct rt_export_hook * -bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED) +static void +bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req) { - struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, re); - pool *p = rp_new(bc->c.proto->pool, "Export hook"); - struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); - hook->pool = p; - hook->event = ev_new_init(p, bgp_out_table_feed, hook); - hook->feed_type = TFT_HASH; + req->hook = rt_alloc_export(re, sizeof(struct bgp_out_export_hook)); + req->hook->req = req; - return hook; + struct bgp_out_export_hook *hook = SKIP_BACK(struct bgp_out_export_hook, h, req->hook); + + hook->h.event.hook = bgp_out_table_feed; + rt_init_export(re, req->hook); } +static void +bgp_out_table_export_done(void *data) +{ + struct bgp_out_export_hook *hook = data; + struct rt_export_request *req = hook->h.req; + void (*stopped)(struct rt_export_request *) = hook->h.stopped; + + rt_export_stopped(&hook->h); + CALL(stopped, req); +} + +static const struct rt_exporter_class bgp_out_table_export_class = { + .start = bgp_out_table_export_start, + .done = bgp_out_table_export_done, +}; + void bgp_setup_out_table(struct bgp_channel *c) { ASSERT_DIE(c->c.out_table == NULL); c->prefix_exporter = (struct rt_exporter) { + .class = &bgp_out_table_export_class, .addr_type = c->c.table->addr_type, - .start = bgp_out_table_export_start, + .rp = c->c.proto->pool, }; - init_list(&c->prefix_exporter.hooks); - init_list(&c->prefix_exporter.pending); + rt_exporter_init(&c->prefix_exporter); c->c.out_table = &c->prefix_exporter; } @@ -2510,7 +2530,7 @@ use_deterministic_med(struct rte_storage *r) } int -bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) +bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best) { rte *key = new ? new : old; u32 lpref = rt_get_preference(key); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 68c788ea..573e3d25 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -847,7 +847,7 @@ bgp_graceful_restart_feed(struct bgp_channel *c) .export_one = bgp_graceful_restart_drop_export, }; - rt_request_export(&c->c.table->exporter, &c->stale_feed); + rt_request_export(c->c.table, &c->stale_feed); } @@ -1934,7 +1934,7 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 return cc2->c.table; /* Last, try default table of given type */ - if (tab = cf->c.global->def_tables[type]) + if (tab = rt_get_default_table(cf->c.global, type)) return tab; cf_error("Undefined IGP table"); @@ -1953,7 +1953,7 @@ bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc) return cc2->c.table; /* Last, try default table of given type */ - struct rtable_config *tab = cf->c.global->def_tables[type]; + struct rtable_config *tab = rt_get_default_table(cf->c.global, type); if (tab) return tab; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 0cd327a2..1bcfb915 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -599,7 +599,7 @@ void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bu int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); -int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); +int bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best); void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count); u32 bgp_rte_igp_metric(const rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index f4c09ab1..f07f9ca2 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -228,7 +228,7 @@ mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern) NODE_VALID(tn); tn = tn->next) { - tab = SKIP_BACK(struct rtable, n, tn); + tab = SKIP_BACK(rtable, n, tn); if (patmatch(pattern, tab->name) && ((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6))) return tab; @@ -243,13 +243,15 @@ mrt_next_table(struct mrt_table_dump_state *s) rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); if (s->table) - rt_unlock_table(s->table); + RT_LOCKED(s->table, tab) + rt_unlock_table(tab); s->table = tab; s->ipv4 = tab ? (tab->addr_type == NET_IP4) : 0; if (s->table) - rt_lock_table(s->table); + RT_LOCKED(s->table, tab) + rt_lock_table(tab); return s->table; } @@ -573,14 +575,18 @@ mrt_table_dump_init(pool *pp) static void mrt_table_dump_free(struct mrt_table_dump_state *s) { - if (s->table_open) - FIB_ITERATE_UNLINK(&s->fit, &s->table->fib); - if (s->table) - rt_unlock_table(s->table); + RT_LOCKED(s->table, tab) + { + if (s->table_open) + FIB_ITERATE_UNLINK(&s->fit, &tab->fib); + + rt_unlock_table(tab); + } if (s->table_ptr) - rt_unlock_table(s->table_ptr); + RT_LOCKED(s->table_ptr, tab) + rt_unlock_table(tab); config_del_obstacle(s->config); @@ -606,16 +612,19 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_dump(s); - FIB_ITERATE_INIT(&s->fit, &s->table->fib); + RT_LOCKED(s->table, tab) + { + + FIB_ITERATE_INIT(&s->fit, &tab->fib); s->table_open = 1; step: - FIB_ITERATE_START(&s->table->fib, &s->fit, net, n) + FIB_ITERATE_START(&tab->fib, &s->fit, net, n) { if (s->max < 0) { FIB_ITERATE_PUT(&s->fit); - return 0; + RT_RETURN(tab, 0); } /* With Always ADD_PATH option, we jump directly to second phase */ @@ -630,6 +639,8 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) FIB_ITERATE_END; s->table_open = 0; + } + mrt_close_file(s); mrt_peer_table_flush(s); } @@ -661,7 +672,8 @@ mrt_timer(timer *t) s->always_add_path = cf->always_add_path; if (s->table_ptr) - rt_lock_table(s->table_ptr); + RT_LOCKED(s->table_ptr, tab) + rt_lock_table(tab); p->table_dump = s; ev_schedule(p->event); @@ -737,7 +749,8 @@ mrt_dump_cmd(struct mrt_dump_data *d) s->filename = d->filename; if (s->table_ptr) - rt_lock_table(s->table_ptr); + RT_LOCKED(s->table_ptr, tab) + rt_lock_table(tab); this_cli->cont = mrt_dump_cont; this_cli->cleanup = mrt_dump_cleanup; diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h index 3b83aa39..f535a391 100644 --- a/proto/mrt/mrt.h +++ b/proto/mrt/mrt.h @@ -40,7 +40,7 @@ struct mrt_proto { struct mrt_dump_data { const char *table_expr; - struct rtable *table_ptr; + rtable *table_ptr; const struct filter *filter; const char *filename; }; @@ -60,7 +60,7 @@ struct mrt_table_dump_state { /* Configuration information */ const char *table_expr; /* Wildcard for table name (or NULL) */ - struct rtable *table_ptr; /* Explicit table (or NULL) */ + rtable *table_ptr; /* Explicit table (or NULL) */ const struct filter *filter; /* Optional filter */ const char *filename; /* Filename pattern */ int always_add_path; /* Always use *_ADDPATH message subtypes */ @@ -73,7 +73,7 @@ struct mrt_table_dump_state { HASH(struct mrt_peer_entry) peer_hash; /* Hash for peers to find the index */ - struct rtable *table; /* Processed table, NULL initially */ + rtable *table; /* Processed table, NULL initially */ struct fib_iterator fit; /* Iterator in processed table */ int table_open; /* Whether iterator is linked */ diff --git a/proto/perf/perf.c b/proto/perf/perf.c index d82ac8aa..9adafe5a 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -202,7 +202,9 @@ perf_loop(void *data) p->exp++; } - rt_schedule_prune(P->main_channel->table); + RT_LOCKED(P->main_channel->table, tab) + rt_schedule_prune(tab); + ev_schedule(p->loop); } diff --git a/proto/rip/rip.c b/proto/rip/rip.c index ab0e3f4b..183fc265 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -1121,24 +1121,18 @@ rip_rte_proto(struct rte *rte) SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL; } -static int -rip_rte_better(struct rte *new, struct rte *old) -{ - ASSERT_DIE(new->src == old->src); - struct rip_proto *p = rip_rte_proto(new); - - u32 new_metric = ea_get_int(new->attrs, &ea_rip_metric, p->infinity); - u32 old_metric = ea_get_int(old->attrs, &ea_rip_metric, p->infinity); - - return new_metric < old_metric; -} - static u32 rip_rte_igp_metric(const rte *rt) { return ea_get_int(rt->attrs, &ea_rip_metric, IGP_METRIC_UNKNOWN); } +static int +rip_rte_better(struct rte *new, struct rte *old) +{ + return rip_rte_igp_metric(new) < rip_rte_igp_metric(old); +} + static void rip_postconfig(struct proto_config *CF) { diff --git a/proto/static/static.c b/proto/static/static.c index 65f3eccc..cb764a1c 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -436,11 +436,11 @@ static_postconfig(struct proto_config *CF) if (!cf->igp_table_ip4) cf->igp_table_ip4 = (cc->table->addr_type == NET_IP4) ? - cc->table : cf->c.global->def_tables[NET_IP4]; + cc->table : rt_get_default_table(cf->c.global, NET_IP4); if (!cf->igp_table_ip6) cf->igp_table_ip6 = (cc->table->addr_type == NET_IP6) ? - cc->table : cf->c.global->def_tables[NET_IP6]; + cc->table : rt_get_default_table(cf->c.global, NET_IP6); WALK_LIST(r, cf->routes) if (r->net && (r->net->type != CF->net_type)) diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index cd44a1e5..b0033a65 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -366,6 +366,13 @@ rte_feed_obtain(net *n, rte **feed, uint count) static struct rte * krt_export_net(struct krt_proto *p, net *net) { + /* FIXME: Here we are calling filters in table-locked context when exporting + * to kernel. Here BIRD can crash if the user requested ROA check in kernel + * export filter. It doesn't make much sense to write the filters like this, + * therefore we may keep this unfinished piece of work here for later as it + * won't really affect anybody. */ + ASSERT_DIE(RT_IS_LOCKED(p->p.main_channel->table)); + struct channel *c = p->p.main_channel; const struct filter *filter = c->out_filter; @@ -446,6 +453,9 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + RT_LOCKED(p->p.main_channel->table, tab) + { + /* Deleting all routes if flush is requested */ if (p->flush_routes) goto delete; @@ -454,7 +464,7 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) if (!p->ready) goto ignore; - net *net = net_find(p->p.main_channel->table, e->net); + net *net = net_find(tab, e->net); if (!net || !krt_is_installed(p, net)) goto delete; @@ -499,7 +509,9 @@ delete: krt_replace_rte(p, e->net, NULL, e); goto done; -done: +done:; + } + lp_flush(krt_filter_lp); } @@ -512,7 +524,8 @@ krt_init_scan(struct krt_proto *p) static void krt_prune(struct krt_proto *p) { - struct rtable *t = p->p.main_channel->table; + RT_LOCKED(p->p.main_channel->table, t) + { KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); FIB_WALK(&t->fib, net, n) @@ -534,6 +547,8 @@ krt_prune(struct krt_proto *p) if (p->ready) p->initialized = 1; + + } } static void |