From 8c92f47ac77f267368b6d6bd161689a0c0bc5e5a Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 16 Jun 2022 23:24:53 +0200 Subject: Route attribute storage keeps the previous layers --- lib/attrs.h | 2 +- lib/route.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/attrs.h b/lib/attrs.h index 79b7b14a..a75abcd3 100644 --- a/lib/attrs.h +++ b/lib/attrs.h @@ -42,7 +42,7 @@ lp_store_adata(struct linpool *pool, const void *buf, uint len) #define tmp_copy_adata(ad) tmp_store_adata((ad)->data, (ad)->length) static inline int adata_same(const struct adata *a, const struct adata *b) -{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } +{ return (!a && !b) || (a->length == b->length && !memcmp(a->data, b->data, a->length)); } diff --git a/lib/route.h b/lib/route.h index 1b2f4de6..7e28b91e 100644 --- a/lib/route.h +++ b/lib/route.h @@ -237,7 +237,7 @@ ea_list *ea_append(ea_list *to, ea_list *what); void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); /* Normalize ea_list; allocates the result from tmp_linpool */ -ea_list *ea_normalize(const ea_list *e); +ea_list *ea_normalize(ea_list *e, int overlay); uint ea_list_size(ea_list *); void ea_list_copy(ea_list *dest, ea_list *src, uint size); @@ -414,7 +414,7 @@ static inline int rte_dest(const rte *r) void rta_init(void); ea_list *ea_lookup(ea_list *); /* Get a cached (and normalized) variant of this attribute list */ -static inline int ea_is_cached(ea_list *r) { return r->flags & EALF_CACHED; } +static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; } static inline ea_list *ea_clone(ea_list *r) { r->uc++; return r; } void ea__free(ea_list *r); static inline void ea_free(ea_list *r) { if (r && !--r->uc) ea__free(r); } -- cgit v1.2.3 From becab5072d6d84d6f9c9402387a9e1c14dcc384d Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Thu, 16 Jun 2022 23:24:56 +0200 Subject: Import tables are stored as an attribute layer inside the main tables. The separate import tables were too memory-greedy, there is no need for them being stored as full-sized tables. --- lib/route.h | 3 +- nest/config.Y | 13 +++- nest/proto.c | 77 +++++++++++----------- nest/protocol.h | 16 ++--- nest/rt-attr.c | 4 +- nest/rt-show.c | 12 +++- nest/rt-table.c | 179 +++++++--------------------------------------------- nest/rt.h | 1 + proto/bgp/bgp.c | 12 +--- proto/bgp/packets.c | 2 +- proto/ospf/rt.c | 2 +- proto/perf/perf.c | 2 +- proto/pipe/pipe.c | 2 +- 13 files changed, 100 insertions(+), 225 deletions(-) (limited to 'lib') diff --git a/lib/route.h b/lib/route.h index 7e28b91e..f7b089d9 100644 --- a/lib/route.h +++ b/lib/route.h @@ -161,6 +161,7 @@ typedef struct ea_list { #define EALF_SORTED 1 /* Attributes are sorted by code */ #define EALF_BISECT 2 /* Use interval bisection for searching */ #define EALF_CACHED 4 /* List is cached */ +#define EALF_OVERLAY 8 /* List is an overlay in the same table */ struct ea_class { #define EA_CLASS_INSIDE \ @@ -413,7 +414,7 @@ static inline int rte_dest(const rte *r) } void rta_init(void); -ea_list *ea_lookup(ea_list *); /* Get a cached (and normalized) variant of this attribute list */ +ea_list *ea_lookup(ea_list *, int overlay); /* Get a cached (and normalized) variant of this attribute list */ static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; } static inline ea_list *ea_clone(ea_list *r) { r->uc++; return r; } void ea__free(ea_list *r); diff --git a/nest/config.Y b/nest/config.Y index 2d73b4c7..ea7e1266 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -311,7 +311,14 @@ channel_item_: | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } - | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; } + | IMPORT KEEP FILTERED bool { + if ($4) + this_channel->in_keep |= RIK_REJECTED; + else if ((this_channel->in_keep & RIK_PREFILTER) == RIK_PREFILTER) + cf_error("Import keep filtered is implied by the import table."); + else + this_channel->in_keep &= ~RIK_REJECTED; + } | RPKI RELOAD bool { this_channel->rpki_reload = $3; } ; @@ -674,8 +681,8 @@ r_args: $$->tables_defined_by = RSD_TDB_ALL; } | r_args IMPORT TABLE channel_arg { - if (!$4->in_table) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->in_table); + if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); + rt_show_add_table($$, $4->table)->prefilter = $4; $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { diff --git a/nest/proto.c b/nest/proto.c index 77817888..5e67d940 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -224,7 +224,7 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->preference = cf->preference; c->debug = cf->debug; c->merge_limit = cf->merge_limit; - c->in_keep_filtered = cf->in_keep_filtered; + c->in_keep = cf->in_keep; c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; @@ -294,7 +294,7 @@ static void channel_roa_in_changed(struct rt_subscription *s) { struct channel *c = s->data; - int active = c->reload_event && ev_active(c->reload_event); + int active = !!c->reload_req.hook; CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); @@ -379,7 +379,7 @@ channel_roa_subscribe_filter(struct channel *c, int dir) #ifdef CONFIG_BGP /* No automatic reload for BGP channels without in_table / out_table */ if (c->channel == &channel_bgp) - valid = dir ? !!c->in_table : !!c->out_table; + valid = dir ? ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) : !!c->out_table; #endif struct filter_iterator fit; @@ -534,9 +534,6 @@ channel_import_stopped(struct rt_import_request *req) req->hook = NULL; - if (c->in_table) - rt_prune_sync(c->in_table, 1); - mb_free(c->in_req.name); c->in_req.name = NULL; @@ -603,43 +600,48 @@ channel_schedule_reload(struct channel *c) { ASSERT(c->in_req.hook); - rt_reload_channel_abort(c); - ev_schedule_work(c->reload_event); + rt_request_export(c->table, &c->reload_req); } static void -channel_reload_loop(void *ptr) +channel_reload_stopped(struct rt_export_request *req) { - struct channel *c = ptr; - - /* Start reload */ - if (!c->reload_active) - c->reload_pending = 0; - - if (!rt_reload_channel(c)) - { - ev_schedule_work(c->reload_event); - return; - } + struct channel *c = SKIP_BACK(struct channel, reload_req, req); /* Restart reload */ if (c->reload_pending) channel_request_reload(c); } +static void +channel_reload_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, channel_reload_stopped); +} + +static void +channel_reload_dump_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, reload_req, req); + debug(" Channel %s.%s import reload request %p\n", c->proto->name, c->name, req); +} + +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + /* Called by protocol to activate in_table */ void channel_setup_in_table(struct channel *c) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); - - cf->name = "import"; - cf->addr_type = c->net_type; - cf->internal = 1; - - c->in_table = rt_setup(c->proto->pool, cf); + c->reload_req = (struct rt_export_request) { + .name = mb_sprintf(c->proto->pool, "%s.%s.import", c->proto->name, c->name), + .trace_routes = c->debug | c->proto->debug, + .export_bulk = channel_reload_export_bulk, + .dump_req = channel_reload_dump_req, + .log_state_change = channel_reload_log_state_change, + }; - c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c); + c->in_keep |= RIK_PREFILTER; } /* Called by protocol to activate out_table */ @@ -680,10 +682,10 @@ static void channel_do_pause(struct channel *c) { /* Need to abort feeding */ - if (c->reload_event) + if (c->reload_req.hook) { - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); + c->reload_pending = 0; + rt_stop_export(&c->reload_req, channel_reload_stopped); } /* Stop export */ @@ -710,15 +712,13 @@ channel_do_stop(struct channel *c) CALL(c->channel->shutdown, c); /* This have to be done in here, as channel pool is freed before channel_do_down() */ - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; } static void channel_do_down(struct channel *c) { - ASSERT(!c->reload_active); + ASSERT(!c->reload_req.hook); c->proto->active_channels--; @@ -726,8 +726,6 @@ channel_do_down(struct channel *c) memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; /* The in_table and out_table are going to be freed by freeing their resource pools. */ @@ -922,7 +920,9 @@ int channel_reconfigure(struct channel *c, struct channel_config *cf) { /* FIXME: better handle these changes, also handle in_keep_filtered */ - if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode))) + if ((c->table != cf->table->table) || + (cf->ra_mode && (c->ra_mode != cf->ra_mode)) || + (cf->in_keep != c->in_keep)) return 0; /* Note that filter_same() requires arguments in (new, old) order */ @@ -949,7 +949,6 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) c->preference = cf->preference; c->debug = cf->debug; c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; - c->in_keep_filtered = cf->in_keep_filtered; c->rpki_reload = cf->rpki_reload; /* Execute channel-specific reconfigure hook */ @@ -2099,7 +2098,7 @@ channel_show_stats(struct channel *c) u32 in_routes = c->in_limit.count; u32 out_routes = c->out_limit.count; - if (c->in_keep_filtered) + if (c->in_keep) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else diff --git a/nest/protocol.h b/nest/protocol.h index aeb60ac6..b482ed99 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -454,7 +454,7 @@ struct channel_config { const struct filter *in_filter, *out_filter; /* Attached filters */ struct channel_limit rx_limit; /* Limit for receiving routes from protocol - (relevant when in_keep_filtered is active) */ + (relevant when in_keep & RIK_REJECTED) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ @@ -463,7 +463,7 @@ struct channel_config { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 rpki_reload; /* RPKI changes trigger channel reload */ }; @@ -480,7 +480,7 @@ struct channel { struct bmap export_map; /* Keeps track which routes were really exported */ struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ - struct limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct limit rx_limit; /* Receive limit (for in_keep & RIK_REJECTED) */ struct limit in_limit; /* Input limit */ struct limit out_limit; /* Output limit */ @@ -517,7 +517,7 @@ struct channel { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 disabled; u8 stale; /* Used in reconfiguration */ @@ -529,11 +529,7 @@ struct channel { btime last_state_change; /* Time of last state transition */ - struct rtable *in_table; /* Internal table for received routes */ - struct event *reload_event; /* Event responsible for reloading from in_table */ - struct fib_iterator reload_fit; /* FIB iterator in in_table used during reloading */ - struct rte_storage *reload_next_rte; /* Route iterator in in_table used during reloading */ - u8 reload_active; /* Iterator reload_fit is linked */ + struct rt_export_request reload_req; /* Feeder for import reload */ u8 reload_pending; /* Reloading and another reload is scheduled */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ @@ -544,6 +540,8 @@ struct channel { list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ }; +#define RIK_REJECTED 1 /* Routes rejected in import filter are kept */ +#define RIK_PREFILTER (2 | RIK_REJECTED) /* All routes' attribute state before import filter is kept */ /* * Channel states diff --git a/nest/rt-attr.c b/nest/rt-attr.c index c0f81b9d..35f85274 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -1310,13 +1310,13 @@ rta_rehash(void) * converted to the normalized form. */ ea_list * -ea_lookup(ea_list *o) +ea_lookup(ea_list *o, int overlay) { ea_list *r; uint h; ASSERT(!ea_is_cached(o)); - o = ea_normalize(o, 1); + o = ea_normalize(o, overlay); h = ea_hash(o); for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) diff --git a/nest/rt-show.c b/nest/rt-show.c index 8bf74754..12ddc816 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -26,7 +26,8 @@ rt_show_table(struct cli *c, struct rt_show_data *d) return; if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, -1007, "Table %s:", d->tab->table->name); + cli_printf(c, -1007, "Table %s:", + d->tab->prefilter ? "import" : d->tab->table->name); d->last_table = d->tab; } @@ -156,7 +157,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) for (struct rte_storage *er = n->routes; er; er = er->next) { - if (rte_is_filtered(&er->rte) != d->filtered) + if (!d->tab->prefilter && (rte_is_filtered(&er->rte) != d->filtered)) continue; d->rt_counter++; @@ -167,6 +168,11 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) continue; struct rte e = er->rte; + if (d->tab->prefilter) + if (e.sender != d->tab->prefilter->in_req.hook) + continue; + else while (e.attrs->next) + e.attrs = e.attrs->next; /* Export channel is down, do not try to export routes to it */ if (ec && !ec->out_req.hook) @@ -239,7 +245,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) else ia[0] = 0; - rt_show_rte(c, ia, &e, d, (n->routes == er)); + rt_show_rte(c, ia, &e, d, !d->tab->prefilter && (n->routes == er)); first_show = 0; } diff --git a/nest/rt-table.c b/nest/rt-table.c index 1631e00f..975da363 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -606,7 +606,7 @@ rte_store(const rte *r, net *net, rtable *tab) if (ea_is_cached(e->rte.attrs)) e->rte.attrs = rta_clone(e->rte.attrs); else - e->rte.attrs = rta_lookup(e->rte.attrs); + e->rte.attrs = rta_lookup(e->rte.attrs, 1); return e; } @@ -1513,7 +1513,7 @@ channel_preimport(struct rt_import_request *req, rte *new, rte *old) if (new_in && !old_in) if (CHANNEL_LIMIT_PUSH(c, IN)) - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) { new->flags |= REF_FILTERED; return new; @@ -1527,8 +1527,6 @@ channel_preimport(struct rt_import_request *req, rte *new, rte *old) return new; } -static void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); - void rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { @@ -1537,15 +1535,13 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) ASSERT(c->channel_state == CS_UP); - if (c->in_table && !rte_update_in(c, n, new, src)) - return; - - return rte_update_direct(c, n, new, src); -} + /* The import reloader requires prefilter routes to be the first layer */ + if (new && (c->in_keep & RIK_PREFILTER)) + if (ea_is_cached(new->attrs) && !new->attrs->next) + new->attrs = ea_clone(new->attrs); + else + new->attrs = ea_lookup(new->attrs, 0); -static void -rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ const struct filter *filter = c->in_filter; struct channel_import_stats *stats = &c->import_stats; @@ -1563,7 +1559,7 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src stats->updates_filtered++; channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) new->flags |= REF_FILTERED; else new = NULL; @@ -1588,6 +1584,11 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src rte_import(&c->in_req, n, new, src); + /* Now the route attributes are kept by the in-table cached version + * and we may drop the local handle */ + if (new && (c->in_keep & RIK_PREFILTER)) + ea_free(new->attrs); + rte_update_unlock(); } @@ -3183,154 +3184,22 @@ done: * Import table */ -int -rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - struct rtable *tab = c->in_table; - net *net; - - if (new) - net = net_get(tab, n); - else - { - net = net_find(tab, n); - - if (!net) - goto drop_withdraw; - } - - /* Find the old rte */ - struct rte_storage **pos = rte_find(net, src); - if (*pos) - { - rte *old = &(*pos)->rte; - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - - goto drop_update; - } - - if (!new) - CHANNEL_LIMIT_POP(c, RX); - - /* Move iterator if needed */ - if (*pos == c->reload_next_rte) - c->reload_next_rte = (*pos)->next; - - /* Remove the old rte */ - struct rte_storage *del = *pos; - *pos = (*pos)->next; - rte_free(del); - tab->rt_count--; - } - else if (new) - { - if (CHANNEL_LIMIT_PUSH(c, RX)) - { - /* Required by rte_trace_in() */ - new->net = n; - - channel_rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - else - goto drop_withdraw; - if (!new) - { - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - struct rte_storage *e = rte_store(new, net, tab); - e->rte.lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop_update: - c->import_stats.updates_received++; - c->in_req.hook->stats.updates_ignored++; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 0; - -drop_withdraw: - c->import_stats.withdraws_received++; - c->in_req.hook->stats.withdraws_ignored++; - return 0; -} - -int -rt_reload_channel(struct channel *c) +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - if (!c->reload_active) - { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } - - do { - for (struct rte_storage *e = c->reload_next_rte; e; e = e->next) + for (uint i=0; isender == c->in_req.hook) { - if (max_feed-- <= 0) - { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; - } + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; - rte r = e->rte; - rte_update_direct(c, r.net, &r, r.src); + /* And reload the route */ + rte_update(c, net, &new, new.src); } - - c->reload_next_rte = NULL; - - FIB_ITERATE_START(&tab->fib, fit, net, n) - { - if (c->reload_next_rte = n->routes) - { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } - } - FIB_ITERATE_END; - } - while (c->reload_next_rte); - - c->reload_active = 0; - return 1; -} - -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) - { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; - } } void diff --git a/nest/rt.h b/nest/rt.h index 32bba6a6..d5e28cb6 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -387,6 +387,7 @@ struct rt_show_data_rtable { node n; rtable *table; struct channel *export_channel; + struct channel *prefilter; }; struct rt_show_data { diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 84430287..8df99420 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -900,9 +900,6 @@ bgp_refresh_begin(struct bgp_channel *c) c->load_state = BFS_REFRESHING; rt_refresh_begin(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c.in_req); } /** @@ -924,9 +921,6 @@ bgp_refresh_end(struct bgp_channel *c) c->load_state = BFS_NONE; rt_refresh_end(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_prune_sync(c->c.in_table, 0); } @@ -1393,9 +1387,9 @@ bgp_reload_routes(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ASSERT(p->conn && (p->route_refresh || c->c.in_table)); + ASSERT(p->conn && (p->route_refresh || (C->in_keep & RIK_PREFILTER))); - if (c->c.in_table) + if (C->in_keep & RIK_PREFILTER) channel_schedule_reload(C); else bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); @@ -2153,7 +2147,7 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->cost != old->cost)) { /* import_changed itself does not force ROUTE_REFRESH when import_table is active */ - if (c->c.in_table && (c->c.channel_state == CS_UP)) + if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP)) bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); *import_changed = 1; diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 45ee4ed2..c2e98340 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -1392,7 +1392,7 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_lis /* Prepare cached route attributes */ if (s->cached_ea == NULL) - s->cached_ea = ea_lookup(a0); + s->cached_ea = ea_lookup(a0, 0); rte e0 = { .attrs = s->cached_ea, diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index aedf3df6..69c2907d 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -2086,7 +2086,7 @@ again1: ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count); - ea_list *eal = ea_lookup(&eattrs.l); + ea_list *eal = ea_lookup(&eattrs.l, 0); ea_free(nf->old_ea); nf->old_ea = eal; diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 67ad2ada..d82ac8aa 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -156,7 +156,7 @@ perf_loop(void *data) ea_set_attr_data(&ea, &ea_gen_nexthop, 0, &nhad.ad.data, sizeof nhad - sizeof nhad.ad); - p->data[i].a = rta_lookup(ea); + p->data[i].a = rta_lookup(ea, 0); } else p->data[i].a = rta_clone(p->data[i-1].a); diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 99d4b737..d12e6731 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -126,7 +126,7 @@ pipe_postconfig(struct proto_config *CF) if (cc->rx_limit.action) cf_error("Pipe protocol does not support receive limits"); - if (cc->in_keep_filtered) + if (cc->in_keep) cf_error("Pipe protocol prohibits keeping filtered routes"); cc->debug = cf->c.debug; -- cgit v1.2.3 From d2142ad4050c64991f856734bafadd527c28e74b Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 27 Jun 2022 12:14:05 +0200 Subject: Fixed displaying BGP and RIP attributes after recent reworks --- filter/test.conf | 4 ---- lib/route.h | 1 + nest/rt-attr.c | 41 +++++++++++++++++++++++------------------ proto/bgp/attrs.c | 4 ++++ proto/rip/rip.c | 2 +- 5 files changed, 29 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/filter/test.conf b/filter/test.conf index eb9cc30c..cd2fa8db 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -1470,11 +1470,7 @@ bool t; bgp_community = -empty-; bgp_originator_id = 9.7.5.3; bgp_cluster_list = -empty-; - t = defined(bgp_mp_reach_nlri); - t = defined(bgp_mp_unreach_nlri); bgp_ext_community = --empty--; - bgp_as4_path = +empty+; - t = defined(bgp_as4_aggregator); t = defined(bgp_aigp); bgp_large_community = ---empty---; t = defined(bgp_mpls_label_stack); diff --git a/lib/route.h b/lib/route.h index f7b089d9..130465f9 100644 --- a/lib/route.h +++ b/lib/route.h @@ -172,6 +172,7 @@ struct ea_class { btype type; /* Data type ID */ \ uint readonly:1; /* This attribute can't be changed by filters */ \ uint conf:1; /* Requested by config */ \ + uint hidden:1; /* Technical attribute, do not show, do not expose to filters */ \ void (*format)(const eattr *ea, byte *buf, uint size); \ void (*stored)(const eattr *ea); /* When stored into global hash */ \ void (*freed)(const eattr *ea); /* When released from global hash */ \ diff --git a/nest/rt-attr.c b/nest/rt-attr.c index cec1d7cf..0881b61b 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -436,7 +436,8 @@ ea_class_free(struct ea_class *cl) /* No more ea class references. Unregister the attribute. */ idm_free(&ea_class_idm, cl->id); ea_class_global[cl->id] = NULL; - ea_lex_unregister(cl); + if (!cl->hidden) + ea_lex_unregister(cl); } static void @@ -492,7 +493,8 @@ ea_register(pool *p, struct ea_class *def) ASSERT_DIE(def->id < ea_class_max); ea_class_global[def->id] = def; - ea_lex_register(def); + if (!def->hidden) + ea_lex_register(def); return ea_ref_class(p, def); } @@ -1017,37 +1019,40 @@ opaque_format(const struct adata *ad, byte *buf, uint size) } static inline void -ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end) +ea_show_int_set(struct cli *c, const char *name, const struct adata *ad, int way, byte *buf) { - int i = int_set_format(ad, way, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = int_set_format(ad, way, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = int_set_format(ad, way, i, buf, end - buf - 1); + i = int_set_format(ad, way, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_ec_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = ec_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = ec_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = ec_set_format(ad, i, buf, end - buf - 1); + i = ec_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_lc_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = lc_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = lc_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = lc_set_format(ad, i, buf, end - buf - 1); + i = lc_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } @@ -1075,7 +1080,7 @@ ea_show(struct cli *c, const eattr *e) struct ea_class *cls = ea_class_global[e->id]; ASSERT_DIE(cls); - if (e->undef) + if (e->undef || cls->hidden) return; else if (cls->format) cls->format(e, buf, end - buf); @@ -1098,13 +1103,13 @@ ea_show(struct cli *c, const eattr *e) as_path_format(ad, pos, end - pos); break; case T_CLIST: - ea_show_int_set(c, ad, 1, pos, buf, end); + ea_show_int_set(c, cls->name, ad, 1, buf); return; case T_ECLIST: - ea_show_ec_set(c, ad, pos, buf, end); + ea_show_ec_set(c, cls->name, ad, buf); return; case T_LCLIST: - ea_show_lc_set(c, ad, pos, buf, end); + ea_show_lc_set(c, cls->name, ad, buf); return; default: bsprintf(pos, "", e->type); diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 46e949bf..22a53dce 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1122,12 +1122,14 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_MP_REACH_NLRI] = { .name = "bgp_mp_reach_nlri", .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_reach_nlri, }, [BA_MP_UNREACH_NLRI] = { .name = "bgp_mp_unreach_nlri", .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_unreach_nlri, }, @@ -1142,6 +1144,7 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_AS4_PATH] = { .name = "bgp_as4_path", .type = T_PATH, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_path, @@ -1149,6 +1152,7 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_AS4_AGGREGATOR] = { .name = "bgp_as4_aggregator", .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_aggregator, diff --git a/proto/rip/rip.c b/proto/rip/rip.c index d6edac14..f5c01380 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -1240,7 +1240,7 @@ rip_get_route_info(rte *rte, byte *buf) static void rip_tag_format(const eattr *a, byte *buf, uint buflen) { - bsnprintf(buf, buflen, "tag: %04x", a->u.data); + bsnprintf(buf, buflen, "%04x", a->u.data); } static struct ea_class ea_rip_metric = { -- cgit v1.2.3 From fd72c696784ba74e793db1fb7c44668c47a383b2 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Tue, 28 Jun 2022 10:51:00 +0200 Subject: Attribute lists split to storage headers and data to save BGP memory --- lib/route.h | 29 +++++++++++++++++++++-------- nest/rt-attr.c | 39 ++++++++++++++++++++------------------- 2 files changed, 41 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/route.h b/lib/route.h index 130465f9..30be7c4e 100644 --- a/lib/route.h +++ b/lib/route.h @@ -147,10 +147,6 @@ typedef struct eattr { #define EA_BIT_GET(ea) ((ea) >> 24) typedef struct ea_list { - struct ea_list *next_hash; /* Next in hash chain */ - struct ea_list **pprev_hash; /* Previous in hash chain */ - u32 uc; /* Use count */ - u32 hash_key; /* List hash */ struct ea_list *next; /* In case we have an override list */ byte flags; /* Flags: EALF_... */ byte rfu; @@ -158,10 +154,17 @@ typedef struct ea_list { eattr attrs[0]; /* Attribute definitions themselves */ } ea_list; +struct ea_storage { + struct ea_storage *next_hash; /* Next in hash chain */ + struct ea_storage **pprev_hash; /* Previous in hash chain */ + u32 uc; /* Use count */ + u32 hash_key; /* List hash */ + ea_list l[0]; /* The list itself */ +}; + #define EALF_SORTED 1 /* Attributes are sorted by code */ #define EALF_BISECT 2 /* Use interval bisection for searching */ #define EALF_CACHED 4 /* List is cached */ -#define EALF_OVERLAY 8 /* List is an overlay in the same table */ struct ea_class { #define EA_CLASS_INSIDE \ @@ -417,9 +420,19 @@ static inline int rte_dest(const rte *r) void rta_init(void); ea_list *ea_lookup(ea_list *, int overlay); /* Get a cached (and normalized) variant of this attribute list */ static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; } -static inline ea_list *ea_clone(ea_list *r) { r->uc++; return r; } -void ea__free(ea_list *r); -static inline void ea_free(ea_list *r) { if (r && !--r->uc) ea__free(r); } +static inline struct ea_storage *ea_get_storage(ea_list *r) +{ + ASSERT_DIE(ea_is_cached(r)); + return SKIP_BACK(struct ea_storage, l, r); +} + +static inline ea_list *ea_clone(ea_list *r) { ea_get_storage(r)->uc++; return r; } +void ea__free(struct ea_storage *r); +static inline void ea_free(ea_list *l) { + if (!l) return; + struct ea_storage *r = ea_get_storage(l); + if (!--r->uc) ea__free(r); +} void ea_dump(ea_list *); void ea_dump_all(void); diff --git a/nest/rt-attr.c b/nest/rt-attr.c index ba036cd2..a66d4c6e 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -1206,11 +1206,12 @@ ea_dump(ea_list *e) } while (e) { + struct ea_storage *s = ea_is_cached(e) ? ea_get_storage(e) : NULL; debug("[%c%c%c] uc=%d h=%08x", (e->flags & EALF_SORTED) ? 'S' : 's', (e->flags & EALF_BISECT) ? 'B' : 'b', (e->flags & EALF_CACHED) ? 'C' : 'c', - e->uc, e->hash_key); + s ? s->uc : 0, s ? s->hash_key : 0); for(i=0; icount; i++) { eattr *a = &e->attrs[i]; @@ -1304,12 +1305,12 @@ static uint rta_cache_count; static uint rta_cache_size = 32; static uint rta_cache_limit; static uint rta_cache_mask; -static ea_list **rta_hash_table; +static struct ea_storage **rta_hash_table; static void rta_alloc_hash(void) { - rta_hash_table = mb_allocz(rta_pool, sizeof(ea_list *) * rta_cache_size); + rta_hash_table = mb_allocz(rta_pool, sizeof(struct ea_storage *) * rta_cache_size); if (rta_cache_size < 32768) rta_cache_limit = rta_cache_size * 2; else @@ -1318,7 +1319,7 @@ rta_alloc_hash(void) } static inline void -rta_insert(ea_list *r) +rta_insert(struct ea_storage *r) { uint h = r->hash_key & rta_cache_mask; r->next_hash = rta_hash_table[h]; @@ -1333,8 +1334,8 @@ rta_rehash(void) { uint ohs = rta_cache_size; uint h; - ea_list *r, *n; - ea_list **oht = rta_hash_table; + struct ea_storage *r, *n; + struct ea_storage **oht = rta_hash_table; rta_cache_size = 2*rta_cache_size; DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size); @@ -1364,7 +1365,7 @@ rta_rehash(void) ea_list * ea_lookup(ea_list *o, int overlay) { - ea_list *r; + struct ea_storage *r; uint h; ASSERT(!ea_is_cached(o)); @@ -1372,15 +1373,15 @@ ea_lookup(ea_list *o, int overlay) h = ea_hash(o); for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) - if (r->hash_key == h && ea_same(r, o)) - return ea_clone(r); + if (r->hash_key == h && ea_same(r->l, o)) + return ea_clone(r->l); uint elen = ea_list_size(o); - r = mb_alloc(rta_pool, elen); - ea_list_copy(r, o, elen); - ea_list_ref(r); + r = mb_alloc(rta_pool, elen + sizeof(struct ea_storage)); + ea_list_copy(r->l, o, elen); + ea_list_ref(r->l); - r->flags |= EALF_CACHED; + r->l->flags |= EALF_CACHED; r->hash_key = h; r->uc = 1; @@ -1389,19 +1390,19 @@ ea_lookup(ea_list *o, int overlay) if (++rta_cache_count > rta_cache_limit) rta_rehash(); - return r; + return r->l; } void -ea__free(ea_list *a) +ea__free(struct ea_storage *a) { - ASSERT(rta_cache_count && ea_is_cached(a)); + ASSERT(rta_cache_count); rta_cache_count--; *a->pprev_hash = a->next_hash; if (a->next_hash) a->next_hash->pprev_hash = a->pprev_hash; - ea_list_unref(a); + ea_list_unref(a->l); mb_free(a); } @@ -1416,10 +1417,10 @@ ea_dump_all(void) { debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit); for (uint h=0; h < rta_cache_size; h++) - for (ea_list *a = rta_hash_table[h]; a; a = a->next_hash) + for (struct ea_storage *a = rta_hash_table[h]; a; a = a->next_hash) { debug("%p ", a); - ea_dump(a); + ea_dump(a->l); debug("\n"); } debug("\n"); -- cgit v1.2.3 From d5e3272f3d9b1bad7ceb6d0d5897a7269e28a537 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 29 Jun 2022 13:22:40 +0200 Subject: Hash: iterable now per partes by an iterator It's now possible to pause iteration through hash. This requires struct hash_iterator to be allocated somewhere handy. The iteration itself is surrounded by HASH_WALK_ITER and HASH_WALK_ITER_END. Call HASH_WALK_ITER_PUT to ask for pausing; it may still do some more iterations until it comes to a suitable pausing point. The iterator must be initalized to an empty structure. No cleanup is needed if iteration is abandoned inbetween. --- lib/hash.h | 40 ++++++++++++++++++++++++++++++---------- lib/hash_test.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/hash.h b/lib/hash.h index 8febb33f..ebb2857a 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -10,7 +10,7 @@ #ifndef _BIRD_HASH_H_ #define _BIRD_HASH_H_ -#define HASH(type) struct { type **data; uint count, order; } +#define HASH(type) struct { type **data; uint count; u16 iterators; u8 order; u8 down_requested:1; } #define HASH_TYPE(v) typeof(** (v).data) #define HASH_SIZE(v) (1U << (v).order) @@ -125,20 +125,26 @@ #define HASH_MAY_STEP_DOWN_(v,pool,rehash_fn,args) \ ({ \ - if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \ - ((v).order > (REHASH_LO_BOUND(args)))) \ + if ((v).iterators) \ + (v).down_requested = 1; \ + else if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \ + ((v).order > (REHASH_LO_BOUND(args)))) \ rehash_fn(&(v), pool, -(REHASH_LO_STEP(args))); \ }) #define HASH_MAY_RESIZE_DOWN_(v,pool,rehash_fn,args) \ ({ \ - uint _o = (v).order; \ - while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \ - (_o > (REHASH_LO_BOUND(args)))) \ - _o -= (REHASH_LO_STEP(args)); \ - if (_o < (v).order) \ - rehash_fn(&(v), pool, _o - (v).order); \ - }) + if ((v).iterators) \ + (v).down_requested = 1; \ + else { \ + uint _o = (v).order; \ + while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \ + (_o > (REHASH_LO_BOUND(args)))) \ + _o -= (REHASH_LO_STEP(args)); \ + if (_o < (v).order) \ + rehash_fn(&(v), pool, _o - (v).order); \ + } \ + }) #define HASH_INSERT2(v,id,pool,node) \ @@ -195,6 +201,20 @@ #define HASH_WALK_FILTER_END } while (0) +#define HASH_WALK_ITER(v, id, n, iter) \ + do { \ + uint _hash_walk_iter_put = 0; \ + uint _shift = 32 - (v).order; \ + for ( ; !_hash_walk_iter_put; (iter) += (1U << _shift)) { \ + _hash_walk_iter_put = ((iter) + (1U << _shift) == 0); \ + for (HASH_TYPE(v) *n = (v).data[(iter) >> _shift]; n; n = id##_NEXT((n)))\ + if (HASH_FN(v, id, id##_KEY(n)) >= ((iter) >> _shift)) \ + +#define HASH_WALK_ITER_PUT (_hash_walk_iter_put = 1) + +#define HASH_WALK_ITER_END } } while (0) + + static inline void mem_hash_init(u64 *h) { diff --git a/lib/hash_test.c b/lib/hash_test.c index 4bce7017..ecfcdd66 100644 --- a/lib/hash_test.c +++ b/lib/hash_test.c @@ -285,6 +285,46 @@ t_walk_filter(void) return 1; } +static int +t_walk_iter(void) +{ + init_hash(); + fill_hash(); + + u32 hit = 0; + + u32 prev_hash = ~0; + for (uint cnt = 0; cnt < MAX_NUM; ) + { + u32 last_hash = ~0; +// printf("PUT!\n"); + HASH_WALK_ITER(hash, TEST, n, hit) + { + cnt++; + u32 cur_hash = HASH_FN(hash, TEST, n->key); + /* + printf("C%08x L%08x P%08x K%08x H%08x N%p S%d I%ld\n", + cur_hash, last_hash, prev_hash, n->key, hit, n, _shift, n - &nodes[0]); + */ + + if (last_hash == ~0U) + { + if (prev_hash != ~0U) + bt_assert(prev_hash < cur_hash); + last_hash = prev_hash = cur_hash; + } + else + bt_assert(last_hash == cur_hash); + + if (cnt < MAX_NUM) + HASH_WALK_ITER_PUT; + } + HASH_WALK_ITER_END; + } + + return 1; +} + int main(int argc, char *argv[]) { @@ -299,6 +339,7 @@ main(int argc, char *argv[]) bt_test_suite(t_walk_delsafe_remove, "HASH_WALK_DELSAFE and HASH_REMOVE"); bt_test_suite(t_walk_delsafe_remove2, "HASH_WALK_DELSAFE and HASH_REMOVE2. HASH_REMOVE2 is HASH_REMOVE and smart auto-resize function"); bt_test_suite(t_walk_filter, "HASH_WALK_FILTER"); + bt_test_suite(t_walk_iter, "HASH_WALK_ITER"); return bt_exit_value(); } -- cgit v1.2.3 From 6b0368cc2c317d1acc0881a96b32ded291d82741 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 20 Jun 2022 19:10:49 +0200 Subject: Export tables merged with BGP prefix hash Until now, if export table was enabled, Nest was storing exactly the route before rt_notify() was called on it. This was quite sloppy and spooky and it also wasn't reflecting the changes BGP does before sending. And as BGP is storing the routes to be sent anyway, we are simply keeping the already-sent routes in there to better rule out unneeded reexports. Some of the route attributes (IGP metric, preference) make no sense in BGP, therefore these will be probably replaced by something sensible. Also the nexthop shown in the short output is the BGP nexthop. --- lib/route.h | 2 + nest/config.Y | 2 +- nest/proto.c | 16 ---- nest/protocol.h | 3 +- nest/rt-attr.c | 27 +++++- nest/rt-table.c | 168 ++++----------------------------- nest/rt.h | 20 +++- proto/bgp/attrs.c | 266 +++++++++++++++++++++++++++++++++++++++++++++++----- proto/bgp/bgp.c | 7 +- proto/bgp/bgp.h | 19 ++-- proto/bgp/packets.c | 32 ++++--- 11 files changed, 342 insertions(+), 220 deletions(-) (limited to 'lib') diff --git a/lib/route.h b/lib/route.h index 30be7c4e..68596316 100644 --- a/lib/route.h +++ b/lib/route.h @@ -35,6 +35,7 @@ typedef struct rte { #define REF_STALE 4 /* Route is stale in a refresh cycle */ #define REF_DISCARD 8 /* Route is scheduled for discard */ #define REF_MODIFY 16 /* Route is scheduled for modify */ +#define REF_PENDING 32 /* Route has not propagated completely yet */ /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } @@ -53,6 +54,7 @@ struct rte_src { struct rte_src *rt_find_source(struct proto *p, u32 id); struct rte_src *rt_get_source(struct proto *p, u32 id); +struct rte_src *rt_find_source_global(u32 id); static inline void rt_lock_source(struct rte_src *src) { src->uc++; } static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } void rt_prune_sources(void); diff --git a/nest/config.Y b/nest/config.Y index 7c163f74..edddfc2b 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -694,7 +694,7 @@ r_args: } | r_args EXPORT TABLE channel_arg { if (!$4->out_table) cf_error("No export table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_exporter($$, &$4->out_table->exporter, "export"); + rt_show_add_exporter($$, $4->out_table, "export"); $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args FILTER filter { diff --git a/nest/proto.c b/nest/proto.c index e7be8001..061205c1 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -559,10 +559,6 @@ channel_export_stopped(struct rt_export_request *req) return; } - /* Free the routes from out_table */ - if (c->out_table) - rt_prune_sync(c->out_table, 1); - mb_free(c->out_req.name); c->out_req.name = NULL; @@ -647,18 +643,6 @@ channel_setup_in_table(struct channel *c) c->in_keep |= RIK_PREFILTER; } -/* Called by protocol to activate out_table */ -void -channel_setup_out_table(struct channel *c) -{ - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); - cf->name = "export"; - cf->addr_type = c->net_type; - cf->internal = 1; - - c->out_table = rt_setup(c->proto->pool, cf); -} - static void channel_do_start(struct channel *c) diff --git a/nest/protocol.h b/nest/protocol.h index a1701a7e..3ccd364a 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -537,7 +537,7 @@ struct channel { u8 refeed_pending; /* Refeeding and another refeed is scheduled */ u8 rpki_reload; /* RPKI changes trigger channel reload */ - struct rtable *out_table; /* Internal table for exported routes */ + struct rt_exporter *out_table; /* Internal table for exported routes */ list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ }; @@ -610,7 +610,6 @@ int proto_configure_channel(struct proto *p, struct channel **c, struct channel_ void channel_set_state(struct channel *c, uint state); void channel_setup_in_table(struct channel *c); -void channel_setup_out_table(struct channel *c); void channel_schedule_reload(struct channel *c); static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } diff --git a/nest/rt-attr.c b/nest/rt-attr.c index a66d4c6e..b31bc5cc 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -197,11 +197,14 @@ static struct idm src_ids; #define RSH_INIT_ORDER 6 static HASH(struct rte_src) src_hash; +static struct rte_src **rte_src_global; +static uint rte_src_global_max = SRC_ID_INIT_SIZE; static void rte_src_init(void) { rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); + rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max); idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); @@ -232,10 +235,27 @@ rt_get_source(struct proto *p, u32 id) src->uc = 0; HASH_INSERT2(src_hash, RSH, rta_pool, src); + if (src->global_id >= rte_src_global_max) + { + rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2)); + memset(&rte_src_global[rte_src_global_max / 2], 0, + sizeof(struct rte_src *) * (rte_src_global_max / 2)); + } + + rte_src_global[src->global_id] = src; return src; } +struct rte_src * +rt_find_source_global(u32 id) +{ + if (id >= rte_src_global_max) + return NULL; + else + return rte_src_global[id]; +} + void rt_prune_sources(void) { @@ -1081,8 +1101,11 @@ ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad) bsprintf(weight, " weight %d", nh->weight + 1); if (ipa_nonzero(nh->gw)) - cli_printf(c, -1007, "\tvia %I on %s%s%s%s", - nh->gw, nh->iface->name, mpls, onlink, weight); + if (nh->iface) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tvia %I", nh->gw); else cli_printf(c, -1007, "\tdev %s%s%s", nh->iface->name, mpls, onlink, weight); diff --git a/nest/rt-table.c b/nest/rt-table.c index 5adf6346..29690414 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -820,17 +820,6 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) if (!new && old) CHANNEL_LIMIT_POP(c, OUT); - /* Apply export table */ - struct rte_storage *old_exported = NULL; - if (c->out_table) - { - if (!rte_update_out(c, net, new, old, &old_exported)) - { - channel_rte_trace_out(D_ROUTES, c, new, "idempotent"); - return; - } - } - if (new) stats->updates_accepted++; else @@ -852,10 +841,7 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) channel_rte_trace_out(D_ROUTES, c, old, "removed"); } - p->rt_notify(p, c, net, new, old_exported ? &old_exported->rte : old); - - if (c->out_table && old_exported) - rte_free(old_exported); + p->rt_notify(p, c, net, new, old); } static void @@ -1747,7 +1733,7 @@ rt_export_stopped(void *data) hook->stopped(hook->req); /* Reporting the hook as finished. */ - tab->done(hook); + CALL(tab->done, hook); /* Freeing the hook together with its coroutine. */ rfree(hook->pool); @@ -1763,7 +1749,7 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state) hook->req->log_state_change(hook->req, state); } -static inline void +void rt_set_export_state(struct rt_export_hook *hook, u8 state) { hook->last_state_change = current_time(); @@ -1874,16 +1860,22 @@ rt_table_export_stop(struct rt_export_hook *hook) if (hook->export_state != TES_FEEDING) return; - if (hook->walk_lock) + switch (hook->req->addr_mode) { - rt_unlock_trie(tab, hook->walk_lock); - hook->walk_lock = NULL; - - mb_free(hook->walk_state); - hook->walk_state = NULL; + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); + break; } - else - fit_get(&tab->fib, &hook->feed_fit); } void @@ -1896,7 +1888,7 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r ev_postpone(hook->event); /* Stop feeding from the exporter */ - hook->table->stop(hook); + CALL(hook->table->stop, hook); /* Reset the event as the stopped event */ hook->event->hook = rt_export_stopped; @@ -3397,130 +3389,6 @@ void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *n } } -void -rt_prune_sync(rtable *t, int all) -{ - struct fib_iterator fit; - - FIB_ITERATE_INIT(&fit, &t->fib); - -again: - FIB_ITERATE_START(&t->fib, &fit, net, n) - { - struct rte_storage *e, **ee = &n->routes; - - while (e = *ee) - { - if (all || (e->rte.flags & (REF_STALE | REF_DISCARD))) - { - *ee = e->next; - rte_free(e); - t->rt_count--; - } - else - ee = &e->next; - } - - if (all || !n->routes) - { - FIB_ITERATE_PUT(&fit); - fib_delete(&t->fib, n); - goto again; - } - } - FIB_ITERATE_END; -} - - -/* - * Export table - */ - -int -rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old0, struct rte_storage **old_exported) -{ - struct rtable *tab = c->out_table; - struct rte_src *src; - net *net; - - if (new) - { - net = net_get(tab, n); - src = new->src; - } - else - { - net = net_find(tab, n); - src = old0->src; - - if (!net) - goto drop; - } - - /* Find the old rte */ - struct rte_storage **pos = (c->ra_mode == RA_ANY) ? rte_find(net, src) : &net->routes; - struct rte_storage *old = NULL; - - if (old = *pos) - { - if (new && rte_same(&(*pos)->rte, new)) - goto drop; - - /* Remove the old rte */ - *pos = old->next; - *old_exported = old; - tab->rt_count--; - } - - if (!new) - { - if (!old) - goto drop; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - struct rte_storage *e = rte_store(new, net, tab); - e->rte.lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop: - return 0; -} - -void -rt_refeed_channel(struct channel *c) -{ - if (!c->out_table) - { - channel_request_feeding(c); - return; - } - - ASSERT_DIE(c->ra_mode != RA_ANY); - - c->proto->feed_begin(c, 0); - - FIB_WALK(&c->out_table->fib, net, n) - { - if (!n->routes) - continue; - - rte e = n->routes->rte; - c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL); - } - FIB_WALK_END; - - c->proto->feed_end(c); -} - /* * Hostcache diff --git a/nest/rt.h b/nest/rt.h index 3ed56fcc..5a8c5a19 100644 --- a/nest/rt.h +++ b/nest/rt.h @@ -247,14 +247,20 @@ struct rt_export_hook { u32 withdraws_received; /* Number of route withdraws received */ } stats; - struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ - struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ - struct f_trie *walk_lock; /* Locked trie for walking */ + union { + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + struct { + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + }; + u32 hash_iter; /* Iterator over hash */ + }; btime last_state_change; /* Time of last state transition */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ u8 export_state; /* Route export state (TES_*, see below) */ + u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ struct event *event; /* Event running all the export operations */ @@ -282,6 +288,10 @@ struct rt_export_hook { #define TE_ADDR_IN 3 /* Interval query - show route in */ +#define TFT_FIB 1 +#define TFT_TRIE 2 +#define TFT_HASH 3 + void rt_request_import(rtable *tab, struct rt_import_request *req); void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); @@ -296,6 +306,8 @@ const char *rt_export_state_name(u8 state); static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } +void rt_set_export_state(struct rt_export_hook *hook, u8 state); + void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); /* Types of route announcement, also used as flags */ @@ -386,8 +398,6 @@ int rt_reload_channel(struct channel *c); void rt_reload_channel_abort(struct channel *c); void rt_refeed_channel(struct channel *c); void rt_prune_sync(rtable *t, int all); -int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); static inline int rt_is_ip(rtable *tab) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 22a53dce..084c9b63 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1550,7 +1550,6 @@ bgp_free_bucket_table(struct bgp_channel *c) static struct bgp_bucket * bgp_get_bucket(struct bgp_channel *c, ea_list *new) { - /* Hash and lookup */ u32 hash = ea_hash(new); struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash); @@ -1571,8 +1570,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) /* Copy the ea_list */ ea_list_copy(b->eattrs, new, ea_size); - /* Insert the bucket to send queue and bucket hash */ - add_tail(&c->bucket_queue, &b->send_node); + /* Insert the bucket to bucket hash */ HASH_INSERT2(c->bucket_hash, RBH, c->pool, b); return b; @@ -1590,14 +1588,30 @@ bgp_get_withdraw_bucket(struct bgp_channel *c) return c->withdraw_bucket; } -void -bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) +static void +bgp_free_bucket_xx(struct bgp_channel *c, struct bgp_bucket *b) { - rem_node(&b->send_node); HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); mb_free(b); } +int +bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b) +{ + /* Won't free the withdraw bucket */ + if (b == c->withdraw_bucket) + return 0; + + if (EMPTY_LIST(b->prefixes)) + rem_node(&b->send_node); + + if (b->px_uc || !EMPTY_LIST(b->prefixes)) + return 0; + + bgp_free_bucket_xx(c, b); + return 1; +} + void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) { @@ -1617,8 +1631,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) struct bgp_prefix *px = HEAD(b->prefixes); log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net); - rem_node(&px->buck_node); - add_tail(&wb->prefixes, &px->buck_node); + rem_node(&px->buck_node_xx); + add_tail(&wb->prefixes, &px->buck_node_xx); } } @@ -1629,7 +1643,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_KEY(px) px->net, px->path_id, px->hash #define PXH_NEXT(px) px->next -#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2) +#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (c->add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash @@ -1659,15 +1673,13 @@ bgp_free_prefix_table(struct bgp_channel *c) static struct bgp_prefix * bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) { + u32 path_id_hash = c->add_path_tx ? path_id : 0; /* We must use a different hash function than the rtable */ - u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id)); - struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash)); + struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash); if (px) - { - rem_node(&px->buck_node); return px; - } if (c->prefix_slab) px = sl_alloc(c->prefix_slab); @@ -1684,10 +1696,64 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) return px; } -void +static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px); + +static inline int +bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b) +{ +#define BPX_TRACE(what) do { \ + if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \ + c->c.proto->name, c->c.name, what, \ + px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0) + px->lastmod = current_time(); + + /* Already queued for the same bucket */ + if (px->cur == b) + { + BPX_TRACE("already queued"); + return 0; + } + + /* Unqueue from the old bucket */ + if (px->cur) + { + rem_node(&px->buck_node_xx); + bgp_done_bucket(c, px->cur); + } + + /* The new bucket is the same as we sent before */ + if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket)) + { + if (px->cur) + BPX_TRACE("reverted"); + else + BPX_TRACE("already sent"); + + /* Well, we haven't sent anything yet */ + if (!px->last) + bgp_free_prefix(c, px); + + px->cur = NULL; + return 0; + } + + /* Enqueue the bucket if it has been empty */ + if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes)) + add_tail(&c->bucket_queue, &b->send_node); + + /* Enqueue to the new bucket and indicate the change */ + add_tail(&b->prefixes, &px->buck_node_xx); + px->cur = b; + + BPX_TRACE("queued"); + return 1; + +#undef BPX_TRACE +} + +static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) { - rem_node(&px->buck_node); HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); if (c->prefix_slab) @@ -1696,6 +1762,167 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) mb_free(px); } +void +bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck) +{ + /* Cleanup: We're called from bucket senders. */ + ASSERT_DIE(px->cur == buck); + rem_node(&px->buck_node_xx); + + /* We may want to store the updates */ + if (c->c.out_table) + { + /* Nothing to be sent right now */ + px->cur = NULL; + + /* Unref the previous sent version */ + if (px->last) + px->last->px_uc--; + + /* Ref the current sent version */ + if (buck != c->withdraw_bucket) + { + px->last = buck; + px->last->px_uc++; + return; + } + + /* Prefixes belonging to the withdraw bucket are freed always */ + } + + bgp_free_prefix(c, px); +} + + +/* + * Prefix hash table exporter + */ + +static void +bgp_out_table_feed(void *data) +{ + struct rt_export_hook *hook = data; + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table); + + int max = 512; + + const net_addr *neq = (hook->req->addr_mode == TE_ADDR_EQUAL) ? hook->req->addr : NULL; + const net_addr *cand = NULL; + + do { + HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter) + { + switch (hook->req->addr_mode) + { + case TE_ADDR_IN: + if (!net_in_netX(n->net, hook->req->addr)) + continue; + /* fall through */ + case TE_ADDR_NONE: + /* Splitting only for multi-net exports */ + if (--max <= 0) + HASH_WALK_ITER_PUT; + break; + + case TE_ADDR_FOR: + if (!neq) + { + if (net_in_netX(hook->req->addr, n->net) && (!cand || (n->net->length > cand->length))) + cand = n->net; + continue; + } + /* fall through */ + case TE_ADDR_EQUAL: + if (!net_equal(n->net, neq)) + continue; + break; + } + + struct bgp_bucket *buck = n->cur ?: n->last; + ea_list *ea = NULL; + if (buck == c->withdraw_bucket) + ea_set_dest(&ea, 0, RTD_UNREACHABLE); + else + { + ea = buck->eattrs; + eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP); + ASSERT_DIE(eanh); + const ip_addr *nh = (const void *) eanh->u.ptr->data; + + struct nexthop_adata nhad = { + .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), }, + .nh = { .gw = nh[0], }, + }; + + ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad))); + } + + struct rte_storage es = { + .rte = { + .attrs = ea, + .net = n->net, + .src = rt_find_source_global(n->path_id), + .sender = NULL, + .lastmod = n->lastmod, + .flags = n->cur ? REF_PENDING : 0, + }, + }; + + struct rt_pending_export rpe = { + .new = &es, .new_best = &es, + }; + + if (hook->req->export_bulk) + { + rte *feed = &es.rte; + hook->req->export_bulk(hook->req, n->net, &rpe, &feed, 1); + } + else if (hook->req->export_one) + hook->req->export_one(hook->req, n->net, &rpe); + else + bug("No export method in export request"); + } + HASH_WALK_ITER_END; + + neq = cand; + cand = NULL; + } while (neq); + + if (hook->hash_iter) + ev_schedule_work(hook->event); + else + rt_set_export_state(hook, TES_READY); +} + +static struct rt_export_hook * +bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re); + pool *p = rp_new(c->c.proto->pool, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + hook->lp = lp_new_default(p); + hook->event = ev_new_init(p, bgp_out_table_feed, hook); + hook->feed_type = TFT_HASH; + + return hook; +} + +void +bgp_setup_out_table(struct bgp_channel *c) +{ + ASSERT_DIE(c->c.out_table == NULL); + + c->prefix_exporter = (struct rt_exporter) { + .addr_type = c->c.table->addr_type, + .start = bgp_out_table_export_start, + }; + + init_list(&c->prefix_exporter.hooks); + + c->c.out_table = &c->prefix_exporter; +} + /* * BGP protocol glue @@ -1894,7 +2121,6 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; struct bgp_bucket *buck; - struct bgp_prefix *px; u32 path; if (new) @@ -1915,10 +2141,8 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c path = old->src->global_id; } - px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0); - add_tail(&buck->prefixes, &px->buck_node); - - bgp_schedule_packet(p->conn, c, PKT_UPDATE); + if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck)) + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8df99420..6ffe8824 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1740,14 +1740,15 @@ bgp_channel_start(struct channel *C) } c->pool = p->p.pool; // XXXX - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); if (c->cf->import_table) channel_setup_in_table(C); if (c->cf->export_table) - channel_setup_out_table(C); + bgp_setup_out_table(c); + + bgp_init_bucket_table(c); + bgp_init_prefix_table(c); c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index cca7873a..003893e0 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -156,7 +156,7 @@ struct bgp_channel_config { u8 aigp_originate; /* AIGP is originated automatically */ u32 cost; /* IGP cost for direct next hops */ u8 import_table; /* Use c.in_table as Adj-RIB-In */ - u8 export_table; /* Use c.out_table as Adj-RIB-Out */ + u8 export_table; /* Keep Adj-RIB-Out and export it */ struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ @@ -357,6 +357,8 @@ struct bgp_channel { HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ slab *prefix_slab; /* Slab holding prefix nodes */ + struct rt_exporter prefix_exporter; /* Table-like exporter for prefix_hash */ + ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ ip_addr link_addr; /* Link-local version of next_hop_addr */ @@ -378,8 +380,11 @@ struct bgp_channel { }; struct bgp_prefix { - node buck_node; /* Node in per-bucket list */ + node buck_node_xx; /* Node in per-bucket list */ struct bgp_prefix *next; /* Node in prefix hash table */ + struct bgp_bucket *last; /* Last bucket sent with this prefix */ + struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */ + btime lastmod; /* Last modification of this prefix */ u32 hash; u32 path_id; net_addr net[0]; @@ -388,8 +393,9 @@ struct bgp_prefix { struct bgp_bucket { node send_node; /* Node in send queue */ struct bgp_bucket *next; /* Node in bucket hash table */ - list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */ + list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */ u32 hash; /* Hash over extended attributes */ + u32 px_uc; /* How many prefixes are linking this bucket */ ea_list eattrs[0]; /* Per-bucket extended attributes */ }; @@ -556,15 +562,16 @@ int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len); void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to); +void bgp_setup_out_table(struct bgp_channel *c); + void bgp_init_bucket_table(struct bgp_channel *c); void bgp_free_bucket_table(struct bgp_channel *c); -void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b); -void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b); void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b); +int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b); void bgp_init_prefix_table(struct bgp_channel *c); void bgp_free_prefix_table(struct bgp_channel *c); -void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); +void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index c2e98340..4d4ae3eb 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -1488,7 +1488,7 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1573,7 +1573,7 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1661,7 +1661,7 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1758,7 +1758,7 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1845,7 +1845,7 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1933,7 +1933,7 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -2167,6 +2167,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu * var IPv4 Network Layer Reachability Information */ + ASSERT_DIE(s->channel->withdraw_bucket != buck); + int lr, la; la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH); @@ -2188,6 +2190,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu static byte * bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { + ASSERT_DIE(s->channel->withdraw_bucket != buck); + /* * 2 B IPv4 Withdrawn Routes Length (zero) * --- IPv4 Withdrawn Routes NLRI (unused) @@ -2341,9 +2345,8 @@ again: ; buck = HEAD(c->bucket_queue); /* Cleanup empty buckets */ - if (EMPTY_LIST(buck->prefixes)) + if (bgp_done_bucket(c, buck)) { - bgp_free_bucket(c, buck); lp_restore(tmp_linpool, &tmpp); goto again; } @@ -2352,10 +2355,7 @@ again: ; bgp_create_ip_reach(&s, buck, buf, end): bgp_create_mp_reach(&s, buck, buf, end); - if (EMPTY_LIST(buck->prefixes)) - bgp_free_bucket(c, buck); - else - bgp_defer_bucket(c, buck); + bgp_done_bucket(c, buck); if (!res) { @@ -2724,7 +2724,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { case BGP_RR_REQUEST: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - rt_refeed_channel(&c->c); + channel_request_feeding(&c->c); break; case BGP_RR_BEGIN: @@ -2903,7 +2903,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) { ASSERT(conn->sk); - DBG("BGP: Scheduling packet type %d\n", type); + struct bgp_proto *p = conn->bgp; + if (c) + BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name); + else + BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type); if (c) { -- cgit v1.2.3