author    | Maria Matejka <mq@ucw.cz>                       | 2022-07-12 10:36:10 +0200
committer | Maria Matejka <mq@ucw.cz>                       | 2022-07-12 12:22:41 +0200
commit    | 080cbd1219ba86dd44712d0d24ceae884b34ec4b (patch)
tree      | 86bf2e0153ab7365224d454b145e1ae5fac6c1d6
parent    | 4ef2262bd575b071e43c30d0199398a5f6ac27a5 (diff)
Route refresh in tables uses a stale counter.
Until now, we were marking routes as REF_STALE and REF_DISCARD to
clean up old routes after a route refresh. This required a synchronous
route table walk at both the beginning and the end of the route refresh
routine, marking the routes with these flags.
We avoid these walks by using a stale counter. Every route contains:
u8 stale_cycle;
Every import hook contains:
u8 stale_set;
u8 stale_valid;
u8 stale_pruned;
u8 stale_pruning;
In the base state, stale_set == stale_valid == stale_pruned == stale_pruning,
and the stale_cycle of every route carries the same value as well.
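
For illustration, the following condensed sketch shows how the counters
relate and how a newly imported route is stamped. The struct and function
names are illustrative stand-ins only, not BIRD identifiers; the real fields
live in struct rte and struct rt_import_hook, and the stamping corresponds
to new->stale_cycle = hook->stale_set in rte_import() in the patch below.

#include <stdint.h>

/* Stand-in types only; the real fields live in struct rte and struct rt_import_hook. */
struct import_hook_sketch {
  uint8_t stale_set;      /* stamp handed out to routes imported right now */
  uint8_t stale_valid;    /* stamps in [stale_valid, stale_set] are considered valid */
  uint8_t stale_pruning;  /* value of stale_valid when the running prune started */
  uint8_t stale_pruned;   /* value of stale_valid when the last prune finished */
};

struct route_sketch {
  uint8_t stale_cycle;    /* stamp copied from the import hook at import time */
};

/* Importing a route simply copies the hook's current stamp onto it. */
static void import_stamp(struct route_sketch *new_route, const struct import_hook_sketch *hook)
{
  new_route->stale_cycle = hook->stale_set;
}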
The route refresh then proceeds as follows:
+-------------+-----------+-------------+---------------+--------------+
|             | stale_set | stale_valid | stale_pruning | stale_pruned |
+-------------+-----------+-------------+---------------+--------------+
| Base        |     x     |      x      |       x       |       x      |
| Begin       |    x+1    |      x      |       x       |       x      |
  ... now routes are being inserted with stale_cycle == (x+1)
| End         |    x+1    |     x+1     |       x       |       x      |
  ... now table pruning routine is scheduled
| Prune begin |    x+1    |     x+1     |      x+1      |       x      |
  ... now routes with stale_cycle not between stale_set and stale_valid
      are deleted
| Prune end   |    x+1    |     x+1     |      x+1      |      x+1     |
+-------------+-----------+-------------+---------------+--------------+
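
The Begin and End steps thus reduce to two small counter updates on the
import hook. The sketch below is condensed from rt_refresh_begin() and
rt_refresh_end() in the patch; the flood check and trace logging are
omitted, and the names are illustrative stand-ins.

#include <stdint.h>

struct refresh_counters { uint8_t stale_set, stale_valid; };

/* Begin: bump the stamp handed out to newly (re-)imported routes. */
static void refresh_begin(struct refresh_counters *h)
{
  if (!++h->stale_set)    /* the u8 counter wrapped around to 0 */
  {
    h->stale_set = 1;     /* zero stays reserved for always-invalid routes */
    h->stale_valid = 0;
  }
}

/* End: routes not re-imported since refresh_begin() are now outside the valid
 * window, so the asynchronous table prune can be scheduled. The patch itself
 * does hook->stale_valid++ and asserts it now equals stale_set; under that
 * assertion this assignment is equivalent. */
static void refresh_end(struct refresh_counters *h)
{
  h->stale_valid = h->stale_set;
}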
The pruning routine is asynchronous and may have high latency in
high-load environments. Therefore, multiple route refresh requests may
happen before the pruning routine starts, leading to this situation:
| Prune begin |    x+k    |     x+k     |    x -> x+k   |       x      |
... or even
| Prune begin |   x+k+1   |     x+k     |    x -> x+k   |       x      |
... if the prune event starts while another route refresh is running.
In such a case, the pruning routine still deletes routes not fitting
between stale_set and stale_valid, effectively pruning the remnants
of all unpruned route refreshes from before:
| Prune end   |    x+k    |     x+k     |      x+k      |      x+k     |
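
In code terms, the pruning routine only needs one range check per route
plus a little bookkeeping of how far it got. The sketch below condenses
the corresponding pieces of rt_prune_table() from the patch; the function
names (prune_begin, route_is_obsolete, prune_end) are invented for
illustration and the table walk itself is left out.

#include <stdint.h>
#include <stdbool.h>

struct prune_counters { uint8_t stale_set, stale_valid, stale_pruning, stale_pruned; };

/* When the prune walk starts, remember which refresh generation it will cover. */
static void prune_begin(struct prune_counters *h)
{
  if ((h->stale_valid != h->stale_pruning) && (h->stale_pruning == h->stale_pruned))
    h->stale_pruning = h->stale_valid;
}

/* A route is dropped when its stamp falls outside [stale_valid, stale_set];
 * this one check also disposes of leftovers from every refresh that was never pruned. */
static bool route_is_obsolete(uint8_t stale_cycle, const struct prune_counters *h)
{
  return (stale_cycle < h->stale_valid) || (stale_cycle > h->stale_set);
}

/* When the walk finishes, everything up to stale_pruning is known to be gone. */
static void prune_end(struct prune_counters *h)
{
  if (h->stale_pruning != h->stale_pruned)
    h->stale_pruned = h->stale_pruning;
}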
In extremely rare cases, too many route refreshes may happen before any
prune routine finishes. If the difference between stale_valid and
stale_pruned exceeds 128 when another route refresh is requested, the
routine walks the table synchronously, resets all the stale values to
the base state, and logs a warning.
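
The guard itself is a plain comparison of the u8 counters in
rt_refresh_begin(); the sketch below reproduces that condition with an
invented helper name. When it fires, the routine walks the table, sets the
stale_cycle of all affected routes to 0, and resets the counters
(stale_set = 1, stale_valid = stale_pruned = 0), keeping stale_cycle 0
reserved for always-invalid routes.

#include <stdint.h>
#include <stdbool.h>

/* Condition taken from rt_refresh_begin() in the patch: fire when pruning lags
 * too far behind the finished refreshes; the two branches cover both possible
 * orderings of the two wrapping u8 counters. */
static bool refresh_flood(uint8_t stale_pruned, uint8_t stale_valid)
{
  return ((stale_pruned < stale_valid) && (stale_pruned + 128 < stale_valid))
      || ((stale_pruned > stale_valid) && (stale_pruned > stale_valid + 128));
}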
-rw-r--r-- | lib/route.h          |  3
-rw-r--r-- | nest/rt-show.c       |  4
-rw-r--r-- | nest/rt-table.c      | 91
-rw-r--r-- | nest/rt.h            |  8
-rw-r--r-- | proto/bgp/bgp.c      | 18
-rw-r--r-- | proto/rpki/packets.c |  8

6 files changed, 90 insertions, 42 deletions
diff --git a/lib/route.h b/lib/route.h
index 68596316..9f78ed00 100644
--- a/lib/route.h
+++ b/lib/route.h
@@ -29,11 +29,10 @@ typedef struct rte {
   u8 generation;	/* If this route import is based on other previously exported route,
			   this value should be 1 + MAX(generation of the parent routes).
			   Otherwise the route is independent and this value is zero. */
+  u8 stale_cycle;	/* Auxiliary value for route refresh */
 } rte;
 
 #define REF_FILTERED	2	/* Route is rejected by import filter */
-#define REF_STALE	4	/* Route is stale in a refresh cycle */
-#define REF_DISCARD	8	/* Route is scheduled for discard */
 #define REF_MODIFY	16	/* Route is scheduled for modify */
 #define REF_PENDING	32	/* Route has not propagated completely yet */
diff --git a/nest/rt-show.c b/nest/rt-show.c
index c3294518..dd0fe595 100644
--- a/nest/rt-show.c
+++ b/nest/rt-show.c
@@ -77,7 +77,11 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary
 	e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info);
 
   if (d->verbose)
+  {
     ea_show_list(c, a);
+    cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS",
+	e->src->private_id, e->src->global_id, e->stale_cycle);
+  }
   else if (dest == RTD_UNICAST)
     ea_show_nexthop_list(c, nhad);
   else if (had)
diff --git a/nest/rt-table.c b/nest/rt-table.c
index d30573de..2ba28e33 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -1323,7 +1323,8 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr
 	{
 	  /* No changes, ignore the new route and refresh the old one */
 
-	  old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY);
+	  old->flags &= ~REF_MODIFY;
+	  old->stale_cycle = new->stale_cycle;
 
 	  if (!rte_is_filtered(new))
 	    {
@@ -1639,6 +1640,9 @@ rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rt
       nn = net_get(hook->table, n);
       new->net = nn->n.addr;
       new->sender = hook;
+
+      /* Set the stale cycle */
+      new->stale_cycle = hook->stale_set;
     }
   else if (!(nn = net_find(hook->table, n)))
     {
@@ -1916,15 +1920,38 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r
  * flag in rt_refresh_end() and then removing such routes in the prune loop.
  */
 void
-rt_refresh_begin(rtable *t, struct rt_import_request *req)
+rt_refresh_begin(struct rt_import_request *req)
 {
-  FIB_WALK(&t->fib, net, n)
-    {
-      for (struct rte_storage *e = n->routes; e; e = e->next)
-	if (e->rte.sender == req->hook)
-	  e->rte.flags |= REF_STALE;
-    }
-  FIB_WALK_END;
+  struct rt_import_hook *hook = req->hook;
+  ASSERT_DIE(hook);
+  ASSERT_DIE(hook->stale_set == hook->stale_valid);
+
+  /* If the pruning routine is too slow */
+  if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid)
+      || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128))
+  {
+    log(L_WARN "Route refresh flood in table %s", hook->table->name);
+    FIB_WALK(&hook->table->fib, net, n)
+      {
+	for (struct rte_storage *e = n->routes; e; e = e->next)
+	  if (e->rte.sender == req->hook)
+	    e->rte.stale_cycle = 0;
+      }
+    FIB_WALK_END;
+    hook->stale_set = 1;
+    hook->stale_valid = 0;
+    hook->stale_pruned = 0;
+  }
+  /* Setting a new value of the stale modifier */
+  else if (!++hook->stale_set)
+  {
+    /* Let's reserve the stale_cycle zero value for always-invalid routes */
+    hook->stale_set = 1;
+    hook->stale_valid = 0;
+  }
+
+  if (req->trace_routes & D_STATES)
+    log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set);
 }
 
 /**
@@ -1936,34 +1963,32 @@ rt_refresh_begin(rtable *t, struct rt_import_request *req)
  * hook. See rt_refresh_begin() for description of refresh cycles.
  */
 void
-rt_refresh_end(rtable *t, struct rt_import_request *req)
+rt_refresh_end(struct rt_import_request *req)
 {
-  int prune = 0;
+  struct rt_import_hook *hook = req->hook;
+  ASSERT_DIE(hook);
 
-  FIB_WALK(&t->fib, net, n)
-    {
-      for (struct rte_storage *e = n->routes; e; e = e->next)
-	if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE))
-	  {
-	    e->rte.flags |= REF_DISCARD;
-	    prune = 1;
-	  }
-    }
-  FIB_WALK_END;
+  hook->stale_valid++;
+  ASSERT_DIE(hook->stale_set == hook->stale_valid);
 
-  if (prune)
-    rt_schedule_prune(t);
+  rt_schedule_prune(hook->table);
+
+  if (req->trace_routes & D_STATES)
+    log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid);
 }
 
 void
 rt_modify_stale(rtable *t, struct rt_import_request *req)
 {
   int prune = 0;
+  struct rt_import_hook *s = req->hook;
 
   FIB_WALK(&t->fib, net, n)
     {
       for (struct rte_storage *e = n->routes; e; e = e->next)
-	if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED))
+	if ((e->rte.sender == s) &&
+	    !(e->rte.flags & REF_FILTERED) &&
+	    (e->rte.stale_cycle + 1 == s->stale_set))
 	  {
 	    e->rte.flags |= REF_MODIFY;
 	    prune = 1;
@@ -2434,6 +2459,13 @@ rt_prune_table(rtable *tab)
   WALK_LIST2(ih, n, tab->imports, n)
     if (ih->import_state == TIS_STOP)
       rt_set_import_state(ih, TIS_FLUSHING);
+    else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned))
+    {
+      ih->stale_pruning = ih->stale_valid;
+
+      if (ih->req->trace_routes & D_STATES)
+	log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning);
+    }
 
   FIB_ITERATE_INIT(fit, &tab->fib);
   tab->prune_state = 2;
@@ -2459,7 +2491,10 @@ again:
       for (struct rte_storage *e=n->routes; e; e=e->next)
 	{
-	  if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD))
+	  struct rt_import_hook *s = e->rte.sender;
+	  if ((s->import_state == TIS_FLUSHING) ||
+	      (e->rte.stale_cycle < s->stale_valid) ||
+	      (e->rte.stale_cycle > s->stale_set))
 	    {
 	      rte_discard(n, &e->rte);
 	      limit--;
@@ -2544,6 +2579,12 @@ again:
       mb_free(ih);
       rt_unlock_table(tab);
     }
+    else if (ih->stale_pruning != ih->stale_pruned)
+    {
+      ih->stale_pruned = ih->stale_pruning;
+      if (ih->req->trace_routes & D_STATES)
+	log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned);
+    }
 }
 
 /**
diff --git a/nest/rt.h b/nest/rt.h
--- a/nest/rt.h
+++ b/nest/rt.h
@@ -201,6 +201,10 @@ struct rt_import_hook {
   btime last_state_change;	/* Time of last state transition */
 
   u8 import_state;		/* IS_* */
+  u8 stale_set;			/* Set this stale_cycle to imported routes */
+  u8 stale_valid;		/* Routes with this stale_cycle and bigger are considered valid */
+  u8 stale_pruned;		/* Last prune finished when this value was set at stale_valid */
+  u8 stale_pruning;		/* Last prune started when this value was set at stale_valid */
 
   void (*stopped)(struct rt_import_request *);	/* Stored callback when import is stopped */
 };
@@ -381,8 +385,8 @@ net *net_get(rtable *tab, const net_addr *addr);
 net *net_route(rtable *tab, const net_addr *n);
 int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
 rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);
-void rt_refresh_begin(rtable *t, struct rt_import_request *);
-void rt_refresh_end(rtable *t, struct rt_import_request *);
+void rt_refresh_begin(struct rt_import_request *);
+void rt_refresh_end(struct rt_import_request *);
 void rt_modify_stale(rtable *t, struct rt_import_request *);
 void rt_schedule_prune(rtable *t);
 void rte_dump(struct rte_storage *);
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 6ffe8824..d9008b9a 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -760,16 +760,16 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
       {
       case BGP_GRS_NONE:
	c->gr_active = BGP_GRS_ACTIVE;
-	rt_refresh_begin(c->c.table, &c->c.in_req);
+	rt_refresh_begin(&c->c.in_req);
	break;
 
       case BGP_GRS_ACTIVE:
-	rt_refresh_end(c->c.table, &c->c.in_req);
-	rt_refresh_begin(c->c.table, &c->c.in_req);
+	rt_refresh_end(&c->c.in_req);
+	rt_refresh_begin(&c->c.in_req);
	break;
 
       case BGP_GRS_LLGR:
-	rt_refresh_begin(c->c.table, &c->c.in_req);
+	rt_refresh_begin(&c->c.in_req);
	rt_modify_stale(c->c.table, &c->c.in_req);
	break;
       }
@@ -777,8 +777,8 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
     else
       {
	/* Just flush the routes */
-	rt_refresh_begin(c->c.table, &c->c.in_req);
-	rt_refresh_end(c->c.table, &c->c.in_req);
+	rt_refresh_begin(&c->c.in_req);
+	rt_refresh_end(&c->c.in_req);
       }
 
     /* Reset bucket and prefix tables */
@@ -819,7 +819,7 @@ bgp_graceful_restart_done(struct bgp_channel *c)
     BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
 
   tm_stop(c->stale_timer);
-  rt_refresh_end(c->c.table, &c->c.in_req);
+  rt_refresh_end(&c->c.in_req);
 }
 
 /**
@@ -899,7 +899,7 @@ bgp_refresh_begin(struct bgp_channel *c)
   { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
 
   c->load_state = BFS_REFRESHING;
-  rt_refresh_begin(c->c.table, &c->c.in_req);
+  rt_refresh_begin(&c->c.in_req);
 }
 
 /**
@@ -920,7 +920,7 @@ bgp_refresh_end(struct bgp_channel *c)
   { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
 
   c->load_state = BFS_NONE;
-  rt_refresh_end(c->c.table, &c->c.in_req);
+  rt_refresh_end(&c->c.in_req);
 }
diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c
index 4a52b54b..108da61b 100644
--- a/proto/rpki/packets.c
+++ b/proto/rpki/packets.c
@@ -661,9 +661,9 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_
      * a refresh cycle.
      */
     if (cache->p->roa4_channel)
-      rt_refresh_begin(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req);
+      rt_refresh_begin(&cache->p->roa4_channel->in_req);
     if (cache->p->roa6_channel)
-      rt_refresh_begin(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req);
+      rt_refresh_begin(&cache->p->roa6_channel->in_req);
 
     cache->p->refresh_channels = 1;
   }
@@ -846,9 +846,9 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da
   {
     cache->p->refresh_channels = 0;
     if (cache->p->roa4_channel)
-      rt_refresh_end(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req);
+      rt_refresh_end(&cache->p->roa4_channel->in_req);
     if (cache->p->roa6_channel)
-      rt_refresh_end(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req);
+      rt_refresh_end(&cache->p->roa6_channel->in_req);
   }
 
   cache->last_update = current_time();