summaryrefslogtreecommitdiff
path: root/proto/bgp/bgp.c
diff options
context:
space:
mode:
authorMaria Matejka <mq@ucw.cz>2022-07-12 10:36:10 +0200
committerMaria Matejka <mq@ucw.cz>2022-07-12 12:22:41 +0200
commit080cbd1219ba86dd44712d0d24ceae884b34ec4b (patch)
tree86bf2e0153ab7365224d454b145e1ae5fac6c1d6 /proto/bgp/bgp.c
parent4ef2262bd575b071e43c30d0199398a5f6ac27a5 (diff)
Route refresh in tables uses a stale counter.
Until now, we were marking routes as REF_STALE and REF_DISCARD to cleanup old routes after route refresh. This needed a synchronous route table walk at both beginning and the end of route refresh routine, marking the routes by the flags. We avoid these walks by using a stale counter. Every route contains: u8 stale_cycle; Every import hook contains: u8 stale_set; u8 stale_valid; u8 stale_pruned; u8 stale_pruning; In base_state, stale_set == stale_valid == stale_pruned == stale_pruning and all routes' stale_cycle also have the same value. The route refresh looks like follows: + ----------- + --------- + ----------- + ------------- + ------------ + | | stale_set | stale_valid | stale_pruning | stale_pruned | | Base | x | x | x | x | | Begin | x+1 | x | x | x | ... now routes are being inserted with stale_cycle == (x+1) | End | x+1 | x+1 | x | x | ... now table pruning routine is scheduled | Prune begin | x+1 | x+1 | x+1 | x | ... now routes with stale_cycle not between stale_set and stale_valid are deleted | Prune end | x+1 | x+1 | x+1 | x+1 | + ----------- + --------- + ----------- + ------------- + ------------ + The pruning routine is asynchronous and may have high latency in high-load environments. Therefore, multiple route refresh requests may happen before the pruning routine starts, leading to this situation: | Prune begin | x+k | x+k | x -> x+k | x | ... or even | Prune begin | x+k+1 | x+k | x -> x+k | x | ... if the prune event starts while another route refresh is running. In such a case, the pruning routine still deletes routes not fitting between stale_set and and stale_valid, effectively pruning the remnants of all unpruned route refreshes from before: | Prune end | x+k | x+k | x+k | x+k | In extremely rare cases, there may happen too many route refreshes before any route prune routine finishes. If the difference between stale_valid and stale_pruned becomes more than 128 when requesting for another route refresh, the routine walks the table synchronously and resets all the stale values to a base state, while logging a warning.
Diffstat (limited to 'proto/bgp/bgp.c')
-rw-r--r--proto/bgp/bgp.c18
1 files changed, 9 insertions, 9 deletions
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 6ffe8824..d9008b9a 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -760,16 +760,16 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
{
case BGP_GRS_NONE:
c->gr_active = BGP_GRS_ACTIVE;
- rt_refresh_begin(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
break;
case BGP_GRS_ACTIVE:
- rt_refresh_end(c->c.table, &c->c.in_req);
- rt_refresh_begin(c->c.table, &c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
break;
case BGP_GRS_LLGR:
- rt_refresh_begin(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
rt_modify_stale(c->c.table, &c->c.in_req);
break;
}
@@ -777,8 +777,8 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
else
{
/* Just flush the routes */
- rt_refresh_begin(c->c.table, &c->c.in_req);
- rt_refresh_end(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
/* Reset bucket and prefix tables */
@@ -819,7 +819,7 @@ bgp_graceful_restart_done(struct bgp_channel *c)
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
tm_stop(c->stale_timer);
- rt_refresh_end(c->c.table, &c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
/**
@@ -899,7 +899,7 @@ bgp_refresh_begin(struct bgp_channel *c)
{ log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
c->load_state = BFS_REFRESHING;
- rt_refresh_begin(c->c.table, &c->c.in_req);
+ rt_refresh_begin(&c->c.in_req);
}
/**
@@ -920,7 +920,7 @@ bgp_refresh_end(struct bgp_channel *c)
{ log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
c->load_state = BFS_NONE;
- rt_refresh_end(c->c.table, &c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}