From fb829de69052755a31d76d73e17525d050e5ff4d Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Wed, 28 Mar 2012 18:40:04 +0200 Subject: Fixes responsiveness for protocol shutdown. When a protocol went down, all its routes were flushed in one step, that may block BIRD for too much time. The patch fixes that by limiting maximum number of routes flushed in one step. --- nest/proto.c | 118 +++++++++++++++++++++++++++++++++++----------------- nest/protocol.h | 1 + nest/route.h | 5 ++- nest/rt-table.c | 125 +++++++++++++++++++++++++++++++++++++++++--------------- 4 files changed, 177 insertions(+), 72 deletions(-) diff --git a/nest/proto.c b/nest/proto.c index 0fc72ce1..0caf7dbc 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -38,7 +38,7 @@ static event *proto_flush_event; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; static char *c_states[] = { "HUNGRY", "FEEDING", "HAPPY", "FLUSHING" }; -static void proto_flush_all(void *); +static void proto_flush_loop(void *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); @@ -635,7 +635,7 @@ protos_build(void) #endif proto_pool = rp_new(&root_pool, "Protocols"); proto_flush_event = ev_new(proto_pool); - proto_flush_event->hook = proto_flush_all; + proto_flush_event->hook = proto_flush_loop; } static void @@ -691,20 +691,6 @@ proto_feed_initial(void *P) proto_feed_more(P); } -static void -proto_schedule_flush(struct proto *p) -{ - /* Need to abort feeding */ - if (p->core_state == FS_FEEDING) - rt_feed_baby_abort(p); - - DBG("%s: Scheduling flush\n", p->name); - p->core_state = FS_FLUSHING; - proto_relink(p); - proto_flush_hooks(p); - ev_schedule(proto_flush_event); -} - static void proto_schedule_feed(struct proto *p, int initial) { @@ -721,6 +707,85 @@ proto_schedule_feed(struct proto *p, int initial) ev_schedule(p->attn); } +/* + * Flushing loop is responsible for flushing routes and protocols + * after they went down. It runs in proto_flush_event. 
At the start of + * one round, protocols waiting to flush are marked in + * proto_schedule_flush_loop(). At the end of the round (when routing + * table flush is complete), marked protocols are flushed and the next + * round may start. + */ + +static int flush_loop_state; /* 1 -> running */ + +static void +proto_schedule_flush_loop(void) +{ + struct proto *p; + + if (flush_loop_state) + return; + flush_loop_state = 1; + + rt_schedule_prune_all(); + WALK_LIST(p, flush_proto_list) + p->flushing = 1; + + ev_schedule(proto_flush_event); +} + +static void +proto_flush_loop(void *unused UNUSED) +{ + struct proto *p; + + if (! rt_prune_loop()) + { + /* Rtable pruning is not finished */ + ev_schedule(proto_flush_event); + return; + } + + again: + WALK_LIST(p, flush_proto_list) + if (p->flushing) + { + /* This will flush interfaces in the same manner + as rt_prune_loop() flushes routes */ + if (p->proto == &proto_unix_iface) + if_flush_ifaces(p); + + DBG("Flushing protocol %s\n", p->name); + p->flushing = 0; + p->core_state = FS_HUNGRY; + proto_relink(p); + if (p->proto_state == PS_DOWN) + proto_fell_down(p); + goto again; + } + + /* This round finished, perhaps there will be another one */ + flush_loop_state = 0; + if (!EMPTY_LIST(flush_proto_list)) + proto_schedule_flush_loop(); +} + +static void +proto_schedule_flush(struct proto *p) +{ + /* Need to abort feeding */ + if (p->core_state == FS_FEEDING) + rt_feed_baby_abort(p); + + DBG("%s: Scheduling flush\n", p->name); + p->core_state = FS_FLUSHING; + proto_relink(p); + proto_flush_hooks(p); + proto_schedule_flush_loop(); +} + + + /** * proto_request_feeding - request feeding routes to the protocol * @p: given protocol @@ -810,27 +875,6 @@ proto_notify_state(struct proto *p, unsigned ps) } } -static void -proto_flush_all(void *unused UNUSED) -{ - struct proto *p; - - rt_prune_all(); - while ((p = HEAD(flush_proto_list))->n.next) - { - /* This will flush interfaces in the same manner - like rt_prune_all() flushes routes */ 
- if (p->proto == &proto_unix_iface) - if_flush_ifaces(p); - - DBG("Flushing protocol %s\n", p->name); - p->core_state = FS_HUNGRY; - proto_relink(p); - if (p->proto_state == PS_DOWN) - proto_fell_down(p); - } -} - /* * CLI Commands */ diff --git a/nest/protocol.h b/nest/protocol.h index a83c4ffc..c914c40a 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -144,6 +144,7 @@ struct proto { unsigned core_goal; /* State we want to reach (see below) */ unsigned reconfiguring; /* We're shutting down due to reconfiguration */ unsigned refeeding; /* We are refeeding (valid only if core_state == FS_FEEDING) */ + unsigned flushing; /* Protocol is flushed in current flush loop round */ u32 hash_key; /* Random key used for hashing of neighbors */ bird_clock_t last_state_change; /* Time of last state transition */ char *last_state_name_announced; /* Last state name we've announced to the user */ diff --git a/nest/route.h b/nest/route.h index ea948838..ce68017d 100644 --- a/nest/route.h +++ b/nest/route.h @@ -139,8 +139,10 @@ typedef struct rtable { int gc_counter; /* Number of operations since last GC */ bird_clock_t gc_time; /* Time of last GC */ byte gc_scheduled; /* GC is scheduled */ + byte prune_state; /* Table prune state, 1 -> prune is running */ byte hcu_scheduled; /* Hostcache update is scheduled */ byte nhu_state; /* Next Hop Update state */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ } rtable; @@ -244,7 +246,8 @@ void rt_dump(rtable *); void rt_dump_all(void); int rt_feed_baby(struct proto *p); void rt_feed_baby_abort(struct proto *p); -void rt_prune_all(void); +void rt_schedule_prune_all(void); +int rt_prune_loop(void); struct rtable_config *rt_new_table(struct symbol *s); struct rt_show_data { diff --git a/nest/rt-table.c b/nest/rt-table.c index 377687de..27adde16 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -55,7 +55,6 @@ static void rt_free_hostcache(rtable *tab); 
static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); -static void rt_prune(rtable *tab); static inline void rt_schedule_gc(rtable *tab); @@ -837,6 +836,38 @@ rt_schedule_nhu(rtable *tab) tab->nhu_state |= 1; } +static void +rt_prune_nets(rtable *tab) +{ + struct fib_iterator fit; + int ncnt = 0, ndel = 0; + +#ifdef DEBUGGING + fib_check(&tab->fib); +#endif + + FIB_ITERATE_INIT(&fit, &tab->fib); +again: + FIB_ITERATE_START(&tab->fib, &fit, f) + { + net *n = (net *) f; + ncnt++; + if (!n->routes) /* Orphaned FIB entry */ + { + FIB_ITERATE_PUT(&fit, f); + fib_delete(&tab->fib, f); + ndel++; + goto again; + } + } + FIB_ITERATE_END(f); + DBG("Pruned %d of %d networks\n", ndel, ncnt); + + tab->gc_counter = 0; + tab->gc_time = now; + tab->gc_scheduled = 0; +} + static void rt_event(void *ptr) { @@ -849,7 +880,7 @@ rt_event(void *ptr) rt_next_hop_update(tab); if (tab->gc_scheduled) - rt_prune(tab); + rt_prune_nets(tab); } void @@ -885,70 +916,96 @@ rt_init(void) init_list(&routing_tables); } -/** - * rt_prune - prune a routing table - * @tab: routing table to be pruned - * - * This function is called whenever a protocol shuts down. It scans - * the routing table and removes all routes belonging to inactive - * protocols and also stale network entries. 
- */ -static void -rt_prune(rtable *tab) + +/* Called from proto_schedule_flush_loop() only, + ensuring that all prune states are zero */ +void +rt_schedule_prune_all(void) { - struct fib_iterator fit; - int rcnt = 0, rdel = 0, ncnt = 0, ndel = 0; + rtable *t; + + WALK_LIST(t, routing_tables) + t->prune_state = 1; +} + +static inline int +rt_prune_step(rtable *tab, int *max_feed) +{ + struct fib_iterator *fit = &tab->prune_fit; DBG("Pruning route table %s\n", tab->name); #ifdef DEBUGGING fib_check(&tab->fib); #endif - FIB_ITERATE_INIT(&fit, &tab->fib); + + if (tab->prune_state == 0) + return 1; + + if (tab->prune_state == 1) + { + FIB_ITERATE_INIT(fit, &tab->fib); + tab->prune_state = 2; + } + again: - FIB_ITERATE_START(&tab->fib, &fit, f) + FIB_ITERATE_START(&tab->fib, fit, fn) { - net *n = (net *) f; + net *n = (net *) fn; rte *e; - ncnt++; + rescan: - for (e=n->routes; e; e=e->next, rcnt++) + for (e=n->routes; e; e=e->next) if (e->sender->core_state != FS_HAPPY && e->sender->core_state != FS_FEEDING) { + if (*max_feed <= 0) + { + FIB_ITERATE_PUT(fit, fn); + return 0; + } + rte_discard(tab, e); - rdel++; + (*max_feed)--; + goto rescan; } - if (!n->routes) /* Orphaned FIB entry? */ + if (!n->routes) /* Orphaned FIB entry */ { - FIB_ITERATE_PUT(&fit, f); - fib_delete(&tab->fib, f); - ndel++; + FIB_ITERATE_PUT(fit, fn); + fib_delete(&tab->fib, fn); goto again; } } - FIB_ITERATE_END(f); - DBG("Pruned %d of %d routes and %d of %d networks\n", rdel, rcnt, ndel, ncnt); + FIB_ITERATE_END(fn); + #ifdef DEBUGGING fib_check(&tab->fib); #endif - tab->gc_counter = 0; - tab->gc_time = now; - tab->gc_scheduled = 0; + + tab->prune_state = 0; + return 1; } /** - * rt_prune_all - prune all routing tables + * rt_prune_loop - prune routing tables + * Takes no arguments; it walks the routing_tables list itself. * - * This function calls rt_prune() for all known routing tables. + * The prune loop scans routing tables and removes routes belonging to + * inactive protocols and also stale network entries. 
Returns 1 when + * all such routes are pruned. It is a part of the protocol flushing + * loop. */ -void -rt_prune_all(void) +int +rt_prune_loop(void) { rtable *t; + int max_feed = 512; WALK_LIST(t, routing_tables) - rt_prune(t); + if (! rt_prune_step(t, &max_feed)) + return 0; + + return 1; } void -- cgit v1.2.3