summary refs log tree commit diff
path: root/nest
diff options
context:
space:
mode:
author Ondrej Zajicek <santiago@crfreenet.org> 2014-03-20 14:07:12 +0100
committer Ondrej Zajicek <santiago@crfreenet.org> 2014-03-20 14:07:12 +0100
commit 0c791f873aeb7c1052c97db7da4fe23873d69603 (patch)
tree 48496c5965cb6e9f54d7863827c35054c3697c19 /nest
parent 4e398e34bf140baf73fe8dceaf81078fb343f65a (diff)
BGP graceful restart support.
Also significant core protocol state changes needed for that, global graceful restart recovery state and kernel proto support for recovery.
Diffstat (limited to 'nest')
-rw-r--r-- nest/cmds.c 3
-rw-r--r-- nest/config.Y 6
-rw-r--r-- nest/proto.c 388
-rw-r--r-- nest/protocol.h 31
-rw-r--r-- nest/route.h 17
-rw-r--r-- nest/rt-table.c 100
6 files changed, 422 insertions, 123 deletions
diff --git a/nest/cmds.c b/nest/cmds.c
index ec6bc762..70fbdaf8 100644
--- a/nest/cmds.c
+++ b/nest/cmds.c
@@ -7,6 +7,7 @@
*/
#include "nest/bird.h"
+#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/cli.h"
#include "conf/conf.h"
@@ -32,6 +33,8 @@ cmd_show_status(void)
tm_format_datetime(tim, &config->tf_base, config->load_time);
cli_msg(-1011, "Last reconfiguration on %s", tim);
+ graceful_restart_show_status();
+
if (shutting_down)
cli_msg(13, "Shutdown in progress");
else if (configuring)
diff --git a/nest/config.Y b/nest/config.Y
index e9b8a21b..59d354b8 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -49,6 +49,7 @@ CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFA
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH, AS)
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
+CF_KEYWORDS(GRACEFUL, RESTART, WAIT)
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
@@ -110,6 +111,11 @@ listen_opt:
;
+CF_ADDTO(conf, gr_opts)
+
+gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->gr_wait = $4; } ;
+
+
/* Creation of routing tables */
tab_sorted:
diff --git a/nest/proto.c b/nest/proto.c
index cfa6ff4b..2bc3e319 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -35,9 +35,18 @@ static struct proto *initial_device_proto;
static event *proto_flush_event;
static timer *proto_shutdown_timer;
+static timer *gr_wait_timer;
+
+#define GRS_NONE 0
+#define GRS_INIT 1
+#define GRS_ACTIVE 2
+#define GRS_DONE 3
+
+static int graceful_restart_state;
+static u32 graceful_restart_locks;
static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
-static char *c_states[] = { "HUNGRY", "FEEDING", "HAPPY", "FLUSHING" };
+static char *c_states[] = { "HUNGRY", "???", "HAPPY", "FLUSHING" };
static void proto_flush_loop(void *);
static void proto_shutdown_loop(struct timer *);
@@ -51,10 +60,12 @@ proto_enqueue(list *l, struct proto *p)
}
static void
-proto_relink(struct proto *p)
+proto_set_core_state(struct proto *p, uint state)
{
list *l = NULL;
+ p->core_state = state;
+
if (p->debug & D_STATES)
{
char *name = proto_state_name(p);
@@ -66,13 +77,13 @@ proto_relink(struct proto *p)
}
else
p->last_state_name_announced = NULL;
+
rem_node(&p->n);
switch (p->core_state)
{
case FS_HUNGRY:
l = &inactive_proto_list;
break;
- case FS_FEEDING:
case FS_HAPPY:
l = &active_proto_list;
break;
@@ -126,6 +137,9 @@ proto_init_instance(struct proto *p)
p->attn = ev_new(p->pool);
p->attn->data = p;
+ if (graceful_restart_state == GRS_INIT)
+ p->gr_recovery = 1;
+
if (! p->proto->multitable)
rt_lock_table(p->table);
}
@@ -169,7 +183,7 @@ proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *s
h->next = p->ahooks;
p->ahooks = h;
- if (p->rt_notify)
+ /* Link the hook at once whenever export is active (feeding or ready);
+ * while export is down the hooks are linked later by proto_link_ahooks(). */
+ if (p->rt_notify && (p->export_state != ES_DOWN))
add_tail(&t->hooks, &h->n);
return h;
}
@@ -194,6 +208,16 @@ proto_find_announce_hook(struct proto *p, struct rtable *t)
}
+/*
+ * Append every announce hook of @p to the hook list of the table the
+ * hook belongs to. Nothing is linked when the protocol has no
+ * rt_notify callback.
+ */
static void
+proto_link_ahooks(struct proto *p)
+{
+  struct announce_hook *hook;
+
+  if (!p->rt_notify)
+    return;
+
+  for (hook = p->ahooks; hook != NULL; hook = hook->next)
+    add_tail(&hook->table->hooks, &hook->n);
+}
+
+static void
proto_unlink_ahooks(struct proto *p)
{
struct announce_hook *h;
@@ -362,6 +386,7 @@ proto_init(struct proto_config *c)
q->proto_state = PS_DOWN;
q->core_state = FS_HUNGRY;
+ q->export_state = ES_DOWN;
q->last_state_change = now;
proto_enqueue(&initial_proto_list, q);
@@ -590,6 +615,7 @@ static void
proto_rethink_goal(struct proto *p)
{
struct protocol *q;
+ byte goal;
if (p->reconfiguring && p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
{
@@ -606,22 +632,14 @@ proto_rethink_goal(struct proto *p)
/* Determine what state we want to reach */
if (p->disabled || p->reconfiguring)
- {
- p->core_goal = FS_HUNGRY;
- if (p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
- return;
- }
+ goal = PS_DOWN;
else
- {
- p->core_goal = FS_HAPPY;
- if (p->core_state == FS_HAPPY && p->proto_state == PS_UP)
- return;
- }
+ goal = PS_UP;
q = p->proto;
- if (p->core_goal == FS_HAPPY) /* Going up */
+ if (goal == PS_UP) /* Going up */
{
- if (p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
+ if (p->proto_state == PS_DOWN && p->core_state == FS_HUNGRY)
{
DBG("Kicking %s up\n", p->name);
PD(p, "Starting");
@@ -640,6 +658,104 @@ proto_rethink_goal(struct proto *p)
}
}
+
+
+static void graceful_restart_done(struct timer *t UNUSED);
+static void proto_want_export_up(struct proto *p);
+
+/**
+ * graceful_restart_recovery - request graceful restart recovery
+ *
+ * Puts the global graceful restart state into GRS_INIT. Protocol
+ * instances initialized while this state is set get their gr_recovery
+ * flag raised in proto_init_instance().
+ */
+void
+graceful_restart_recovery(void)
+{
+  graceful_restart_state = GRS_INIT;
+}
+
+/**
+ * graceful_restart_init - begin the graceful restart wait period
+ *
+ * Does nothing unless recovery was requested (state other than
+ * GRS_NONE). If no protocol holds a graceful restart lock, recovery is
+ * completed at once through graceful_restart_done(). Otherwise the
+ * state becomes GRS_ACTIVE and a timer with graceful_restart_done() as
+ * its hook is started with config->gr_wait.
+ */
+void
+graceful_restart_init(void)
+{
+  timer *wait;
+
+  if (graceful_restart_state == GRS_NONE)
+    return;
+
+  log(L_INFO "Graceful restart started");
+
+  if (graceful_restart_locks == 0)
+  {
+    /* Nobody asked us to wait */
+    graceful_restart_done(NULL);
+    return;
+  }
+
+  graceful_restart_state = GRS_ACTIVE;
+
+  wait = tm_new(proto_pool);
+  wait->hook = graceful_restart_done;
+  gr_wait_timer = wait;
+  tm_start(wait, config->gr_wait);
+}
+
+/*
+ * Finish graceful restart recovery. Installed as the hook of
+ * gr_wait_timer and also called directly with NULL from
+ * graceful_restart_init(); the timer argument is not used.
+ *
+ * Sets the state to GRS_DONE, starts the postponed export for every
+ * recovering protocol that is up and waiting, clears the per-protocol
+ * recovery flags and resets the global lock counter.
+ */
+static void
+graceful_restart_done(struct timer *t UNUSED)
+{
+  struct proto *proto;
+  node *pos;
+
+  log(L_INFO "Graceful restart done");
+  graceful_restart_state = GRS_DONE;
+
+  WALK_LIST2(proto, pos, proto_list, glob_node)
+  {
+    if (proto->gr_recovery)
+    {
+      /* Resume postponed export of routes */
+      if (proto->gr_wait && (proto->proto_state == PS_UP))
+        proto_want_export_up(proto);
+
+      /* Cleanup */
+      proto->gr_lock = 0;
+      proto->gr_wait = 0;
+      proto->gr_recovery = 0;
+    }
+  }
+
+  graceful_restart_locks = 0;
+}
+
+/**
+ * graceful_restart_show_status - report recovery progress on the CLI
+ *
+ * Prints three status lines (in-progress notice, number of protocols
+ * still holding a lock, remaining/configured wait time), but only
+ * while the graceful restart state is GRS_ACTIVE.
+ */
+void
+graceful_restart_show_status(void)
+{
+  if (graceful_restart_state == GRS_ACTIVE)
+  {
+    cli_msg(-24, "Graceful restart recovery in progress");
+    cli_msg(-24, " Waiting for %d protocols to recover", graceful_restart_locks);
+    cli_msg(-24, " Wait timer is %d/%d", tm_remains(gr_wait_timer), config->gr_wait);
+  }
+}
+
+/**
+ * proto_graceful_restart_lock - make graceful restart wait for a protocol
+ * @p: protocol instance
+ *
+ * Meant to be called just from the protocol start hook. Asserts that
+ * the global state is GRS_INIT and that @p takes part in recovery.
+ * The global lock counter is raised once per protocol; calling this
+ * again for an already locked protocol changes nothing.
+ */
+void
+proto_graceful_restart_lock(struct proto *p)
+{
+  ASSERT(graceful_restart_state == GRS_INIT);
+  ASSERT(p->gr_recovery);
+
+  if (!p->gr_lock)
+  {
+    p->gr_lock = 1;
+    graceful_restart_locks++;
+  }
+}
+
+/**
+ * proto_graceful_restart_unlock - release a protocol's graceful restart lock
+ * @p: protocol instance
+ *
+ * No-op when @p holds no lock. Otherwise the lock flag is cleared and
+ * the global counter decremented. When the last lock goes away while
+ * recovery is GRS_ACTIVE, the wait timer is restarted with a zero
+ * timeout (presumably so that its hook, graceful_restart_done(), runs
+ * without waiting for the configured period - confirm against tm_start()).
+ */
+void
+proto_graceful_restart_unlock(struct proto *p)
+{
+  if (p->gr_lock)
+  {
+    p->gr_lock = 0;
+    graceful_restart_locks--;
+
+    if (!graceful_restart_locks && (graceful_restart_state == GRS_ACTIVE))
+      tm_start(gr_wait_timer, 0);
+  }
+}
+
+
+
/**
* protos_dump_all - dump status of all protocols
*
@@ -751,6 +867,8 @@ protos_build(void)
proto_flush_event->hook = proto_flush_loop;
proto_shutdown_timer = tm_new(proto_pool);
proto_shutdown_timer->hook = proto_shutdown_loop;
+ proto_shutdown_timer = tm_new(proto_pool);
+ proto_shutdown_timer->hook = proto_shutdown_loop;
}
static void
@@ -779,15 +897,17 @@ proto_feed_more(void *P)
{
struct proto *p = P;
- if (p->core_state != FS_FEEDING)
+ if (p->export_state != ES_FEEDING)
return;
DBG("Feeding protocol %s continued\n", p->name);
if (rt_feed_baby(p))
{
- p->core_state = FS_HAPPY;
- proto_relink(p);
- DBG("Protocol %s up and running\n", p->name);
+ DBG("Feeding protocol %s finished\n", p->name);
+ p->export_state = ES_READY;
+
+ if (p->feed_done)
+ p->feed_done(p);
}
else
{
@@ -801,7 +921,7 @@ proto_feed_initial(void *P)
{
struct proto *p = P;
- if (p->core_state != FS_FEEDING)
+ if (p->export_state != ES_FEEDING)
return;
DBG("Feeding protocol %s\n", p->name);
@@ -814,40 +934,10 @@ static void
proto_schedule_feed(struct proto *p, int initial)
{
DBG("%s: Scheduling meal\n", p->name);
- p->core_state = FS_FEEDING;
- p->refeeding = !initial;
-
- /* FIXME: This should be changed for better support of multitable protos */
- if (!initial)
- {
- struct announce_hook *ah;
- for (ah = p->ahooks; ah; ah = ah->next)
- proto_reset_limit(ah->out_limit);
-
- /* Hack: reset exp_routes during refeed, and do not decrease it later */
- p->stats.exp_routes = 0;
- }
- /* Connect protocol to routing table */
- if (initial && !p->proto->multitable)
- {
- p->main_source = rt_get_source(p, 0);
- rt_lock_source(p->main_source);
-
- p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats);
- p->main_ahook->in_filter = p->cf->in_filter;
- p->main_ahook->out_filter = p->cf->out_filter;
- p->main_ahook->rx_limit = p->cf->rx_limit;
- p->main_ahook->in_limit = p->cf->in_limit;
- p->main_ahook->out_limit = p->cf->out_limit;
- p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered;
-
- proto_reset_limit(p->main_ahook->rx_limit);
- proto_reset_limit(p->main_ahook->in_limit);
- proto_reset_limit(p->main_ahook->out_limit);
- }
+ p->export_state = ES_FEEDING;
+ p->refeeding = !initial;
- proto_relink(p);
p->attn->hook = initial ? proto_feed_initial : proto_feed_more;
ev_schedule(p->attn);
}
@@ -877,7 +967,7 @@ proto_schedule_flush_loop(void)
{
p->flushing = 1;
for (h=p->ahooks; h; h=h->next)
- h->table->prune_state = 1;
+ rt_mark_for_prune(h->table);
}
ev_schedule(proto_flush_event);
@@ -908,8 +998,7 @@ proto_flush_loop(void *unused UNUSED)
DBG("Flushing protocol %s\n", p->name);
p->flushing = 0;
- p->core_state = FS_HUNGRY;
- proto_relink(p);
+ proto_set_core_state(p, FS_HUNGRY);
if (p->proto_state == PS_DOWN)
proto_fell_down(p);
goto again;
@@ -921,19 +1010,6 @@ proto_flush_loop(void *unused UNUSED)
proto_schedule_flush_loop();
}
-static void
-proto_schedule_flush(struct proto *p)
-{
- /* Need to abort feeding */
- if (p->core_state == FS_FEEDING)
- rt_feed_baby_abort(p);
-
- DBG("%s: Scheduling flush\n", p->name);
- p->core_state = FS_FLUSHING;
- proto_relink(p);
- proto_unlink_ahooks(p);
- proto_schedule_flush_loop();
-}
/* Temporary hack to propagate restart to BGP */
int proto_restart;
@@ -980,9 +1056,9 @@ proto_schedule_down(struct proto *p, byte restart, byte code)
*
* Sometimes it is needed to send again all routes to the
* protocol. This is called feeding and can be requested by this
- * function. This would cause protocol core state transition
- * to FS_FEEDING (during feeding) and when completed, it will
- * switch back to FS_HAPPY. This function can be called even
+ * function. This would cause protocol export state transition
+ * to ES_FEEDING (during feeding) and when completed, it will
+ * switch back to ES_READY. This function can be called even
* when feeding is already running, in that case it is restarted.
*/
void
@@ -991,7 +1067,7 @@ proto_request_feeding(struct proto *p)
ASSERT(p->proto_state == PS_UP);
/* If we are already feeding, we want to restart it */
- if (p->core_state == FS_FEEDING)
+ if (p->export_state == ES_FEEDING)
{
/* Unless feeding is in initial state */
if (p->attn->hook == proto_feed_initial)
@@ -1000,6 +1076,14 @@ proto_request_feeding(struct proto *p)
rt_feed_baby_abort(p);
}
+ /* FIXME: This should be changed for better support of multitable protos */
+ struct announce_hook *ah;
+ for (ah = p->ahooks; ah; ah = ah->next)
+ proto_reset_limit(ah->out_limit);
+
+ /* Hack: reset exp_routes during refeed, and do not decrease it later */
+ p->stats.exp_routes = 0;
+
proto_schedule_feed(p, 0);
}
@@ -1060,6 +1144,83 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32
}
}
+
+/*
+ * Bring the core side of protocol @p up (FS_HUNGRY -> FS_HAPPY).
+ *
+ * For protocols that are not multitable this obtains and locks the
+ * main route source and creates the main announce hook on p->table,
+ * copying filters and limits from the protocol configuration and
+ * resetting the three limits.
+ */
+static void
+proto_want_core_up(struct proto *p)
+{
+  struct announce_hook *ah;
+
+  ASSERT(p->core_state == FS_HUNGRY);
+
+  if (!p->proto->multitable)
+  {
+    p->main_source = rt_get_source(p, 0);
+    rt_lock_source(p->main_source);
+
+    /* Connect protocol to routing table */
+    ah = proto_add_announce_hook(p, p->table, &p->stats);
+    p->main_ahook = ah;
+
+    ah->in_filter = p->cf->in_filter;
+    ah->out_filter = p->cf->out_filter;
+    ah->rx_limit = p->cf->rx_limit;
+    ah->in_limit = p->cf->in_limit;
+    ah->out_limit = p->cf->out_limit;
+    ah->in_keep_filtered = p->cf->in_keep_filtered;
+
+    proto_reset_limit(ah->rx_limit);
+    proto_reset_limit(ah->in_limit);
+    proto_reset_limit(ah->out_limit);
+  }
+
+  proto_set_core_state(p, FS_HAPPY);
+}
+
+/*
+ * Start route export to protocol @p: link its announce hooks into
+ * their tables and schedule the initial feed.
+ *
+ * Expects the core state to be FS_HAPPY and the export state ES_DOWN.
+ */
+static void
+proto_want_export_up(struct proto *p)
+{
+  /* core_state holds FS_* values */
+  ASSERT(p->core_state == FS_HAPPY);
+  ASSERT(p->export_state == ES_DOWN);
+
+  proto_link_ahooks(p);
+  proto_schedule_feed(p, 1); /* Sets ES_FEEDING */
+}
+
+/*
+ * Stop route export to protocol @p. A feed that is still running is
+ * aborted first; then the export state is set to ES_DOWN and the
+ * announce hooks are unlinked. Must not be called when export is
+ * already down.
+ */
+static void
+proto_want_export_down(struct proto *p)
+{
+  int feeding;
+
+  ASSERT(p->export_state != ES_DOWN);
+
+  /* Need to abort feeding */
+  feeding = (p->export_state == ES_FEEDING);
+  if (feeding)
+    rt_feed_baby_abort(p);
+
+  p->export_state = ES_DOWN;
+  proto_unlink_ahooks(p);
+}
+
+/*
+ * Take the core side of protocol @p down: switch the core state to
+ * FS_FLUSHING and schedule the flush loop. For protocols that are not
+ * multitable the main route source is unlocked and forgotten.
+ *
+ * Expects the core state to be FS_HAPPY and export to be already down.
+ */
+static void
+proto_want_core_down(struct proto *p)
+{
+  /* core_state holds FS_* values */
+  ASSERT(p->core_state == FS_HAPPY);
+  ASSERT(p->export_state == ES_DOWN);
+
+  proto_set_core_state(p, FS_FLUSHING);
+  proto_schedule_flush_loop();
+
+  if (!p->proto->multitable)
+  {
+    rt_unlock_source(p->main_source);
+    p->main_source = NULL;
+  }
+}
+
+/*
+ * Drop the graceful restart involvement of protocol @p: clear its
+ * recovery and wait flags and release its graceful restart lock if it
+ * holds one.
+ */
+static void
+proto_falling_down(struct proto *p)
+{
+  p->gr_wait = 0;
+  p->gr_recovery = 0;
+
+  if (p->gr_lock != 0)
+    proto_graceful_restart_unlock(p);
+}
+
+
/**
* proto_notify_state - notify core about protocol state change
* @p: protocol the state of which has changed
@@ -1079,6 +1240,7 @@ proto_notify_state(struct proto *p, unsigned ps)
{
unsigned ops = p->proto_state;
unsigned cs = p->core_state;
+ unsigned es = p->export_state;
DBG("%s reporting state transition %s/%s -> */%s\n", p->name, c_states[cs], p_states[ops], p_states[ps]);
if (ops == ps)
@@ -1089,17 +1251,47 @@ proto_notify_state(struct proto *p, unsigned ps)
switch (ps)
{
+ case PS_START:
+ ASSERT(ops == PS_DOWN || ops == PS_UP);
+ ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY);
+
+ if (es != ES_DOWN)
+ proto_want_export_down(p);
+ break;
+
+ case PS_UP:
+ ASSERT(ops == PS_DOWN || ops == PS_START);
+ ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY);
+ ASSERT(es == ES_DOWN);
+
+ if (cs == FS_HUNGRY)
+ proto_want_core_up(p);
+ if (!p->gr_wait)
+ proto_want_export_up(p);
+ break;
+
+ case PS_STOP:
+ ASSERT(ops == PS_START || ops == PS_UP);
+
+ p->down_sched = 0;
+
+ if (es != ES_DOWN)
+ proto_want_export_down(p);
+ if (cs == FS_HAPPY)
+ proto_want_core_down(p);
+ proto_falling_down(p);
+ break;
+
case PS_DOWN:
p->down_code = 0;
p->down_sched = 0;
- if ((cs == FS_FEEDING) || (cs == FS_HAPPY))
- proto_schedule_flush(p);
- if (p->proto->multitable)
- {
- rt_unlock_source(p->main_source);
- p->main_source = NULL;
- }
+ if (es != ES_DOWN)
+ proto_want_export_down(p);
+ if (cs == FS_HAPPY)
+ proto_want_core_down(p);
+ if (ops != PS_STOP)
+ proto_falling_down(p);
neigh_prune(); // FIXME convert neighbors to resource?
rfree(p->pool);
@@ -1111,22 +1303,9 @@ proto_notify_state(struct proto *p, unsigned ps)
return; /* The protocol might have ceased to exist */
}
break;
- case PS_START:
- ASSERT(ops == PS_DOWN);
- ASSERT(cs == FS_HUNGRY);
- break;
- case PS_UP:
- ASSERT(ops == PS_DOWN || ops == PS_START);
- ASSERT(cs == FS_HUNGRY);
- proto_schedule_feed(p, 1);
- break;
- case PS_STOP:
- p->down_sched = 0;
- if ((cs == FS_FEEDING) || (cs == FS_HAPPY))
- proto_schedule_flush(p);
- break;
+
default:
- bug("Invalid state transition for %s from %s/%s to */%s", p->name, c_states[cs], p_states[ops], p_states[ps]);
+ bug("%s: Invalid state %d", p->name, ps);
}
}
@@ -1141,11 +1320,17 @@ proto_state_name(struct proto *p)
switch (P(p->proto_state, p->core_state))
{
case P(PS_DOWN, FS_HUNGRY): return "down";
- case P(PS_START, FS_HUNGRY): return "start";
- case P(PS_UP, FS_HUNGRY):
- case P(PS_UP, FS_FEEDING): return "feed";
+ case P(PS_START, FS_HUNGRY):
+ case P(PS_START, FS_HAPPY): return "start";
+ case P(PS_UP, FS_HAPPY):
+ switch (p->export_state)
+ {
+ case ES_DOWN: return "wait";
+ case ES_FEEDING: return "feed";
+ case ES_READY: return "up";
+ default: return "???";
+ }
case P(PS_STOP, FS_HUNGRY): return "stop";
- case P(PS_UP, FS_HAPPY): return "up";
case P(PS_STOP, FS_FLUSHING):
case P(PS_DOWN, FS_FLUSHING): return "flush";
default: return "???";
@@ -1196,6 +1381,11 @@ proto_show_basic_info(struct proto *p)
cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter));
cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter));
+ if (graceful_restart_state == GRS_ACTIVE)
+ cli_msg(-1006, " GR recovery: %s%s",
+ p->gr_lock ? " pending" : "",
+ p->gr_wait ? " waiting" : "");
+
proto_show_limit(p->cf->rx_limit, "Receive limit:");
proto_show_limit(p->cf->in_limit, "Import limit:");
proto_show_limit(p->cf->out_limit, "Export limit:");
diff --git a/nest/protocol.h b/nest/protocol.h
index b58f9e67..ec779563 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -148,10 +148,13 @@ struct proto {
byte disabled; /* Manually disabled */
byte proto_state; /* Protocol state machine (PS_*, see below) */
byte core_state; /* Core state machine (FS_*, see below) */
- byte core_goal; /* State we want to reach (FS_*, see below) */
+ byte export_state; /* Route export state (ES_*, see below) */
byte reconfiguring; /* We're shutting down due to reconfiguration */
- byte refeeding; /* We are refeeding (valid only if core_state == FS_FEEDING) */
+ byte refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */
byte flushing; /* Protocol is flushed in current flush loop round */
+ byte gr_recovery; /* Protocol should participate in graceful restart recovery */
+ byte gr_lock; /* Graceful restart mechanism should wait for this proto */
+ byte gr_wait; /* Route export to protocol is postponed until graceful restart */
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
byte down_code; /* Reason for shutdown (PDC_* codes) */
u32 hash_key; /* Random key used for hashing of neighbors */
@@ -175,6 +178,7 @@ struct proto {
* reload_routes Request protocol to reload all its routes to the core
* (using rte_update()). Returns: 0=reload cannot be done,
* 1= reload is scheduled and will happen (asynchronously).
+ * feed_done Notify protocol about finish of route feeding.
*/
void (*if_notify)(struct proto *, unsigned flags, struct iface *i);
@@ -185,6 +189,7 @@ struct proto {
void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs);
int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool);
int (*reload_routes)(struct proto *);
+ void (*feed_done)(struct proto *);
/*
* Routing entry hooks (called only for routes belonging to this protocol):
@@ -242,6 +247,13 @@ static inline void
proto_copy_rest(struct proto_config *dest, struct proto_config *src, unsigned size)
{ memcpy(dest + 1, src + 1, size - sizeof(struct proto_config)); }
+void graceful_restart_recovery(void);
+void graceful_restart_init(void);
+void graceful_restart_show_status(void);
+void proto_graceful_restart_lock(struct proto *p);
+void proto_graceful_restart_unlock(struct proto *p);
+
+#define DEFAULT_GR_WAIT 240
void proto_show_limit(struct proto_limit *l, const char *dsc);
void proto_show_basic_info(struct proto *p);
@@ -343,10 +355,17 @@ void proto_notify_state(struct proto *p, unsigned state);
* as a result of received ROUTE-REFRESH request).
*/
-#define FS_HUNGRY 0
-#define FS_FEEDING 1
-#define FS_HAPPY 2
-#define FS_FLUSHING 3
+#define FS_HUNGRY 0
+#define FS_FEEDING 1 /* obsolete */
+#define FS_HAPPY 2
+#define FS_FLUSHING 3
+
+
+#define ES_DOWN 0
+#define ES_FEEDING 1
+#define ES_READY 2
+
+
/*
* Debugging flags
diff --git a/nest/route.h b/nest/route.h
index f00f8b2b..82d9e202 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -148,6 +148,10 @@ typedef struct rtable {
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
} rtable;
+#define RPS_NONE 0
+#define RPS_SCHEDULED 1
+#define RPS_RUNNING 2
+
typedef struct network {
struct fib_node n; /* FIB flags reserved for kernel syncer */
struct rte *routes; /* Available routes for this network */
@@ -222,6 +226,8 @@ typedef struct rte {
#define REF_COW 1 /* Copy this rte on write */
#define REF_FILTERED 2 /* Route is rejected by import filter */
+#define REF_STALE 4 /* Route is stale in a refresh cycle */
+#define REF_DISCARD 8 /* Route is scheduled for discard */
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
@@ -257,6 +263,8 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
void rte_discard(rtable *tab, rte *old);
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
+void rt_refresh_begin(rtable *t, struct announce_hook *ah);
+void rt_refresh_end(rtable *t, struct announce_hook *ah);
void rte_dump(rte *);
void rte_free(rte *);
rte *rte_do_cow(rte *);
@@ -268,6 +276,15 @@ void rt_feed_baby_abort(struct proto *p);
int rt_prune_loop(void);
struct rtable_config *rt_new_table(struct symbol *s);
+/*
+ * Mark table @tab as needing a prune pass (RPS_SCHEDULED).
+ *
+ * If a prune is currently running, fit_get() is applied to the prune
+ * iterator first (presumably to release the iterator parked in the FIB,
+ * since rt_prune_step() re-initializes it for a scheduled prune).
+ */
+static inline void
+rt_mark_for_prune(rtable *tab)
+{
+  const int running = (tab->prune_state == RPS_RUNNING);
+
+  if (running)
+    fit_get(&tab->fib, &tab->prune_fit);
+
+  tab->prune_state = RPS_SCHEDULED;
+}
+
struct rt_show_data {
ip_addr prefix;
unsigned pxlen;
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 8c91ea0a..bc911729 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -55,8 +55,10 @@ static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
-
+static inline int rt_prune_table(rtable *tab);
static inline void rt_schedule_gc(rtable *tab);
+static inline void rt_schedule_prune(rtable *tab);
+
static inline struct ea_list *
make_tmp_attrs(struct rte *rt, struct linpool *pool)
@@ -570,7 +572,7 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
struct announce_hook *a;
WALK_LIST(a, tab->hooks)
{
- ASSERT(a->proto->core_state == FS_HAPPY || a->proto->core_state == FS_FEEDING);
+ ASSERT(a->proto->export_state != ES_DOWN);
if (a->proto->accept_ra_types == type)
if (type == RA_ACCEPTED)
rt_notify_accepted(a, net, new, old, before_old, tmpa, 0);
@@ -1108,6 +1110,46 @@ rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter
return v > 0;
}
+/**
+ * rt_refresh_begin - start a refresh cycle
+ * @t: routing table
+ * @ah: announce hook whose routes are refreshed
+ *
+ * Walks the whole FIB of @t and sets REF_STALE on every route whose
+ * sender is @ah.
+ */
+void
+rt_refresh_begin(rtable *t, struct announce_hook *ah)
+{
+  FIB_WALK(&t->fib, fn)
+  {
+    net *nt = (net *) fn;
+    rte *r;
+
+    for (r = nt->routes; r != NULL; r = r->next)
+    {
+      if (r->sender != ah)
+        continue;
+
+      r->flags |= REF_STALE;
+    }
+  }
+  FIB_WALK_END;
+}
+
+/**
+ * rt_refresh_end - finish a refresh cycle
+ * @t: routing table
+ * @ah: announce hook whose routes were refreshed
+ *
+ * Walks the whole FIB of @t and sets REF_DISCARD on every route of
+ * @ah that still carries REF_STALE. If at least one such route was
+ * found, a prune of the table is scheduled.
+ */
+void
+rt_refresh_end(rtable *t, struct announce_hook *ah)
+{
+  int found = 0;
+
+  FIB_WALK(&t->fib, fn)
+  {
+    net *nt = (net *) fn;
+    rte *r;
+
+    for (r = nt->routes; r != NULL; r = r->next)
+    {
+      if ((r->sender != ah) || !(r->flags & REF_STALE))
+        continue;
+
+      r->flags |= REF_DISCARD;
+      found = 1;
+    }
+  }
+  FIB_WALK_END;
+
+  if (found)
+    rt_schedule_prune(t);
+}
+
+
/**
* rte_dump - dump a route
* @e: &rte to be dumped
@@ -1170,6 +1212,13 @@ rt_dump_all(void)
}
+/*
+ * Mark table @tab for pruning and schedule its rt_event so that the
+ * prune is processed from the event.
+ */
static inline void
+rt_schedule_prune(rtable *tab)
+{
+  rt_mark_for_prune(tab);
+  ev_schedule(tab->rt_event);
+}
+
+static inline void
rt_schedule_gc(rtable *tab)
{
if (tab->gc_scheduled)
@@ -1199,6 +1248,7 @@ rt_schedule_nhu(rtable *tab)
tab->nhu_state |= 1;
}
+
static void
rt_prune_nets(rtable *tab)
{
@@ -1242,6 +1292,14 @@ rt_event(void *ptr)
if (tab->nhu_state)
rt_next_hop_update(tab);
+ if (tab->prune_state)
+ if (!rt_prune_table(tab))
+ {
+ /* Table prune unfinished */
+ ev_schedule(tab->rt_event);
+ return;
+ }
+
if (tab->gc_scheduled)
{
rt_prune_nets(tab);
@@ -1283,8 +1341,8 @@ rt_init(void)
}
-static inline int
-rt_prune_step(rtable *tab, int step, int *max_feed)
+static int
+rt_prune_step(rtable *tab, int step, int *limit)
{
static struct rate_limit rl_flush;
struct fib_iterator *fit = &tab->prune_fit;
@@ -1294,13 +1352,13 @@ rt_prune_step(rtable *tab, int step, int *max_feed)
fib_check(&tab->fib);
#endif
- if (tab->prune_state == 0)
+ if (tab->prune_state == RPS_NONE)
return 1;
- if (tab->prune_state == 1)
+ if (tab->prune_state == RPS_SCHEDULED)
{
FIB_ITERATE_INIT(fit, &tab->fib);
- tab->prune_state = 2;
+ tab->prune_state = RPS_RUNNING;
}
again:
@@ -1312,9 +1370,10 @@ again:
rescan:
for (e=n->routes; e; e=e->next)
if (e->sender->proto->flushing ||
+ (e->flags & REF_DISCARD) ||
(step && e->attrs->src->proto->flushing))
{
- if (*max_feed <= 0)
+ if (*limit <= 0)
{
FIB_ITERATE_PUT(fit, fn);
return 0;
@@ -1325,7 +1384,7 @@ again:
n->n.prefix, n->n.pxlen, e->attrs->src->proto->name, tab->name);
rte_discard(tab, e);
- (*max_feed)--;
+ (*limit)--;
goto rescan;
}
@@ -1342,10 +1401,17 @@ again:
fib_check(&tab->fib);
#endif
- tab->prune_state = 0;
+ tab->prune_state = RPS_NONE;
return 1;
}
+/*
+ * Run one bounded prune step (step 0) on table @tab with a budget of
+ * 512. Returns the result of rt_prune_step(): 1 when the prune has
+ * finished (or none was pending), 0 when it must be continued later.
+ */
+static inline int
+rt_prune_table(rtable *tab)
+{
+  int budget = 512;
+
+  return rt_prune_step(tab, 0, &budget);
+}
+
/**
* rt_prune_loop - prune routing tables
*
@@ -1364,19 +1430,19 @@ int
rt_prune_loop(void)
{
static int step = 0;
- int max_feed = 512;
+ int limit = 512;
rtable *t;
again:
WALK_LIST(t, routing_tables)
- if (! rt_prune_step(t, step, &max_feed))
+ if (! rt_prune_step(t, step, &limit))
return 0;
if (step == 0)
{
/* Prepare for the second step */
WALK_LIST(t, routing_tables)
- t->prune_state = 1;
+ t->prune_state = RPS_SCHEDULED;
step = 1;
goto again;
@@ -1721,7 +1787,7 @@ again:
(p->accept_ra_types == RA_ACCEPTED))
if (rte_is_valid(e))
{
- if (p->core_state != FS_FEEDING)
+ if (p->export_state != ES_FEEDING)
return 1; /* In the meantime, the protocol fell down. */
do_feed_baby(p, p->accept_ra_types, h, n, e);
max_feed--;
@@ -1730,7 +1796,7 @@ again:
if (p->accept_ra_types == RA_ANY)
for(e = n->routes; rte_is_valid(e); e = e->next)
{
- if (p->core_state != FS_FEEDING)
+ if (p->export_state != ES_FEEDING)
return 1; /* In the meantime, the protocol fell down. */
do_feed_baby(p, RA_ANY, h, n, e);
max_feed--;
@@ -2223,9 +2289,7 @@ rt_show_cont(struct cli *c)
cli_printf(c, 8004, "Stopped due to reconfiguration");
goto done;
}
- if (d->export_protocol &&
- d->export_protocol->core_state != FS_HAPPY &&
- d->export_protocol->core_state != FS_FEEDING)
+ if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
{
cli_printf(c, 8005, "Protocol is down");
goto done;