summaryrefslogtreecommitdiff
path: root/nest
diff options
context:
space:
mode:
author Ondrej Zajicek <santiago@crfreenet.org> 2015-06-08 02:20:43 +0200
committer Ondrej Zajicek <santiago@crfreenet.org> 2015-06-08 02:24:08 +0200
commit 8d9eef17713a9b38cd42bd59c4ce76c3ef6c2fc2 (patch)
tree 3115be5be954d6bbfd05db675b4a5508a50ed9d2 /nest
parent db027a41d47b8fc52b65067ccabe2024554e53ca (diff)
BGP multipath support
The kernel protocol option 'merge paths' allows merging routes exported to the kernel protocol (currently BGP and static routes) into multipath routes.
Diffstat (limited to 'nest')
-rw-r--r-- nest/protocol.h 3
-rw-r--r-- nest/route.h 11
-rw-r--r-- nest/rt-attr.c 10
-rw-r--r-- nest/rt-table.c 228
4 files changed, 239 insertions, 13 deletions
diff --git a/nest/protocol.h b/nest/protocol.h
index a51e9afd..8c49154f 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -158,6 +158,7 @@ struct proto {
byte gr_wait; /* Route export to protocol is postponed until graceful restart */
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
byte down_code; /* Reason for shutdown (PDC_* codes) */
+ byte merge_limit; /* Maximal number of nexthops for RA_MERGED */
u32 hash_key; /* Random key used for hashing of neighbors */
bird_clock_t last_state_change; /* Time of last state transition */
char *last_state_name_announced; /* Last state name we've announced to the user */
@@ -200,6 +201,7 @@ struct proto {
* rte_recalculate Called at the beginning of the best route selection
* rte_better Compare two rte's and decide which one is better (1=first, 0=second).
* rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no).
+ * rte_mergable Compare two rte's and decide whether they could be merged (1=yes, 0=no).
* rte_insert Called whenever a rte is inserted to a routing table.
* rte_remove Called whenever a rte is removed from the routing table.
*/
@@ -207,6 +209,7 @@ struct proto {
int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
int (*rte_better)(struct rte *, struct rte *);
int (*rte_same)(struct rte *, struct rte *);
+ int (*rte_mergable)(struct rte *, struct rte *);
void (*rte_insert)(struct network *, struct rte *);
void (*rte_remove)(struct network *, struct rte *);
diff --git a/nest/route.h b/nest/route.h
index e22f950b..6067526d 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -240,6 +240,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED);
#define RA_OPTIMAL 1 /* Announcement of optimal route change */
#define RA_ACCEPTED 2 /* Announcement of first accepted route */
#define RA_ANY 3 /* Announcement of any route change */
+#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */
/* Return value of import_control() callback */
#define RIC_ACCEPT 1 /* Accepted by protocol */
@@ -263,12 +264,14 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
void rte_discard(rtable *tab, rte *old);
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
+rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, int silent);
void rt_refresh_begin(rtable *t, struct announce_hook *ah);
void rt_refresh_end(rtable *t, struct announce_hook *ah);
void rte_dump(rte *);
void rte_free(rte *);
rte *rte_do_cow(rte *);
static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; }
+rte *rte_cow_rta(rte *r, linpool *lp);
void rt_dump(rtable *);
void rt_dump_all(void);
int rt_feed_baby(struct proto *p);
@@ -388,6 +391,12 @@ typedef struct rta {
#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other
protocol-specific metric is available */
+
+/* Return nonzero if the route has a regular, reachable next hop, i.e. its destination type is RTD_ROUTER, RTD_DEVICE or RTD_MULTIPATH */
+static inline int rte_is_reachable(rte *r)
+{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); }
+
+
/*
* Extended Route Attributes
*/
@@ -490,6 +499,8 @@ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; }
static inline rta *rta_clone(rta *r) { r->uc++; return r; }
void rta__free(rta *r);
static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
+rta *rta_do_cow(rta *o, linpool *lp);
+static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; }
void rta_dump(rta *);
void rta_dump_all(void);
void rta_show(struct cli *, rta *, ea_list *);
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index 32090b52..7fa05d6d 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -1138,6 +1138,16 @@ rta__free(rta *a)
sl_free(rta_slab, a);
}
+rta * /* Return a private, uncached copy of @o allocated from linpool @lp */
+rta_do_cow(rta *o, linpool *lp)
+{
+ rta *r = lp_alloc(lp, sizeof(rta));
+ memcpy(r, o, sizeof(rta)); /* shallow copy: pointer members still refer to the data of @o */
+ r->aflags = 0; /* clears all flags, so rta_is_cached() is false for the copy */
+ r->uc = 0; /* the copy starts with a zero use count */
+ return r;
+}
+
/**
* rta_dump - dump route attributes
* @a: attribute structure to dump
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 22e1c489..9e2c4e0d 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -144,6 +144,38 @@ rte_do_cow(rte *r)
return e;
}
+/**
+ * rte_cow_rta - get a private writable copy of &rte with writable &rta
+ * @r: a route entry to be copied
+ * @lp: a linpool from which to allocate &rta
+ *
+ * rte_cow_rta() takes a &rte and prepares it and the associated &rta for
+ * modification. There are three possibilities: First, &rta is already private
+ * (not cached); @r is then returned unchanged, without checking whether the
+ * &rte itself is shared. Second, &rte is a private copy but &rta is cached; in
+ * that case &rta is duplicated using rta_do_cow(). Third, &rte is shared and
+ * &rta is cached; both are duplicated, by rte_do_cow() and rta_do_cow().
+ *
+ * Note that in the second case, the cached &rta loses one reference, while the
+ * private copy created by rta_do_cow() is a shallow copy sharing indirect data
+ * (eattrs, nexthops, ...) with it. To work properly, the original shared &rta
+ * should have another reference during the life of the created private copy.
+ *
+ * Result: a pointer to the new writable &rte with writable &rta.
+ */
+rte *
+rte_cow_rta(rte *r, linpool *lp)
+{
+ if (!rta_is_cached(r->attrs)) /* rta is already private, nothing to copy */
+ return r;
+
+ rte *e = rte_cow(r); /* duplicates the rte only when it is flagged REF_COW */
+ rta *a = rta_do_cow(r->attrs, lp); /* shallow private copy of the cached rta */
+ rta_free(e->attrs); /* drop one reference to the cached rta */
+ e->attrs = a;
+ return e;
+}
+
static int /* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
@@ -172,6 +204,26 @@ rte_better(rte *new, rte *old)
return 0;
}
+static int /* 1 if @sec may be merged with @pri, 0 otherwise */
+rte_mergable(rte *pri, rte *sec)
+{
+ int (*mergable)(rte *, rte *);
+
+ if (!rte_is_valid(pri) || !rte_is_valid(sec)) /* both routes must pass rte_is_valid() */
+ return 0;
+
+ if (pri->pref != sec->pref) /* preferences must be equal */
+ return 0;
+
+ if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) /* source protocols must share the same ->proto (presumably the protocol type - confirm) */
+ return 0;
+
+ if (mergable = pri->attrs->src->proto->rte_mergable) /* intentional assignment: fetch the optional rte_mergable hook of @pri's protocol */
+ return mergable(pri, sec);
+
+ return 0; /* no hook defined: routes are never merged */
+}
+
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
@@ -535,6 +587,129 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
rte_free(old_free);
}
+
+static struct mpnh * /* Merge the next hop(s) of @a into the list @nhs via mpnh_merge(), passing @max through; returns its result */
+mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
+{
+ struct mpnh nh = { .gw = a->gw, .iface = a->iface }; /* temporary single next hop built from @a, lives on the stack */
+ struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh; /* a multipath rta already carries a next hop list */
+ return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool); /* NOTE(review): flags 1, 0 presumably mean @nhs may be reused while nh2 must be copied - confirm in mpnh_merge() */
+}
+
+rte * /* Build the route exported through @ah for @net: the filtered best route, with next hops of mergable routes merged in */
+rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent) /* *@rt_free receives a temporary rte for the caller to free, or NULL */
+{
+ // struct proto *p = ah->proto;
+ struct mpnh *nhs = NULL; /* next hops collected from the secondary routes */
+ rte *best0, *best, *rt0, *rt, *tmp;
+
+ best0 = net->routes; /* head of the route list of the net, used as the best route */
+ *rt_free = NULL;
+
+ if (!rte_is_valid(best0))
+ return NULL;
+
+ best = export_filter(ah, best0, rt_free, tmpa, silent); /* only the best route is filtered with @tmpa and @silent */
+
+ if (!best || !rte_is_reachable(best)) /* rejected or without a regular next hop: nothing to merge */
+ return best;
+
+ for (rt0 = best0->next; rt0; rt0 = rt0->next) /* walk the remaining routes of the net */
+ {
+ if (!rte_mergable(best0, rt0)) /* compared against the unfiltered best route */
+ continue;
+
+ rt = export_filter(ah, rt0, &tmp, NULL, 1); /* secondary routes are filtered with silent == 1 and no tmpa */
+
+ if (!rt)
+ continue;
+
+ if (rte_is_reachable(rt))
+ nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);
+
+ if (tmp) /* free the temporary rte produced by the filter, if any */
+ rte_free(tmp);
+ }
+
+ if (nhs) /* at least one secondary route contributed next hops */
+ {
+ nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit); /* add the next hop(s) of the best route itself */
+
+ if (nhs->next) /* more than one next hop in the result: turn best into a multipath route */
+ {
+ best = rte_cow_rta(best, rte_update_pool); /* get writable rte and rta before modifying them */
+ best->attrs->dest = RTD_MULTIPATH;
+ best->attrs->nexthops = nhs;
+ }
+ }
+
+ if (best != best0) /* best is a temporary copy (from the filter or rte_cow_rta()), hand it over for freeing */
+ *rt_free = best;
+
+ return best;
+}
+
+
+static void /* Announce a change of the merged route of @net to the protocol behind @ah */
+rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
+ rte *new_best, rte*old_best, int refeed)
+{
+ // struct proto *p = ah->proto;
+
+ rte *new_best_free = NULL; /* temporary rte's to be freed at the end of the function */
+ rte *old_best_free = NULL;
+ rte *new_changed_free = NULL;
+ rte *old_changed_free = NULL;
+ ea_list *tmpa = NULL;
+
+ /* We assume that all rte arguments are either NULL or rte_is_valid() */
+
+ /* This check should be done by the caller */
+ if (!new_best && !old_best)
+ return;
+
+ /* Check whether the change is relevant to the merged route */
+ if ((new_best == old_best) && !refeed) /* best route unchanged: only a mergable, exportable changed route matters */
+ {
+ new_changed = rte_mergable(new_best, new_changed) ?
+ export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;
+
+ old_changed = rte_mergable(old_best, old_changed) ?
+ export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;
+
+ if (!new_changed && !old_changed) /* neither changed route takes part in the merged route */
+ return;
+ }
+
+ if (new_best) /* counted as an update when there is a new best route, as a withdraw otherwise */
+ ah->stats->exp_updates_received++;
+ else
+ ah->stats->exp_withdraws_received++;
+
+ /* Prepare new merged route */
+ if (new_best)
+ new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0); /* may return NULL */
+
+ /* Prepare old merged route (without proper merged next hops) */
+ /* There are some issues with running filter on old route - see rt_notify_basic() */
+ if (old_best && !refeed)
+ old_best = export_filter(ah, old_best, &old_best_free, NULL, 1); /* silent filter run, may return NULL */
+
+ if (new_best || old_best) /* skip the notification when both routes ended up NULL */
+ do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);
+
+ /* Discard temporary rte's */
+ if (new_best_free)
+ rte_free(new_best_free);
+ if (old_best_free)
+ rte_free(old_best_free);
+ if (new_changed_free)
+ rte_free(new_changed_free);
+ if (old_changed_free)
+ rte_free(old_changed_free);
+}
+
+
/**
* rte_announce - announce a routing table change
* @tab: table the route has been added to
@@ -564,13 +739,20 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
* the protocol gets called.
*/
static void
-rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old)
+rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
+ rte *new_best, rte *old_best, rte *before_old)
{
+ if (!rte_is_valid(new))
+ new = NULL;
+
if (!rte_is_valid(old))
old = before_old = NULL;
- if (!rte_is_valid(new))
- new = NULL;
+ if (!rte_is_valid(new_best))
+ new_best = NULL;
+
+ if (!rte_is_valid(old_best))
+ old_best = NULL;
if (!old && !new)
return;
@@ -593,6 +775,8 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
if (a->proto->accept_ra_types == type)
if (type == RA_ACCEPTED)
rt_notify_accepted(a, net, new, old, before_old, 0);
+ else if (type == RA_MERGED)
+ rt_notify_merged(a, net, new, old, new_best, old_best, 0);
else
rt_notify_basic(a, net, new, old, 0);
}
@@ -898,11 +1082,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr
}
/* Propagate the route change */
- rte_announce(table, RA_ANY, net, new, old, NULL);
+ rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
if (net->routes != old_best)
- rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL);
+ rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
if (table->config->sorted)
- rte_announce(table, RA_ACCEPTED, net, new, old, before_old);
+ rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
+ rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
if (!net->routes &&
(table->gc_counter++ >= table->config->gc_max_ops) &&
@@ -1081,10 +1266,11 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
/* Independent call to rte_announce(), used from next hop
recalculation, outside of rte_update(). new must be non-NULL */
static inline void
-rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
+rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
+ rte *new_best, rte *old_best)
{
rte_update_lock();
- rte_announce(tab, type, n, new, old, NULL);
+ rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
rte_update_unlock();
}
@@ -1548,7 +1734,7 @@ rt_next_hop_update_net(rtable *tab, net *n)
new = rt_next_hop_update_rte(tab, e);
*k = new;
- rte_announce_i(tab, RA_ANY, n, new, e);
+ rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
/* Call a pre-comparison hook */
@@ -1588,10 +1774,13 @@ rt_next_hop_update_net(rtable *tab, net *n)
/* Announce the new best route */
if (new != old_best)
{
- rte_announce_i(tab, RA_OPTIMAL, n, new, old_best);
+ rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
}
+ /* FIXME: Better announcement of merged routes */
+ rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
+
if (free_old_best)
rte_free_quick(old_best);
@@ -1755,6 +1944,8 @@ do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
rte_update_lock();
if (type == RA_ACCEPTED)
rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
+ else if (type == RA_MERGED)
+ rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
else
rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
rte_update_unlock();
@@ -1802,7 +1993,8 @@ again:
/* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
if ((p->accept_ra_types == RA_OPTIMAL) ||
- (p->accept_ra_types == RA_ACCEPTED))
+ (p->accept_ra_types == RA_ACCEPTED) ||
+ (p->accept_ra_types == RA_MERGED))
if (rte_is_valid(e))
{
if (p->export_state != ES_FEEDING)
@@ -2267,12 +2459,22 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
rte_update_lock(); /* We use the update buffer for filtering */
tmpa = make_tmp_attrs(e, rte_update_pool);
- if (d->export_mode)
+ /* Special case for merged export */
+ if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
+ {
+ rte *rt_free;
+ e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
+ pass = 1;
+
+ if (!e)
+ { e = ee; goto skip; }
+ }
+ else if (d->export_mode)
{
struct proto *ep = d->export_protocol;
int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
- if (ep->accept_ra_types == RA_OPTIMAL)
+ if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
pass = 1;
if (ic < 0)