summaryrefslogtreecommitdiff
path: root/nest/rt-table.c
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2015-06-08 02:20:43 +0200
committerOndrej Zajicek <santiago@crfreenet.org>2015-06-08 02:24:08 +0200
commit8d9eef17713a9b38cd42bd59c4ce76c3ef6c2fc2 (patch)
tree3115be5be954d6bbfd05db675b4a5508a50ed9d2 /nest/rt-table.c
parentdb027a41d47b8fc52b65067ccabe2024554e53ca (diff)
BGP multipath support
Kernel option 'merge paths' allows to merge routes exported to kernel protocol (currently BGP and static routes) to multipath routes.
Diffstat (limited to 'nest/rt-table.c')
-rw-r--r--nest/rt-table.c228
1 files changed, 215 insertions, 13 deletions
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 22e1c489..9e2c4e0d 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -144,6 +144,38 @@ rte_do_cow(rte *r)
return e;
}
+/**
+ * rte_cow_rta - get a private writable copy of &rte with writable &rta
+ * @r: a route entry to be copied
+ * @lp: a linpool from which to allocate &rta
+ *
+ * rte_cow_rta() takes a &rte and prepares it and associated &rta for
+ * modification. There are three possibilities: First, both &rte and &rta are
+ * private copies, in that case they are returned unchanged. Second, &rte is
+ * private copy, but &rta is cached, in that case &rta is duplicated using
+ * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
+ * both structures are duplicated by rte_do_cow() and rta_do_cow().
+ *
+ * Note that in the second case, cached &rta loses one reference, while private
+ * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
+ * nexthops, ...) with it. To work properly, original shared &rta should have
+ * another reference during the life of created private copy.
+ *
+ * Result: a pointer to the new writable &rte with writable &rta.
+ */
+rte *
+rte_cow_rta(rte *r, linpool *lp)
+{
+ if (!rta_is_cached(r->attrs))
+ return r;
+
+ rte *e = rte_cow(r);
+ rta *a = rta_do_cow(r->attrs, lp);
+ rta_free(e->attrs);
+ e->attrs = a;
+ return e;
+}
+
static int /* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
@@ -172,6 +204,26 @@ rte_better(rte *new, rte *old)
return 0;
}
+static int
+rte_mergable(rte *pri, rte *sec)
+{
+ int (*mergable)(rte *, rte *);
+
+ if (!rte_is_valid(pri) || !rte_is_valid(sec))
+ return 0;
+
+ if (pri->pref != sec->pref)
+ return 0;
+
+ if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
+ return 0;
+
+ if (mergable = pri->attrs->src->proto->rte_mergable)
+ return mergable(pri, sec);
+
+ return 0;
+}
+
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
@@ -535,6 +587,129 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
rte_free(old_free);
}
+
+static struct mpnh *
+mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
+{
+ struct mpnh nh = { .gw = a->gw, .iface = a->iface };
+ struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
+ return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
+}
+
+rte *
+rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
+{
+ // struct proto *p = ah->proto;
+ struct mpnh *nhs = NULL;
+ rte *best0, *best, *rt0, *rt, *tmp;
+
+ best0 = net->routes;
+ *rt_free = NULL;
+
+ if (!rte_is_valid(best0))
+ return NULL;
+
+ best = export_filter(ah, best0, rt_free, tmpa, silent);
+
+ if (!best || !rte_is_reachable(best))
+ return best;
+
+ for (rt0 = best0->next; rt0; rt0 = rt0->next)
+ {
+ if (!rte_mergable(best0, rt0))
+ continue;
+
+ rt = export_filter(ah, rt0, &tmp, NULL, 1);
+
+ if (!rt)
+ continue;
+
+ if (rte_is_reachable(rt))
+ nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);
+
+ if (tmp)
+ rte_free(tmp);
+ }
+
+ if (nhs)
+ {
+ nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);
+
+ if (nhs->next)
+ {
+ best = rte_cow_rta(best, rte_update_pool);
+ best->attrs->dest = RTD_MULTIPATH;
+ best->attrs->nexthops = nhs;
+ }
+ }
+
+ if (best != best0)
+ *rt_free = best;
+
+ return best;
+}
+
+
+static void
+rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
+ rte *new_best, rte*old_best, int refeed)
+{
+ // struct proto *p = ah->proto;
+
+ rte *new_best_free = NULL;
+ rte *old_best_free = NULL;
+ rte *new_changed_free = NULL;
+ rte *old_changed_free = NULL;
+ ea_list *tmpa = NULL;
+
+ /* We assume that all rte arguments are either NULL or rte_is_valid() */
+
+ /* This check should be done by the caller */
+ if (!new_best && !old_best)
+ return;
+
+ /* Check whether the change is relevant to the merged route */
+ if ((new_best == old_best) && !refeed)
+ {
+ new_changed = rte_mergable(new_best, new_changed) ?
+ export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;
+
+ old_changed = rte_mergable(old_best, old_changed) ?
+ export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;
+
+ if (!new_changed && !old_changed)
+ return;
+ }
+
+ if (new_best)
+ ah->stats->exp_updates_received++;
+ else
+ ah->stats->exp_withdraws_received++;
+
+ /* Prepare new merged route */
+ if (new_best)
+ new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);
+
+ /* Prepare old merged route (without proper merged next hops) */
+ /* There are some issues with running filter on old route - see rt_notify_basic() */
+ if (old_best && !refeed)
+ old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);
+
+ if (new_best || old_best)
+ do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);
+
+ /* Discard temporary rte's */
+ if (new_best_free)
+ rte_free(new_best_free);
+ if (old_best_free)
+ rte_free(old_best_free);
+ if (new_changed_free)
+ rte_free(new_changed_free);
+ if (old_changed_free)
+ rte_free(old_changed_free);
+}
+
+
/**
* rte_announce - announce a routing table change
* @tab: table the route has been added to
@@ -564,13 +739,20 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
* the protocol gets called.
*/
static void
-rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old)
+rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
+ rte *new_best, rte *old_best, rte *before_old)
{
+ if (!rte_is_valid(new))
+ new = NULL;
+
if (!rte_is_valid(old))
old = before_old = NULL;
- if (!rte_is_valid(new))
- new = NULL;
+ if (!rte_is_valid(new_best))
+ new_best = NULL;
+
+ if (!rte_is_valid(old_best))
+ old_best = NULL;
if (!old && !new)
return;
@@ -593,6 +775,8 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
if (a->proto->accept_ra_types == type)
if (type == RA_ACCEPTED)
rt_notify_accepted(a, net, new, old, before_old, 0);
+ else if (type == RA_MERGED)
+ rt_notify_merged(a, net, new, old, new_best, old_best, 0);
else
rt_notify_basic(a, net, new, old, 0);
}
@@ -898,11 +1082,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr
}
/* Propagate the route change */
- rte_announce(table, RA_ANY, net, new, old, NULL);
+ rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
if (net->routes != old_best)
- rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL);
+ rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
if (table->config->sorted)
- rte_announce(table, RA_ACCEPTED, net, new, old, before_old);
+ rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
+ rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
if (!net->routes &&
(table->gc_counter++ >= table->config->gc_max_ops) &&
@@ -1081,10 +1266,11 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
/* Independent call to rte_announce(), used from next hop
recalculation, outside of rte_update(). new must be non-NULL */
static inline void
-rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
+rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old,
+ rte *new_best, rte *old_best)
{
rte_update_lock();
- rte_announce(tab, type, n, new, old, NULL);
+ rte_announce(tab, type, net, new, old, new_best, old_best, NULL);
rte_update_unlock();
}
@@ -1548,7 +1734,7 @@ rt_next_hop_update_net(rtable *tab, net *n)
new = rt_next_hop_update_rte(tab, e);
*k = new;
- rte_announce_i(tab, RA_ANY, n, new, e);
+ rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated");
/* Call a pre-comparison hook */
@@ -1588,10 +1774,13 @@ rt_next_hop_update_net(rtable *tab, net *n)
/* Announce the new best route */
if (new != old_best)
{
- rte_announce_i(tab, RA_OPTIMAL, n, new, old_best);
+ rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL);
rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]");
}
+ /* FIXME: Better announcement of merged routes */
+ rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best);
+
if (free_old_best)
rte_free_quick(old_best);
@@ -1755,6 +1944,8 @@ do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e)
rte_update_lock();
if (type == RA_ACCEPTED)
rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1);
+ else if (type == RA_MERGED)
+ rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding);
else
rt_notify_basic(h, n, e, p->refeeding ? e : NULL, p->refeeding);
rte_update_unlock();
@@ -1802,7 +1993,8 @@ again:
/* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */
if ((p->accept_ra_types == RA_OPTIMAL) ||
- (p->accept_ra_types == RA_ACCEPTED))
+ (p->accept_ra_types == RA_ACCEPTED) ||
+ (p->accept_ra_types == RA_MERGED))
if (rte_is_valid(e))
{
if (p->export_state != ES_FEEDING)
@@ -2267,12 +2459,22 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d)
rte_update_lock(); /* We use the update buffer for filtering */
tmpa = make_tmp_attrs(e, rte_update_pool);
- if (d->export_mode)
+ /* Special case for merged export */
+ if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED))
+ {
+ rte *rt_free;
+ e = rt_export_merged(a, n, &rt_free, &tmpa, 1);
+ pass = 1;
+
+ if (!e)
+ { e = ee; goto skip; }
+ }
+ else if (d->export_mode)
{
struct proto *ep = d->export_protocol;
int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0;
- if (ep->accept_ra_types == RA_OPTIMAL)
+ if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED)
pass = 1;
if (ic < 0)