diff options
author | Ondrej Zajicek <santiago@crfreenet.org> | 2015-06-08 02:20:43 +0200 |
---|---|---|
committer | Ondrej Zajicek <santiago@crfreenet.org> | 2015-06-08 02:24:08 +0200 |
commit | 8d9eef17713a9b38cd42bd59c4ce76c3ef6c2fc2 (patch) | |
tree | 3115be5be954d6bbfd05db675b4a5508a50ed9d2 /nest | |
parent | db027a41d47b8fc52b65067ccabe2024554e53ca (diff) |
BGP multipath support
Kernel option 'merge paths' allows merging routes exported to the kernel
protocol (currently BGP and static routes) into multipath routes.
Diffstat (limited to 'nest')
-rw-r--r-- | nest/protocol.h | 3 | ||||
-rw-r--r-- | nest/route.h | 11 | ||||
-rw-r--r-- | nest/rt-attr.c | 10 | ||||
-rw-r--r-- | nest/rt-table.c | 228 |
4 files changed, 239 insertions, 13 deletions
diff --git a/nest/protocol.h b/nest/protocol.h index a51e9afd..8c49154f 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -158,6 +158,7 @@ struct proto { byte gr_wait; /* Route export to protocol is postponed until graceful restart */ byte down_sched; /* Shutdown is scheduled for later (PDS_*) */ byte down_code; /* Reason for shutdown (PDC_* codes) */ + byte merge_limit; /* Maximal number of nexthops for RA_MERGED */ u32 hash_key; /* Random key used for hashing of neighbors */ bird_clock_t last_state_change; /* Time of last state transition */ char *last_state_name_announced; /* Last state name we've announced to the user */ @@ -200,6 +201,7 @@ struct proto { * rte_recalculate Called at the beginning of the best route selection * rte_better Compare two rte's and decide which one is better (1=first, 0=second). * rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no). + * rte_mergable Compare two rte's and decide whether they could be merged (1=yes, 0=no). * rte_insert Called whenever a rte is inserted to a routing table. * rte_remove Called whenever a rte is removed from the routing table. 
*/ @@ -207,6 +209,7 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); int (*rte_same)(struct rte *, struct rte *); + int (*rte_mergable)(struct rte *, struct rte *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); diff --git a/nest/route.h b/nest/route.h index e22f950b..6067526d 100644 --- a/nest/route.h +++ b/nest/route.h @@ -240,6 +240,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); #define RA_OPTIMAL 1 /* Announcement of optimal route change */ #define RA_ACCEPTED 2 /* Announcement of first accepted route */ #define RA_ANY 3 /* Announcement of any route change */ +#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ /* Return value of import_control() callback */ #define RIC_ACCEPT 1 /* Accepted by protocol */ @@ -263,12 +264,14 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } void rte_discard(rtable *tab, rte *old); int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); +rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, int silent); void rt_refresh_begin(rtable *t, struct announce_hook *ah); void rt_refresh_end(rtable *t, struct announce_hook *ah); void rte_dump(rte *); void rte_free(rte *); rte *rte_do_cow(rte *); static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? 
rte_do_cow(r) : r; } +rte *rte_cow_rta(rte *r, linpool *lp); void rt_dump(rtable *); void rt_dump_all(void); int rt_feed_baby(struct proto *p); @@ -388,6 +391,12 @@ typedef struct rta { #define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other protocol-specific metric is availabe */ + +/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ +static inline int rte_is_reachable(rte *r) +{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); } + + /* * Extended Route Attributes */ @@ -490,6 +499,8 @@ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } static inline rta *rta_clone(rta *r) { r->uc++; return r; } void rta__free(rta *r); static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } +rta *rta_do_cow(rta *o, linpool *lp); +static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 32090b52..7fa05d6d 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -1138,6 +1138,16 @@ rta__free(rta *a) sl_free(rta_slab, a); } +rta * +rta_do_cow(rta *o, linpool *lp) +{ + rta *r = lp_alloc(lp, sizeof(rta)); + memcpy(r, o, sizeof(rta)); + r->aflags = 0; + r->uc = 0; + return r; +} + /** * rta_dump - dump route attributes * @a: attribute structure to dump diff --git a/nest/rt-table.c b/nest/rt-table.c index 22e1c489..9e2c4e0d 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -144,6 +144,38 @@ rte_do_cow(rte *r) return e; } +/** + * rte_cow_rta - get a private writable copy of &rte with writable &rta + * @r: a route entry to be copied + * @lp: a linpool from which to allocate &rta + * + * rte_cow_rta() takes a &rte and prepares it and associated &rta for + * modification. 
There are three possibilities: First, both &rte and &rta are + * private copies, in that case they are returned unchanged. Second, &rte is + * private copy, but &rta is cached, in that case &rta is duplicated using + * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case + * both structures are duplicated by rte_do_cow() and rta_do_cow(). + * + * Note that in the second case, cached &rta loses one reference, while private + * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs, + * nexthops, ...) with it. To work properly, original shared &rta should have + * another reference during the life of created private copy. + * + * Result: a pointer to the new writable &rte with writable &rta. + */ +rte * +rte_cow_rta(rte *r, linpool *lp) +{ + if (!rta_is_cached(r->attrs)) + return r; + + rte *e = rte_cow(r); + rta *a = rta_do_cow(r->attrs, lp); + rta_free(e->attrs); + e->attrs = a; + return e; +} + static int /* Actually better or at least as good as */ rte_better(rte *new, rte *old) { @@ -172,6 +204,26 @@ rte_better(rte *new, rte *old) return 0; } +static int +rte_mergable(rte *pri, rte *sec) +{ + int (*mergable)(rte *, rte *); + + if (!rte_is_valid(pri) || !rte_is_valid(sec)) + return 0; + + if (pri->pref != sec->pref) + return 0; + + if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) + return 0; + + if (mergable = pri->attrs->src->proto->rte_mergable) + return mergable(pri, sec); + + return 0; +} + static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { @@ -535,6 +587,129 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol rte_free(old_free); } + +static struct mpnh * +mpnh_merge_rta(struct mpnh *nhs, rta *a, int max) +{ + struct mpnh nh = { .gw = a->gw, .iface = a->iface }; + struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? 
a->nexthops : &nh; + return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool); +} + +rte * +rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent) +{ + // struct proto *p = ah->proto; + struct mpnh *nhs = NULL; + rte *best0, *best, *rt0, *rt, *tmp; + + best0 = net->routes; + *rt_free = NULL; + + if (!rte_is_valid(best0)) + return NULL; + + best = export_filter(ah, best0, rt_free, tmpa, silent); + + if (!best || !rte_is_reachable(best)) + return best; + + for (rt0 = best0->next; rt0; rt0 = rt0->next) + { + if (!rte_mergable(best0, rt0)) + continue; + + rt = export_filter(ah, rt0, &tmp, NULL, 1); + + if (!rt) + continue; + + if (rte_is_reachable(rt)) + nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit); + + if (tmp) + rte_free(tmp); + } + + if (nhs) + { + nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit); + + if (nhs->next) + { + best = rte_cow_rta(best, rte_update_pool); + best->attrs->dest = RTD_MULTIPATH; + best->attrs->nexthops = nhs; + } + } + + if (best != best0) + *rt_free = best; + + return best; +} + + +static void +rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, + rte *new_best, rte*old_best, int refeed) +{ + // struct proto *p = ah->proto; + + rte *new_best_free = NULL; + rte *old_best_free = NULL; + rte *new_changed_free = NULL; + rte *old_changed_free = NULL; + ea_list *tmpa = NULL; + + /* We assume that all rte arguments are either NULL or rte_is_valid() */ + + /* This check should be done by the caller */ + if (!new_best && !old_best) + return; + + /* Check whether the change is relevant to the merged route */ + if ((new_best == old_best) && !refeed) + { + new_changed = rte_mergable(new_best, new_changed) ? + export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL; + + old_changed = rte_mergable(old_best, old_changed) ? 
+ export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL; + + if (!new_changed && !old_changed) + return; + } + + if (new_best) + ah->stats->exp_updates_received++; + else + ah->stats->exp_withdraws_received++; + + /* Prepare new merged route */ + if (new_best) + new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0); + + /* Prepare old merged route (without proper merged next hops) */ + /* There are some issues with running filter on old route - see rt_notify_basic() */ + if (old_best && !refeed) + old_best = export_filter(ah, old_best, &old_best_free, NULL, 1); + + if (new_best || old_best) + do_rt_notify(ah, net, new_best, old_best, tmpa, refeed); + + /* Discard temporary rte's */ + if (new_best_free) + rte_free(new_best_free); + if (old_best_free) + rte_free(old_best_free); + if (new_changed_free) + rte_free(new_changed_free); + if (old_changed_free) + rte_free(old_changed_free); +} + + /** * rte_announce - announce a routing table change * @tab: table the route has been added to @@ -564,13 +739,20 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol * the protocol gets called. 
*/ static void -rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *before_old) +rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, + rte *new_best, rte *old_best, rte *before_old) { + if (!rte_is_valid(new)) + new = NULL; + if (!rte_is_valid(old)) old = before_old = NULL; - if (!rte_is_valid(new)) - new = NULL; + if (!rte_is_valid(new_best)) + new_best = NULL; + + if (!rte_is_valid(old_best)) + old_best = NULL; if (!old && !new) return; @@ -593,6 +775,8 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo if (a->proto->accept_ra_types == type) if (type == RA_ACCEPTED) rt_notify_accepted(a, net, new, old, before_old, 0); + else if (type == RA_MERGED) + rt_notify_merged(a, net, new, old, new_best, old_best, 0); else rt_notify_basic(a, net, new, old, 0); } @@ -898,11 +1082,12 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *sr } /* Propagate the route change */ - rte_announce(table, RA_ANY, net, new, old, NULL); + rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL); if (net->routes != old_best) - rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL); + rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL); if (table->config->sorted) - rte_announce(table, RA_ACCEPTED, net, new, old, before_old); + rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old); + rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL); if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && @@ -1081,10 +1266,11 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src) /* Independent call to rte_announce(), used from next hop recalculation, outside of rte_update(). 
new must be non-NULL */ static inline void -rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old) +rte_announce_i(rtable *tab, unsigned type, net *net, rte *new, rte *old, + rte *new_best, rte *old_best) { rte_update_lock(); - rte_announce(tab, type, n, new, old, NULL); + rte_announce(tab, type, net, new, old, new_best, old_best, NULL); rte_update_unlock(); } @@ -1548,7 +1734,7 @@ rt_next_hop_update_net(rtable *tab, net *n) new = rt_next_hop_update_rte(tab, e); *k = new; - rte_announce_i(tab, RA_ANY, n, new, e); + rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); rte_trace_in(D_ROUTES, new->sender->proto, new, "updated"); /* Call a pre-comparison hook */ @@ -1588,10 +1774,13 @@ rt_next_hop_update_net(rtable *tab, net *n) /* Announce the new best route */ if (new != old_best) { - rte_announce_i(tab, RA_OPTIMAL, n, new, old_best); + rte_announce_i(tab, RA_OPTIMAL, n, new, old_best, NULL, NULL); rte_trace_in(D_ROUTES, new->sender->proto, new, "updated [best]"); } + /* FIXME: Better announcement of merged routes */ + rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best); + if (free_old_best) rte_free_quick(old_best); @@ -1755,6 +1944,8 @@ do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e) rte_update_lock(); if (type == RA_ACCEPTED) rt_notify_accepted(h, n, e, NULL, NULL, p->refeeding ? 2 : 1); + else if (type == RA_MERGED) + rt_notify_merged(h, n, NULL, NULL, e, p->refeeding ? e : NULL, p->refeeding); else rt_notify_basic(h, n, e, p->refeeding ? 
e : NULL, p->refeeding); rte_update_unlock(); @@ -1802,7 +1993,8 @@ again: /* XXXX perhaps we should change feed for RA_ACCEPTED to not use 'new' */ if ((p->accept_ra_types == RA_OPTIMAL) || - (p->accept_ra_types == RA_ACCEPTED)) + (p->accept_ra_types == RA_ACCEPTED) || + (p->accept_ra_types == RA_MERGED)) if (rte_is_valid(e)) { if (p->export_state != ES_FEEDING) @@ -2267,12 +2459,22 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) rte_update_lock(); /* We use the update buffer for filtering */ tmpa = make_tmp_attrs(e, rte_update_pool); - if (d->export_mode) + /* Special case for merged export */ + if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED)) + { + rte *rt_free; + e = rt_export_merged(a, n, &rt_free, &tmpa, 1); + pass = 1; + + if (!e) + { e = ee; goto skip; } + } + else if (d->export_mode) { struct proto *ep = d->export_protocol; int ic = ep->import_control ? ep->import_control(ep, &e, &tmpa, rte_update_pool) : 0; - if (ep->accept_ra_types == RA_OPTIMAL) + if (ep->accept_ra_types == RA_OPTIMAL || ep->accept_ra_types == RA_MERGED) pass = 1; if (ic < 0) |