summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- nest/config.Y 2
-rw-r--r-- nest/route.h 32
-rw-r--r-- nest/rt-attr.c 63
-rw-r--r-- nest/rt-table.c 84
-rw-r--r-- proto/bgp/attrs.c 14
5 files changed, 140 insertions, 55 deletions
diff --git a/nest/config.Y b/nest/config.Y
index 920a3054..fa726efb 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -51,7 +51,7 @@ CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIREC
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE)
CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST)
-CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT)
+CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH)
%type <i32> idval
%type <f> imexport
diff --git a/nest/route.h b/nest/route.h
index a849bf00..8f9dff9a 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -170,7 +170,7 @@ struct hostentry {
struct hostentry *next; /* Next in hash chain */
unsigned hash_key; /* Hash key */
unsigned uc; /* Use count */
- struct iface *iface; /* Chosen outgoing interface */
+ struct rta *src; /* Source rta entry */
ip_addr gw; /* Chosen next hop */
byte dest; /* Chosen route destination type (RTD_...) */
u32 igp_metric; /* Chosen route IGP metric */
@@ -266,6 +266,14 @@ void rt_show(struct rt_show_data *);
* construction of BGP route attribute lists.
*/
+/* Multipath next-hop: one node of the singly linked list kept in rta->nexthops */
+struct mpnh {
+ ip_addr gw; /* Next hop */
+ struct iface *iface; /* Outgoing interface */
+ struct mpnh *next; /* Following node of the list; NULL terminates it */
+ unsigned char weight; /* Stored weight; rt_show_rte() prints it as weight + 1 */
+};
+
typedef struct rta {
struct rta *next, **pprev; /* Hash chain */
struct proto *proto; /* Protocol instance that originally created the route */
@@ -282,6 +290,7 @@ typedef struct rta {
ip_addr from; /* Advertising router */
struct hostentry *hostentry; /* Hostentry for recursive next-hops */
struct iface *iface; /* Outgoing interface */
+ struct mpnh *nexthops; /* Next-hops for multipath routes */
struct ea_list *eattrs; /* Extended Attribute chain */
} rta;
@@ -309,7 +318,8 @@ typedef struct rta {
#define RTD_BLACKHOLE 2 /* Silently drop packets */
#define RTD_UNREACHABLE 3 /* Reject as unreachable */
#define RTD_PROHIBIT 4 /* Administratively prohibited */
-#define RTD_NONE 5 /* Invalid RTD */
+#define RTD_MULTIPATH 5 /* Multipath route (nexthops != NULL) */
+#define RTD_NONE 6 /* Invalid RTD */
#define RTAF_CACHED 1 /* This is a cached rta */
@@ -387,6 +397,10 @@ void ea_format(eattr *e, byte *buf);
#define EA_FORMAT_BUF_SIZE 256
ea_list *ea_append(ea_list *to, ea_list *what);
+int mpnh__same(struct mpnh *x, struct mpnh *y); /* Node-by-node comparison of two multipath nexthop lists */
+static inline int mpnh_same(struct mpnh *x, struct mpnh *y) /* Same pointer (including both NULL) is equal without walking the lists */
+{ return (x == y) || mpnh__same(x, y); }
+
void rta_init(void);
rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */
static inline rta *rta_clone(rta *r) { r->uc++; return r; }
@@ -403,12 +417,14 @@ void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, i
* count. Cached rta locks its hostentry (increases its use count),
* uncached rta does not lock it. Hostentry with zero use count is
* removed asynchronously during host cache update, therefore it is
- * safe to hold such hostentry temorarily. There is no need to hold
- * a lock for hostentry->dep table, because that table contains routes
- * responsible for that hostentry, and therefore is non-empty if given
- * hostentry has non-zero use count. The protocol responsible for routes
- * with recursive next hops should also hold a lock for a table governing
- * that routes (argument tab to rta_set_recursive_next_hop()).
+ * safe to hold such hostentry temporarily. Hostentry holds a lock for
+ * a 'source' rta, mainly to share multipath nexthops. There is no
+ * need to hold a lock for hostentry->dep table, because that table
+ * contains routes responsible for that hostentry, and therefore is
+ * non-empty if given hostentry has non-zero use count. The protocol
+ * responsible for routes with recursive next hops should also hold a
+ * lock for a table governing those routes (argument tab to
+ * rta_set_recursive_next_hop()).
*/
static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; }
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index ce6fe85d..c1f9c793 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -57,9 +57,65 @@
pool *rta_pool;
static slab *rta_slab;
+static slab *mpnh_slab;
struct protocol *attr_class_to_protocol[EAP_MAX];
+static inline unsigned int /* Hash of a next-hop list; rta_hash() XORs it into the rta hash */
+mpnh_hash(struct mpnh *x)
+{
+ unsigned int h = 0; /* A NULL (empty) list hashes to 0 */
+ for (; x; x = x->next)
+ h ^= ipa_hash(x->gw); /* Only gw contributes; iface and weight are checked by mpnh__same() */
+
+ return h;
+}
+
+int /* Returns 1 when both lists match node for node in the same order, 0 otherwise */
+mpnh__same(struct mpnh *x, struct mpnh *y)
+{
+ for (; x && y; x = x->next, y = y->next)
+ if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight))
+ return 0; /* Nodes at the same position differ */
+
+ return x == y; /* At least one list ended; equal only if both did (both NULL) */
+}
+
+static struct mpnh * /* Deep copy of a next-hop list allocated from mpnh_slab; NULL in gives NULL out */
+mpnh_copy(struct mpnh *o)
+{
+ struct mpnh *first = NULL;
+ struct mpnh **last = &first; /* Address of the link field the next new node is stored into */
+
+ for (; o; o = o->next)
+ {
+ struct mpnh *n = sl_alloc(mpnh_slab); /* NOTE(review): result is used unchecked - presumably sl_alloc() cannot return NULL; confirm */
+ n->gw = o->gw;
+ n->iface = o->iface;
+ n->next = NULL;
+ n->weight = o->weight;
+
+ *last = n; /* Append at the tail so the copy keeps the source order */
+ last = &(n->next);
+ }
+
+ return first;
+}
+
+static void /* Hand every node of a next-hop list back to mpnh_slab; a NULL list is a no-op */
+mpnh_free(struct mpnh *o)
+{
+ struct mpnh *n;
+
+ while (o)
+ {
+ n = o->next; /* Read the link before the node is released */
+ sl_free(mpnh_slab, o);
+ o = n;
+ }
+}
+
+
/*
* Extended Attributes
*/
@@ -587,7 +643,8 @@ rta_alloc_hash(void)
static inline unsigned int
rta_hash(rta *a)
{
- return (a->proto->hash_key ^ ipa_hash(a->gw) ^ ea_hash(a->eattrs)) & 0xffff;
+ return (a->proto->hash_key ^ ipa_hash(a->gw) ^
+ mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs)) & 0xffff; /* Multipath next-hops now take part; result is masked to 16 bits */
}
static inline int
@@ -604,6 +661,7 @@ rta_same(rta *x, rta *y)
ipa_equal(x->from, y->from) &&
x->iface == y->iface &&
x->hostentry == y->hostentry &&
+ mpnh_same(x->nexthops, y->nexthops) &&
ea_same(x->eattrs, y->eattrs));
}
@@ -614,6 +672,7 @@ rta_copy(rta *o)
memcpy(r, o, sizeof(rta));
r->uc = 1;
+ r->nexthops = mpnh_copy(o->nexthops);
r->eattrs = ea_list_copy(o->eattrs);
return r;
}
@@ -707,6 +766,7 @@ rta__free(rta *a)
a->next->pprev = a->pprev;
a->aflags = 0; /* Poison the entry */
rt_unlock_hostentry(a->hostentry);
+ mpnh_free(a->nexthops);
ea_free(a->eattrs);
sl_free(rta_slab, a);
}
@@ -798,6 +858,7 @@ rta_init(void)
{
rta_pool = rp_new(&root_pool, "Attributes");
rta_slab = sl_new(rta_pool, sizeof(rta));
+ mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh));
rta_alloc_hash();
}
diff --git a/nest/rt-table.c b/nest/rt-table.c
index a4976f03..73b05d08 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -963,28 +963,30 @@ rt_preconfig(struct config *c)
*/
static inline int
-hostentry_diff(struct hostentry *he, struct iface *iface, ip_addr gw,
- byte dest, u32 igp_metric)
-{
- return (he->iface != iface) || !ipa_equal(he->gw, gw) ||
- (he->dest != dest) || (he->igp_metric != igp_metric);
-}
-
-static inline int
rta_next_hop_outdated(rta *a)
{
struct hostentry *he = a->hostentry;
- return he && hostentry_diff(he, a->iface, a->gw, a->dest, a->igp_metric);
+
+ if (!he)
+ return 0; /* No hostentry attached, so there is nothing to be outdated against */
+
+ if (!he->src)
+ return a->dest != RTD_UNREACHABLE; /* rt_update_hostentry() leaves src NULL only together with dest RTD_UNREACHABLE, so any other dest in a is stale */
+
+ return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) || /* iface is read from the source rta; gw, dest and metric from the hostentry */
+ (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
+ !mpnh_same(a->nexthops, he->src->nexthops); /* Multipath lists are compared by pointer first, then node by node */
}
static inline void
rta_apply_hostentry(rta *a, struct hostentry *he)
{
a->hostentry = he;
- a->iface = he->iface;
+ a->iface = he->src ? he->src->iface : NULL; /* The interface now comes from the hostentry's source rta, if it has one */
a->gw = he->gw;
a->dest = he->dest;
a->igp_metric = he->igp_metric;
+ a->nexthops = he->src ? he->src->nexthops : NULL; /* The list pointer is shared with the source rta; no copy is made here */
}
static inline rte *
@@ -1388,6 +1390,7 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig
he->tab = dep;
he->hash_key = k;
he->uc = 0;
+ he->src = NULL;
add_tail(&hc->hostentries, &he->ln);
hc_insert(hc, he);
@@ -1402,6 +1405,8 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig
static void
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
{
+ rta_free(he->src);
+
rem_node(&he->ln);
hc_remove(hc, he);
sl_free(hc->slab, he);
@@ -1436,6 +1441,8 @@ rt_free_hostcache(rtable *tab)
WALK_LIST(n, hc->hostentries)
{
struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
+ rta_free(he->src);
+
if (he->uc)
log(L_ERR "Hostcache is not empty in table %s", tab->name);
}
@@ -1488,7 +1495,7 @@ rt_get_igp_metric(rte *rt)
return rt->u.rip.metric;
/* Device routes */
- if (a->dest != RTD_ROUTER)
+ if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
return 0;
return IGP_METRIC_UNKNOWN;
@@ -1497,12 +1504,15 @@ rt_get_igp_metric(rte *rt)
static int
rt_update_hostentry(rtable *tab, struct hostentry *he)
{
- struct iface *old_iface = he->iface;
- ip_addr old_gw = he->gw;
- byte old_dest = he->dest;
- u32 old_metric = he->igp_metric;
+ rta *old_src = he->src;
int pxlen = 0;
+ /* Reset the hostentry */
+ he->src = NULL;
+ he->gw = IPA_NONE;
+ he->dest = RTD_UNREACHABLE;
+ he->igp_metric = 0;
+
net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
if (n)
{
@@ -1513,53 +1523,41 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
{
/* Recursive route should not depend on another recursive route */
log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
- he->addr, n->n.prefix, n->n.pxlen);
- he->iface = NULL;
- he->gw = IPA_NONE;
- he->dest = RTD_UNREACHABLE;
+ he->addr, n->n.prefix, pxlen);
+ goto done;
}
- else if (a->dest == RTD_DEVICE)
+
+ if (a->dest == RTD_DEVICE)
{
if (if_local_addr(he->addr, a->iface))
{
/* The host address is a local address, this is not valid */
log(L_WARN "Next hop address %I is a local address of iface %s",
he->addr, a->iface->name);
- he->iface = NULL;
- he->gw = IPA_NONE;
- he->dest = RTD_UNREACHABLE;
+ goto done;
}
- else
- {
- /* The host is directly reachable, use link as a gateway */
- he->iface = a->iface;
- he->gw = he->link;
- he->dest = RTD_ROUTER;
- }
+
+ /* The host is directly reachable, use link as a gateway */
+ he->gw = he->link;
+ he->dest = RTD_ROUTER;
}
else
{
/* The host is reachable through some route entry */
- he->iface = a->iface;
he->gw = a->gw;
he->dest = a->dest;
}
- he->igp_metric = he->iface ? rt_get_igp_metric(n->routes) : 0;
- }
- else
- {
- /* The host is unreachable */
- he->iface = NULL;
- he->gw = IPA_NONE;
- he->dest = RTD_UNREACHABLE;
- he->igp_metric = 0;
+ he->src = rta_clone(a);
+ he->igp_metric = rt_get_igp_metric(n->routes);
}
+ done:
/* Add a prefix range to the trie */
trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);
- return hostentry_diff(he, old_iface, old_gw, old_dest, old_metric);
+ rta_free(old_src);
+ return old_src != he->src;
}
static void
@@ -1630,6 +1628,7 @@ rt_format_via(rte *e, byte *via)
case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
+ case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
default: bsprintf(via, "???");
}
}
@@ -1641,6 +1640,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm
byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
rta *a = e->attrs;
int primary = (e->net->routes == e);
+ struct mpnh *nh;
rt_format_via(e, via);
tm_format_datetime(tm, &config->tf_route, e->lastmod);
@@ -1663,6 +1663,8 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm
bsprintf(info, " (%d)", e->pref);
cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->proto->name,
tm, from, primary ? " *" : "", info);
+ for (nh = a->nexthops; nh; nh = nh->next)
+ cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
if (d->verbose)
rta_show(c, a, tmpa);
}
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index ef5d024e..ff231b17 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1015,6 +1015,13 @@ bgp_get_neighbor(rte *r)
return ((struct bgp_proto *) r->attrs->proto)->remote_as;
}
+static inline int /* Nonzero when the route's dest is RTD_ROUTER, RTD_DEVICE or RTD_MULTIPATH */
+rte_resolvable(rte *rt)
+{
+ int rd = rt->attrs->dest; /* Destination type (RTD_...) of the route's attributes */
+ return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH); /* Used by bgp_rte_better() for the RFC 4271 9.1.2.1 resolvability test */
+}
+
int
bgp_rte_better(rte *new, rte *old)
{
@@ -1024,9 +1031,8 @@ bgp_rte_better(rte *new, rte *old)
u32 n, o;
/* RFC 4271 9.1.2.1. Route resolvability test */
- /* non-NULL iface means it is either RTD_ROUTER or RTD_DEVICE route */
- n = new->attrs->iface != NULL;
- o = old->attrs->iface != NULL;
+ n = rte_resolvable(new);
+ o = rte_resolvable(old);
if (n > o)
return 1;
if (n < o)
@@ -1502,7 +1508,7 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
buf += bsprintf(buf, " (%d", e->pref);
if (e->attrs->hostentry)
{
- if (!e->attrs->iface)
+ if (!rte_resolvable(e))
buf += bsprintf(buf, "/-");
else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
buf += bsprintf(buf, "/?");