diff options
Diffstat (limited to 'nest')
-rw-r--r-- | nest/config.Y | 5 | ||||
-rw-r--r-- | nest/iface.h | 9 | ||||
-rw-r--r-- | nest/neighbor.c | 122 | ||||
-rw-r--r-- | nest/route.h | 66 | ||||
-rw-r--r-- | nest/rt-attr.c | 185 | ||||
-rw-r--r-- | nest/rt-dev.c | 5 | ||||
-rw-r--r-- | nest/rt-table.c | 247 |
7 files changed, 419 insertions, 220 deletions
diff --git a/nest/config.Y b/nest/config.Y index 23d6a452..e6b0927b 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -78,8 +78,7 @@ CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST) -CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH) +CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type <i32> idval @@ -154,6 +153,8 @@ net_type: | FLOW6{ $$ = NET_FLOW6; } ; +CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6) + /* Creation of routing tables */ diff --git a/nest/iface.h b/nest/iface.h index d960b859..de5070d6 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -118,12 +118,15 @@ typedef struct neighbor { SCOPE_HOST when it's our own address */ } neighbor; -#define NEF_STICKY 1 -#define NEF_ONLINK 2 -#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_STICKY 1 +#define NEF_ONLINK 2 +#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_IFACE 8 /* Neighbors bound to iface */ + neighbor *neigh_find(struct proto *, ip_addr *, unsigned flags); neighbor *neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags); +neighbor *neigh_find_iface(struct proto *p, struct iface *ifa); static inline int neigh_connected_to(struct proto *p, ip_addr *a, struct iface *i) { diff --git a/nest/neighbor.c b/nest/neighbor.c index 2c7f9b84..96475a50 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -48,7 +48,7 @@ #define NEIGH_HASH_OFFSET 24 static slab *neigh_slab; -static list sticky_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; +static list sticky_neigh_list, iface_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; static inline uint neigh_hash(struct proto *p, ip_addr *a) @@ -166,6 +166,8 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return NULL; n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + n->addr = *a; if (scope >= 0) { @@ -187,6 +189,35 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return n; } +neighbor * +neigh_find_iface(struct proto *p, struct iface *ifa) +{ + neighbor *n; + node *nn; + + /* We keep neighbors with NEF_IFACE foremost in ifa->neighbors list */ + WALK_LIST2(n, nn, ifa->neighbors, if_n) + { + if (! (n->flags & NEF_IFACE)) + break; + + if (n->proto == p) + return n; + } + + n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + + add_tail(&iface_neigh_list, &n->n); + add_head(&ifa->neighbors, &n->if_n); + n->iface = ifa; + n->proto = p; + n->flags = NEF_IFACE; + n->scope = (ifa->flags & IF_UP) ? SCOPE_HOST : -1; + + return n; +} + /** * neigh_dump - dump specified neighbor entry. * @n: the entry to dump @@ -205,6 +236,8 @@ neigh_dump(neighbor *n) debug("%s %p %08x scope %s", n->proto->name, n->data, n->aux, ip_scope_text(n->scope)); if (n->flags & NEF_STICKY) debug(" STICKY"); + if (n->flags & NEF_IFACE) + debug(" IFACE"); debug("\n"); } @@ -223,6 +256,8 @@ neigh_dump_all(void) debug("Known neighbors:\n"); WALK_LIST(n, sticky_neigh_list) neigh_dump(n); + WALK_LIST(n, iface_neigh_list) + neigh_dump(n); for(i=0; i<NEIGH_HASH_SIZE; i++) WALK_LIST(n, neigh_hash_table[i]) neigh_dump(n); @@ -232,13 +267,18 @@ neigh_dump_all(void) static void neigh_up(neighbor *n, struct iface *i, int scope, struct ifa *a) { + DBG("Waking up sticky neighbor %I\n", n->addr); n->iface = i; n->ifa = a; n->scope = scope; - add_tail(&i->neighbors, &n->if_n); - rem_node(&n->n); - add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); - DBG("Waking up sticky neighbor %I\n", n->addr); + + if (! (n->flags & NEF_IFACE)) + { + add_tail(&i->neighbors, &n->if_n); + rem_node(&n->n); + add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); + } + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); } @@ -247,14 +287,20 @@ static void neigh_down(neighbor *n) { DBG("Flushing neighbor %I on %s\n", n->addr, n->iface->name); - rem_node(&n->if_n); - if (! (n->flags & NEF_BIND)) + if (! (n->flags & (NEF_BIND | NEF_IFACE))) n->iface = NULL; n->ifa = NULL; n->scope = -1; + + if (! (n->flags & NEF_IFACE)) + { + rem_node(&n->if_n); + rem_node(&n->n); + } + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); - rem_node(&n->n); + if (n->flags & NEF_STICKY) { add_tail(&sticky_neigh_list, &n->n); @@ -272,7 +318,8 @@ neigh_down(neighbor *n) return; } } - else + + if (! (n->flags & (NEF_STICKY | NEF_IFACE))) sl_free(neigh_slab, n); } @@ -290,10 +337,17 @@ void neigh_if_up(struct iface *i) { struct ifa *a; - neighbor *n, *next; + neighbor *n; + node *x, *y; int scope; - WALK_LIST_DELSAFE(n, next, sticky_neigh_list) + /* Wake up all iface neighbors */ + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if ((n->scope < 0) && (n->flags & NEF_IFACE)) + neigh_up(n, i, SCOPE_HOST, NULL); + + /* Wake up appropriate sticky neighbors */ + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) if ((!n->iface || n->iface == i) && ((scope = if_connected(&n->addr, i, &a)) >= 0)) neigh_up(n, i, scope, a); @@ -311,10 +365,11 @@ neigh_if_up(struct iface *i) void neigh_if_down(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - neigh_down(SKIP_BACK(neighbor, if_n, x)); + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + neigh_down(n); } /** @@ -328,14 +383,12 @@ neigh_if_down(struct iface *i) void neigh_if_link(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - neighbor *n = SKIP_BACK(neighbor, if_n, x); - if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) - n->proto->neigh_notify(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) + n->proto->neigh_notify(n); } /** @@ -352,19 +405,21 @@ void neigh_ifa_update(struct ifa *a) { struct iface *i = a->iface; + struct ifa *aa; node *x, *y; - + neighbor *n; + int scope; + /* Remove all neighbors whose scope has changed */ - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - struct ifa *aa; - neighbor *n = SKIP_BACK(neighbor, if_n, x); - if (if_connected(&n->addr, i, &aa) != n->scope) - neigh_down(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->ifa && (if_connected(&n->addr, i, &aa) != n->scope)) + neigh_down(n); /* Wake up all sticky neighbors that are reachable now */ - neigh_if_up(i); + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) + if ((!n->iface || n->iface == i) && + ((scope = if_connected(&n->addr, i, &aa)) >= 0)) + neigh_up(n, i, scope, aa); } static inline void @@ -373,7 +428,7 @@ neigh_prune_one(neighbor *n) if (n->proto->proto_state != PS_DOWN) return; rem_node(&n->n); - if (n->scope >= 0) + if (n->if_n.next) rem_node(&n->if_n); sl_free(neigh_slab, n); } @@ -398,6 +453,8 @@ neigh_prune(void) neigh_prune_one(n); WALK_LIST_DELSAFE(n, m, sticky_neigh_list) neigh_prune_one(n); + WALK_LIST_DELSAFE(n, m, iface_neigh_list) + neigh_prune_one(n); } /** @@ -410,10 +467,11 @@ neigh_prune(void) void neigh_init(pool *if_pool) { - int i; - neigh_slab = sl_new(if_pool, sizeof(neighbor)); + init_list(&sticky_neigh_list); - for(i=0; i<NEIGH_HASH_SIZE; i++) + init_list(&iface_neigh_list); + + for(int i = 0; i < NEIGH_HASH_SIZE; i++) init_list(&neigh_hash_table[i]); } diff --git a/nest/route.h b/nest/route.h index 12e67d61..546b04c4 100644 --- a/nest/route.h +++ b/nest/route.h @@ -168,6 +168,11 @@ typedef struct rtable { struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ } rtable; +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + typedef struct network { struct rte *routes; /* Available routes for this network */ struct fib_node n; /* FIB flags reserved for kernel syncer */ @@ -195,8 +200,8 @@ struct hostentry { unsigned hash_key; /* Hash key */ unsigned uc; /* Use count */ struct rta *src; /* Source rta entry */ - ip_addr gw; /* Chosen next hop */ byte dest; /* Chosen route destination type (RTD_...) */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ u32 igp_metric; /* Chosen route IGP metric */ }; @@ -333,12 +338,15 @@ void rt_show(struct rt_show_data *); * construction of BGP route attribute lists. */ -/* Multipath next-hop */ -struct mpnh { +/* Nexthop structure */ +struct nexthop { ip_addr gw; /* Next hop */ struct iface *iface; /* Outgoing interface */ - struct mpnh *next; + struct nexthop *next; byte weight; + byte labels_orig; /* Number of labels before hostentry was applied */ + byte labels; /* Number of all labels */ + u32 label[0]; }; struct rte_src { @@ -354,20 +362,16 @@ typedef struct rta { struct rta *next, **pprev; /* Hash chain */ u32 uc; /* Use count */ u32 hash_key; /* Hash over important fields */ - struct mpnh *nexthops; /* Next-hops for multipath routes */ struct ea_list *eattrs; /* Extended Attribute chain */ struct rte_src *src; /* Route source that created the route */ struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - struct iface *iface; /* Outgoing interface */ - ip_addr gw; /* Next hop */ ip_addr from; /* Advertising router */ u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - byte source; /* Route source (RTS_...) */ - byte scope; /* Route scope (SCOPE_... -- see ip.h) */ - byte cast; /* Casting type (RTC_...) */ - byte dest; /* Route destination type (RTD_...) */ - byte flags; /* Route flags (RTF_...), now unused */ - byte aflags; /* Attribute cache flags (RTAF_...) */ + u8 source; /* Route source (RTS_...) */ + u8 scope; /* Route scope (SCOPE_... -- see ip.h) */ + u8 dest; /* Route destination type (RTD_...) */ + u8 aflags; + struct nexthop nh; /* Next hop */ } rta; #define RTS_DUMMY 0 /* Dummy route to be removed soon */ @@ -392,13 +396,12 @@ typedef struct rta { #define RTC_MULTICAST 2 #define RTC_ANYCAST 3 /* IPv6 Anycast */ -#define RTD_ROUTER 0 /* Next hop is neighbor router */ -#define RTD_DEVICE 1 /* Points to device */ +#define RTD_NONE 0 /* Undefined next hop */ +#define RTD_UNICAST 1 /* Next hop is neighbor router */ #define RTD_BLACKHOLE 2 /* Silently drop packets */ #define RTD_UNREACHABLE 3 /* Reject as unreachable */ #define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MULTIPATH 5 /* Multipath route (nexthops != NULL) */ -#define RTD_NONE 6 /* Invalid RTD */ +#define RTD_MAX 5 /* Flags for net->n.flags, used by kernel syncer */ #define KRF_INSTALLED 0x80 /* This route should be installed in the kernel */ @@ -410,9 +413,14 @@ typedef struct rta { protocol-specific metric is availabe */ +const char * rta_dest_names[RTD_MAX]; + +static inline const char *rta_dest_name(uint n) +{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } + /* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ static inline int rte_is_reachable(rte *r) -{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); } +{ return r->attrs->dest == RTD_UNICAST; } /* @@ -517,14 +525,22 @@ uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ ea_list *ea_append(ea_list *to, ea_list *what); void ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); -int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */ -static inline int mpnh_same(struct mpnh *x, struct mpnh *y) -{ return (x == y) || mpnh__same(x, y); } -struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp); -void mpnh_insert(struct mpnh **n, struct mpnh *y); -int mpnh_is_sorted(struct mpnh *x); +#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) + +static inline size_t nexthop_size(const struct nexthop *nh) +{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } +int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ +static inline int nexthop_same(struct nexthop *x, struct nexthop *y) +{ return (x == y) || nexthop__same(x, y); } +struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); +static inline void nexthop_link(struct rta *a, struct nexthop *from) +{ memcpy(&a->nh, from, nexthop_size(from)); } +void nexthop_insert(struct nexthop **n, struct nexthop *y); +int nexthop_is_sorted(struct nexthop *x); void rta_init(void); +static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } +#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } static inline rta *rta_clone(rta *r) { r->uc++; return r; } @@ -535,7 +551,7 @@ static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); -void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll); +void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls); /* * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 94f25de8..1b7f5836 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -58,10 +58,18 @@ #include <stddef.h> +const char * rta_dest_names[RTD_MAX] = { + [RTD_NONE] = "", + [RTD_UNICAST] = "unicast", + [RTD_BLACKHOLE] = "blackhole", + [RTD_UNREACHABLE] = "unreachable", + [RTD_PROHIBIT] = "prohibited", +}; + pool *rta_pool; -static slab *rta_slab; -static slab *mpnh_slab; +static slab *rta_slab_[4]; +static slab *nexthop_slab_[4]; static slab *rte_src_slab; static struct idm src_ids; @@ -144,27 +152,38 @@ rt_prune_sources(void) */ static inline u32 -mpnh_hash(struct mpnh *x) +nexthop_hash(struct nexthop *x) { u32 h = 0; for (; x; x = x->next) - h ^= ipa_hash(x->gw); + { + h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); + + for (int i = 0; i < x->labels; i++) + h ^= x->label[i] ^ (h << 6) ^ (h >> 7); + } return h; } int -mpnh__same(struct mpnh *x, struct mpnh *y) +nexthop__same(struct nexthop *x, struct nexthop *y) { for (; x && y; x = x->next, y = y->next) - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight)) + { + if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight) || (x->labels != y->labels)) return 0; + for (int i = 0; i < x->labels; i++) + if (x->label[i] != y->label[i]) + return 0; + } + return x == y; } static int -mpnh_compare_node(struct mpnh *x, struct mpnh *y) +nexthop_compare_node(struct nexthop *x, struct nexthop *y) { int r; @@ -182,22 +201,33 @@ mpnh_compare_node(struct mpnh *x, struct mpnh *y) if (r) return r; + r = ((int) y->labels) - ((int) x->labels); + if (r) + return r; + + for (int i = 0; i < y->labels; i++) + { + r = ((int) y->label[i]) - ((int) x->label[i]); + if (r) + return r; + } + return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct mpnh * -mpnh_copy_node(const struct mpnh *src, linpool *lp) +static inline struct nexthop * +nexthop_copy_node(const struct nexthop *src, linpool *lp) { - struct mpnh *n = lp_alloc(lp, sizeof(struct mpnh)); - n->gw = src->gw; - n->iface = src->iface; + struct nexthop *n = lp_alloc(lp, nexthop_size(src)); + + memcpy(n, src, nexthop_size(src)); n->next = NULL; - n->weight = src->weight; + return n; } /** - * mpnh_merge - merge nexthop lists + * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 * @rx: reusability of list @x @@ -205,7 +235,7 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * @max: max number of nexthops * @lp: linpool for allocating nexthops * - * The mpnh_merge() function takes two nexthop lists @x and @y and merges them, + * The nexthop_merge() function takes two nexthop lists @x and @y and merges them, * eliminating possible duplicates. The input lists must be sorted and the * result is sorted too. The number of nexthops in result is limited by @max. * New nodes are allocated from linpool @lp. @@ -218,28 +248,28 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct mpnh * -mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) +struct nexthop * +nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) { - struct mpnh *root = NULL; - struct mpnh **n = &root; + struct nexthop *root = NULL; + struct nexthop **n = &root; while ((x || y) && max--) { - int cmp = mpnh_compare_node(x, y); + int cmp = nexthop_compare_node(x, y); if (cmp < 0) { - *n = rx ? x : mpnh_copy_node(x, lp); + *n = rx ? x : nexthop_copy_node(x, lp); x = x->next; } else if (cmp > 0) { - *n = ry ? y : mpnh_copy_node(y, lp); + *n = ry ? y : nexthop_copy_node(y, lp); y = y->next; } else { - *n = rx ? x : (ry ? y : mpnh_copy_node(x, lp)); + *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); x = x->next; y = y->next; } @@ -251,11 +281,11 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) } void -mpnh_insert(struct mpnh **n, struct mpnh *x) +nexthop_insert(struct nexthop **n, struct nexthop *x) { for (; *n; n = &((*n)->next)) { - int cmp = mpnh_compare_node(*n, x); + int cmp = nexthop_compare_node(*n, x); if (cmp < 0) continue; @@ -270,28 +300,37 @@ mpnh_insert(struct mpnh **n, struct mpnh *x) } int -mpnh_is_sorted(struct mpnh *x) +nexthop_is_sorted(struct nexthop *x) { for (; x && x->next; x = x->next) - if (mpnh_compare_node(x, x->next) >= 0) + if (nexthop_compare_node(x, x->next) >= 0) return 0; return 1; } -static struct mpnh * -mpnh_copy(struct mpnh *o) +static inline slab * +nexthop_slab(struct nexthop *nh) { - struct mpnh *first = NULL; - struct mpnh **last = &first; + return nexthop_slab_[MIN(nh->labels, 3)]; +} + +static struct nexthop * +nexthop_copy(struct nexthop *o) +{ + struct nexthop *first = NULL; + struct nexthop **last = &first; for (; o; o = o->next) { - struct mpnh *n = sl_alloc(mpnh_slab); + struct nexthop *n = sl_alloc(nexthop_slab(o)); n->gw = o->gw; n->iface = o->iface; n->next = NULL; n->weight = o->weight; + n->labels = o->labels; + for (int i=0; i<o->labels; i++) + n->label[i] = o->label[i]; *last = n; last = &(n->next); @@ -301,14 +340,14 @@ mpnh_copy(struct mpnh *o) } static void -mpnh_free(struct mpnh *o) +nexthop_free(struct nexthop *o) { - struct mpnh *n; + struct nexthop *n; while (o) { n = o->next; - sl_free(mpnh_slab, o); + sl_free(nexthop_slab(o), o); o = n; } } @@ -994,19 +1033,14 @@ rta_hash(rta *a) #define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); MIX(src); MIX(hostentry); - MIX(iface); - MIX(gw); MIX(from); MIX(igp_metric); MIX(source); MIX(scope); - MIX(cast); MIX(dest); - MIX(flags); - MIX(aflags); #undef MIX - return mem_hash_value(&h) ^ mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs); + return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); } static inline int @@ -1015,26 +1049,28 @@ rta_same(rta *x, rta *y) return (x->src == y->src && x->source == y->source && x->scope == y->scope && - x->cast == y->cast && x->dest == y->dest && - x->flags == y->flags && x->igp_metric == y->igp_metric && - ipa_equal(x->gw, y->gw) && ipa_equal(x->from, y->from) && - x->iface == y->iface && x->hostentry == y->hostentry && - mpnh_same(x->nexthops, y->nexthops) && + nexthop_same(&(x->nh), &(y->nh)) && ea_same(x->eattrs, y->eattrs)); } +static inline slab * +rta_slab(rta *a) +{ + return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; +} + static rta * rta_copy(rta *o) { - rta *r = sl_alloc(rta_slab); + rta *r = sl_alloc(rta_slab(o)); - memcpy(r, o, sizeof(rta)); + memcpy(r, o, rta_size(o)); r->uc = 1; - r->nexthops = mpnh_copy(o->nexthops); + r->nh.next = nexthop_copy(o->nh.next); r->eattrs = ea_list_copy(o->eattrs); return r; } @@ -1127,19 +1163,26 @@ rta__free(rta *a) *a->pprev = a->next; if (a->next) a->next->pprev = a->pprev; - a->aflags = 0; /* Poison the entry */ rt_unlock_hostentry(a->hostentry); rt_unlock_source(a->src); - mpnh_free(a->nexthops); + if (a->nh.next) + nexthop_free(a->nh.next); ea_free(a->eattrs); - sl_free(rta_slab, a); + a->aflags = 0; /* Poison the entry */ + sl_free(rta_slab(a), a); } rta * rta_do_cow(rta *o, linpool *lp) { - rta *r = lp_alloc(lp, sizeof(rta)); - memcpy(r, o, sizeof(rta)); + rta *r = lp_alloc(lp, rta_size(o)); + memcpy(r, o, rta_size(o)); + for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) + { + *nhn = lp_alloc(lp, nexthop_size(nho)); + memcpy(*nhn, nho, nexthop_size(nho)); + nhn = &((*nhn)->next); + } r->aflags = 0; r->uc = 0; return r; @@ -1158,19 +1201,23 @@ rta_dump(rta *a) "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtc[] = { "", " BC", " MC", " AC" }; static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - debug("p=%s uc=%d %s %s%s%s h=%04x", - a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtc[a->cast], + debug("p=%s uc=%d %s %s%s h=%04x", + a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtd[a->dest], a->hash_key); if (!(a->aflags & RTAF_CACHED)) debug(" !CACHED"); debug(" <-%I", a->from); - if (a->dest == RTD_ROUTER) - debug(" ->%I", a->gw); - if (a->dest == RTD_DEVICE || a->dest == RTD_ROUTER) - debug(" [%s]", a->iface ? a->iface->name : "???" ); + if (a->dest == RTD_UNICAST) + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); + } if (a->eattrs) { debug(" EA: "); @@ -1206,10 +1253,9 @@ rta_show(struct cli *c, rta *a, ea_list *eal) { static char *src_names[] = { "dummy", "static", "inherit", "device", "static-device", "redirect", "RIP", "OSPF", "OSPF-IA", "OSPF-E1", "OSPF-E2", "BGP", "pipe" }; - static char *cast_names[] = { "unicast", "broadcast", "multicast", "anycast" }; int i; - cli_printf(c, -1008, "\tType: %s %s %s", src_names[a->source], cast_names[a->cast], ip_scope_text(a->scope)); + cli_printf(c, -1008, "\tType: %s %s", src_names[a->source], ip_scope_text(a->scope)); if (!eal) eal = a->eattrs; for(; eal; eal=eal->next) @@ -1227,8 +1273,17 @@ void rta_init(void) { rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab = sl_new(rta_pool, sizeof(rta)); - mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh)); + + rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); + rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); + rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); + rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + + nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); + nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); + nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); + nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_alloc_hash(); rte_src_init(); } diff --git a/nest/rt-dev.c b/nest/rt-dev.c index d98cd79f..9993da24 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -78,9 +78,8 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) .src = src, .source = RTS_DEVICE, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = RTD_DEVICE, - .iface = ad->iface + .dest = RTD_UNICAST, + .nh.iface = ad->iface, }; a = rta_lookup(&a0); diff --git a/nest/rt-table.c b/nest/rt-table.c index 8c429874..f8baf572 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -50,7 +50,6 @@ static linpool *rte_update_pool; static list routing_tables; -static byte *rt_format_via(rte *e); static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); @@ -346,7 +345,7 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { - log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rt_format_via(e)); + log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest)); } static inline void @@ -708,19 +707,17 @@ rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_chang } -static struct mpnh * -mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max) +static struct nexthop * +nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) { - struct mpnh nh = { .gw = a->gw, .iface = a->iface }; - struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh; - return mpnh_merge(nhs, nh2, 1, 0, max, pool); + return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); } rte * rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { // struct proto *p = c->proto; - struct mpnh *nhs = NULL; + struct nexthop *nhs = NULL; rte *best0, *best, *rt0, *rt, *tmp; best0 = net->routes; @@ -745,7 +742,7 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, lin continue; if (rte_is_reachable(rt)) - nhs = mpnh_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); if (tmp) rte_free(tmp); @@ -753,13 +750,12 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, lin if (nhs) { - nhs = mpnh_merge_rta(nhs, best->attrs, pool, c->merge_limit); + nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); if (nhs->next) { best = rte_cow_rta(best, pool); - best->attrs->dest = RTD_MULTIPATH; - best->attrs->nexthops = nhs; + nexthop_link(best->attrs, nhs); } } @@ -922,7 +918,7 @@ rte_validate(rte *e) return 0; } - if ((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops)) + if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) { log(L_WARN "Ignoring unsorted multipath route %N received via %s", n->n.addr, e->sender->proto->name); @@ -1564,11 +1560,14 @@ rt_schedule_hcu(rtable *tab) static inline void rt_schedule_nhu(rtable *tab) { - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) ev_schedule(tab->rt_event); - /* state change 0->1, 2->3 */ - tab->nhu_state |= 1; + /* state change: + * NHU_CLEAN -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_DIRTY + */ + tab->nhu_state |= NHU_SCHEDULED; } void @@ -1763,33 +1762,102 @@ rta_next_hop_outdated(rta *a) if (!he->src) return a->dest != RTD_UNREACHABLE; - return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) || - (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - !mpnh_same(a->nexthops, he->src->nexthops); + return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || + (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); } static inline void -rta_apply_hostentry(rta *a, struct hostentry *he) +rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) { a->hostentry = he; - a->iface = he->src ? he->src->iface : NULL; - a->gw = he->gw; a->dest = he->dest; a->igp_metric = he->igp_metric; - a->nexthops = he->src ? he->src->nexthops : NULL; + + if (a->dest != RTD_UNICAST) + { + /* No nexthop */ +no_nexthop: + a->nh = (struct nexthop) {}; + if (mls) + { /* Store the label stack for later changes */ + a->nh.labels_orig = a->nh.labels = mls->len; + memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); + } + return; + } + + if (((!mls) || (!mls->len)) && he->nexthop_linkable) + { /* Just link the nexthop chain, no label append happens. */ + memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + return; + } + + struct nexthop *nhp = NULL, *nhr = NULL; + int skip_nexthop = 0; + + for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + { + if (skip_nexthop) + skip_nexthop--; + else + { + nhr = nhp; + nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + } + + nhp->iface = nh->iface; + nhp->weight = nh->weight; + if (mls) + { + nhp->labels = nh->labels + mls->len; + nhp->labels_orig = mls->len; + if (nhp->labels <= MPLS_MAX_LABEL_STACK) + { + memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ + memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ + } + else + { + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); + skip_nexthop++; + continue; + } + } + if (ipa_nonzero(nh->gw)) + nhp->gw = nh->gw; /* Router nexthop */ + else if (ipa_nonzero(he->link)) + nhp->gw = he->link; /* Device nexthop with link-local address known */ + else + nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + } + + if (skip_nexthop) + if (nhr) + nhr->next = NULL; + else + { + a->dest = RTD_UNREACHABLE; + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + goto no_nexthop; + } } static inline rte * rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) { - rta a; - memcpy(&a, old->attrs, sizeof(rta)); - rta_apply_hostentry(&a, old->attrs->hostentry); - a.aflags = 0; + rta *a = alloca(RTA_MAX_SIZE); + memcpy(a, old->attrs, rta_size(old->attrs)); + + mpls_label_stack mls = { .len = a->nh.labels_orig }; + memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + + rta_apply_hostentry(a, old->attrs->hostentry, &mls); + a->aflags = 0; rte *e = sl_alloc(rte_slab); memcpy(e, old, sizeof(rte)); - e->attrs = rta_lookup(&a); + e->attrs = rta_lookup(a); return e; } @@ -1870,13 +1938,13 @@ rt_next_hop_update(rtable *tab) struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) return; - if (tab->nhu_state == 1) + if (tab->nhu_state == NHU_SCHEDULED) { FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = 2; + tab->nhu_state = NHU_RUNNING; } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -1891,10 +1959,13 @@ rt_next_hop_update(rtable *tab) } FIB_ITERATE_END; - /* state change 2->0, 3->1 */ + /* State change: + * NHU_DIRTY -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_CLEAN + */ tab->nhu_state &= 1; - if (tab->nhu_state > 0) + if (tab->nhu_state != NHU_CLEAN) ev_schedule(tab->rt_event); } @@ -2198,12 +2269,12 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig { struct hostentry *he = sl_alloc(hc->slab); - he->addr = a; - he->link = ll; - he->tab = dep; - he->hash_key = k; - he->uc = 0; - he->src = NULL; + *he = (struct hostentry) { + .addr = a, + .link = ll, + .tab = dep, + .hash_key = k, + }; add_tail(&hc->hostentries, &he->ln); hc_insert(hc, he); @@ -2310,8 +2381,7 @@ rt_get_igp_metric(rte *rt) return rt->u.rip.metric; #endif - /* Device routes */ - if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH)) + if (a->source == RTS_DEVICE) return 0; return IGP_METRIC_UNKNOWN; @@ -2325,7 +2395,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) /* Reset the hostentry */ he->src = NULL; - he->gw = IPA_NONE; + he->nexthop_linkable = 0; he->dest = RTD_UNREACHABLE; he->igp_metric = 0; @@ -2346,32 +2416,31 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } - if (a->dest == RTD_DEVICE) - { - if (if_local_addr(he->addr, a->iface)) - { - /* The host address is a local address, this is not valid */ - log(L_WARN "Next hop address %I is a local address of iface %s", - he->addr, a->iface->name); - goto done; - } - - /* The host is directly reachable, use link as a gateway */ - he->gw = he->link; - he->dest = RTD_ROUTER; - } - else + he->dest = a->dest; + he->nexthop_linkable = 1; + if (he->dest == RTD_UNICAST) { - /* The host is reachable through some route entry */ - he->gw = a->gw; - he->dest = a->dest; + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + if (ipa_zero(nh->gw)) + { + if (if_local_addr(he->addr, nh->iface)) + { + /* The host address is a local address, this is not valid */ + log(L_WARN "Next hop address %I is a local address of iface %s", + he->addr, nh->iface->name); + goto done; + } + + he->nexthop_linkable = 0; + break; + } } he->src = rta_clone(a); he->igp_metric = rt_get_igp_metric(e); } - done: +done: /* Add a prefix range to the trie */ trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen); @@ -2426,9 +2495,9 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) } void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll) +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) { - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? gw : ll, dep)); + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? gw : ll, dep), mls); } @@ -2436,27 +2505,6 @@ rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr * CLI commands */ -static byte * -rt_format_via(rte *e) -{ - rta *a = e->attrs; - - /* Max text length w/o IP addr and interface name is 16 */ - static byte via[IPA_MAX_TEXT_LENGTH+sizeof(a->iface->name)+16]; - - switch (a->dest) - { - case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break; - case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break; - case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break; - case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break; - case RTD_PROHIBIT: bsprintf(via, "prohibited"); break; - case RTD_MULTIPATH: bsprintf(via, "multipath"); break; - default: bsprintf(via, "???"); - } - return via; -} - static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) { @@ -2466,10 +2514,10 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm int primary = (e->net->routes == e); int sync_error = (e->net->n.flags & KRF_SYNC_ERROR); void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); - struct mpnh *nh; + struct nexthop *nh; tm_format_datetime(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw)) + if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) bsprintf(from, " from %I", a->from); else from[0] = 0; @@ -2488,10 +2536,29 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm get_route_info(e, info, tmpa); else bsprintf(info, " (%d)", e->pref); - cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name, - tm, from, primary ? (sync_error ? " !" : " *") : "", info); - for (nh = a->nexthops; nh; nh = nh->next) - cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1); + + cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), + a->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + + if (a->dest == RTD_UNICAST) + for (nh = &(a->nh); nh; nh = nh->next) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (a->nh.next) + cli_printf(c, -1007, "\tvia %I%s on %s weight %d", nh->gw, mpls, nh->iface->name, nh->weight + 1); + else + cli_printf(c, -1007, "\tvia %I%s on %s", nh->gw, mpls, nh->iface->name); + } + if (d->verbose) rta_show(c, a, tmpa); } |