diff options
41 files changed, 1793 insertions, 1091 deletions
diff --git a/conf/cf-lex.l b/conf/cf-lex.l index bd6dfff2..fb3d59e4 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -123,6 +123,60 @@ include ^{WHITE}*include{WHITE}*\".*\"{WHITE}*; cf_include(start, end-start); } +[02]:{DIGIT}+:{DIGIT}+ { + unsigned long int l, len1, len2; + char *e; + + if (yytext[0] == '0') + { + cf_lval.i64 = 0; + len1 = 16; + len2 = 32; + } + else + { + cf_lval.i64 = 2ULL << 48; + len1 = 32; + len2 = 16; + } + + errno = 0; + l = strtoul(yytext+2, &e, 10); + if (e && (*e != ':') || (errno == ERANGE) || (l >> len1)) + cf_error("ASN out of range"); + cf_lval.i64 |= ((u64) l) << len2; + + errno = 0; + l = strtoul(e+1, &e, 10); + if (e && *e || (errno == ERANGE) || (l >> len2)) + cf_error("Number out of range"); + cf_lval.i64 |= l; + + return VPN_RD; +} + +1:{DIGIT}+\.{DIGIT}+\.{DIGIT}+\.{DIGIT}+:{DIGIT}+ { + unsigned long int l; + ip4_addr ip4; + char *e; + + cf_lval.i64 = 1ULL << 48; + + e = strchr(yytext+2, ':'); + *e++ = '\0'; + if (!ip4_pton(yytext+2, &ip4)) + cf_error("Invalid IPv4 address %s in Route Distinguisher", yytext+2); + cf_lval.i64 |= ((u64) ip4_to_u32(ip4)) << 16; + + errno = 0; + l = strtoul(e, &e, 10); + if (e && *e || (errno == ERANGE) || (l >> 16)) + cf_error("Number out of range"); + cf_lval.i64 |= l; + + return VPN_RD; +} + {DIGIT}+\.{DIGIT}+\.{DIGIT}+\.{DIGIT}+ { if (!ip4_pton(yytext, &cf_lval.ip4)) cf_error("Invalid IPv4 address %s", yytext); diff --git a/conf/confbase.Y b/conf/confbase.Y index aec4aeb4..d6a6951f 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -38,6 +38,7 @@ CF_DECLS %union { int i; u32 i32; + u64 i64; ip_addr a; ip4_addr ip4; ip6_addr ip6; @@ -64,6 +65,7 @@ CF_DECLS struct proto_spec ps; struct channel_limit cl; struct timeformat *tf; + mpls_label_stack *mls; } %token END CLI_MARKER INVALID_TOKEN ELSECOL DDOT @@ -72,6 +74,7 @@ CF_DECLS %token <i> NUM ENUM %token <ip4> IP4 %token <ip6> IP6 +%token <i64> VPN_RD %token <s> SYM %token <t> TEXT %type <iface> ipa_scope @@ -81,7 +84,8 @@ CF_DECLS %type 
<time> datetime %type <a> ipa %type <net> net_ip4_ net_ip6_ net_ip6 net_ip_ net_ip net_or_ipa -%type <net_ptr> net_ net_any net_roa4_ net_roa6_ net_roa_ +%type <net_ptr> net_ net_any net_vpn4_ net_vpn6_ net_vpn_ net_roa4_ net_roa6_ net_roa_ +%type <mls> label_stack_start label_stack %type <t> text opttext @@ -93,7 +97,7 @@ CF_DECLS %left '!' %nonassoc '.' -CF_KEYWORDS(DEFINE, ON, OFF, YES, NO, S, MS, US, PORT) +CF_KEYWORDS(DEFINE, ON, OFF, YES, NO, S, MS, US, PORT, VPN) CF_GRAMMAR @@ -196,6 +200,18 @@ net_ip6_: IP6 '/' NUM cf_error("Invalid IPv6 prefix"); }; +net_vpn4_: VPN_RD net_ip4_ +{ + $$ = cfg_alloc(sizeof(net_addr_vpn4)); + net_fill_vpn4($$, net4_prefix(&$2), net4_pxlen(&$2), $1); +} + +net_vpn6_: VPN_RD net_ip6_ +{ + $$ = cfg_alloc(sizeof(net_addr_vpn6)); + net_fill_vpn6($$, net6_prefix(&$2), net6_pxlen(&$2), $1); +} + net_roa4_: net_ip4_ MAX NUM AS NUM { $$ = cfg_alloc(sizeof(net_addr_roa4)); @@ -213,10 +229,12 @@ net_roa6_: net_ip6_ MAX NUM AS NUM }; net_ip_: net_ip4_ | net_ip6_ ; +net_vpn_: net_vpn4_ | net_vpn6_ ; net_roa_: net_roa4_ | net_roa6_ ; net_: net_ip_ { $$ = cfg_alloc($1.length); net_copy($$, &($1)); } + | net_vpn_ | net_roa_ | net_flow_ ; @@ -254,6 +272,8 @@ net_any: net_or_ipa: net_ip4_ | net_ip6_ + | net_vpn4_ { $$ = *$1; } + | net_vpn6_ { $$ = *$1; } | IP4 { net_fill_ip4(&($$), $1, IP4_MAX_PREFIX_LENGTH); } | IP6 { net_fill_ip6(&($$), $1, IP6_MAX_PREFIX_LENGTH); } | SYM { @@ -266,6 +286,22 @@ net_or_ipa: } ; +label_stack_start: NUM +{ + $$ = cfg_allocz(sizeof(mpls_label_stack)); + $$->len = 1; + $$->stack[0] = $1; +}; + +label_stack: + label_stack_start + | label_stack '/' NUM { + if ($1->len >= MPLS_MAX_LABEL_STACK) + cf_error("Too many labels in stack"); + $1->stack[$1->len++] = $3; + $$ = $1; + } +; datetime: TEXT { diff --git a/configure.in b/configure.in index bb779be7..af9c452d 100644 --- a/configure.in +++ b/configure.in @@ -57,6 +57,7 @@ if test "$ac_test_CFLAGS" != set ; then bird_cflags_default=yes fi +AC_PROG_CC AC_PROG_CC_C99 if 
test -z "$GCC" ; then AC_MSG_ERROR([This program requires the GNU C Compiler.]) diff --git a/doc/Makefile b/doc/Makefile index 4e7e91eb..f36642be 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -36,8 +36,8 @@ $(o)%.ps: $(o)%.dvi dvips -D600 -ta4 -o $@ $< $(o)%.pdf: $(o)%.tex - pdflatex -output-directory=$(dir $@) $< - pdflatex -output-directory=$(dir $@) $< + TEXINPUTS=$(TEXINPUTS):$(doc-srcdir)/tex pdflatex -output-directory=$(dir $@) $< + TEXINPUTS=$(TEXINPUTS):$(doc-srcdir)/tex pdflatex -output-directory=$(dir $@) $< $(o)%.txt: $(o)%.sgml cd $(dir $@) && $(sgml2)txt $(notdir $<) diff --git a/doc/bird.sgml b/doc/bird.sgml index 0b978b62..91c21eeb 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1186,20 +1186,41 @@ foot). <tag><label id="type-ip">ip</tag> This type can hold a single IP address. Depending on the compile-time configuration of BIRD you are using, it is either an IPv4 or IPv6 - address. IP addresses are written in the standard notation + address; this may be checked by <cf>.is_v4</cf> which returns a <cf/bool/. + IP addresses are written in the standard notation (<cf/10.20.30.40/ or <cf/fec0:3:4::1/). You can apply special operator <cf>.mask(<M>num</M>)</cf> on values of type ip. It masks out all but first <cf><M>num</M></cf> bits from the IP address. So <cf/1.2.3.4.mask(8) = 1.0.0.0/ is true. <tag><label id="type-prefix">prefix</tag> - This type can hold a network prefix consisting of IP address and prefix - length. Prefix literals are written as <cf><m/ipaddress//<m/pxlen/</cf>, + This type can hold a network prefix consisting of IP address, prefix + length and several other values. This is the key in route tables. + + Prefixes may be of several types, which can be determined by the special + operator <cf/.type/. The type may be: + + <cf/NET_IP4/ and <cf/NET_IP6/ prefixes hold an IP prefix. The literals + are written as <cf><m/ipaddress//<m/pxlen/</cf>, or <cf><m>ipaddress</m>/<m>netmask</m></cf>. 
There are two special - operators on prefixes: <cf/.ip/ which extracts the IP address from the - pair, and <cf/.len/, which separates prefix length from the pair. + operators on IP prefixes: <cf/.ip/ which extracts the IP address from + the pair, and <cf/.len/, which separates prefix length from the pair. So <cf>1.2.0.0/16.len = 16</cf> is true. + <cf/NET_VPN4/ and <cf/NET_VPN6/ prefixes hold an IP prefix with VPN + Route Distinguisher (<rfc id="4364">). They support the same special + operators as IP prefixes, and also <cf/.rd/ which extracts the Route + Distinguisher. Their literals are written + as <cf><m/vpnrd/ <m/ipprefix/</cf>. + + <cf/NET_ROA4/ and <cf/NET_ROA6/ prefixes hold an IP prefix range + together with an ASN. They support the same special operators as IP + prefixes, and also <cf/.maxlen/ which extracts the maximal prefix length, + and <cf/.asn/ which extracts the ASN. + + <cf/NET_FLOW4/ and <cf/NET_FLOW6/ hold an IP prefix together with a + flowspec rule. Filters currently don't support flowspec parsing. + <tag><label id="type-ec">ec</tag> This is a specialized type used to represent BGP extended community values. It is essentially a 64bit value, literals of this type are @@ -4160,12 +4181,12 @@ return packets as undeliverable if they are in your IP block, you don't have any specific destination for them and you don't want to send them out through the default route to prevent routing loops). -<p>There are five types of static routes: `classical' routes telling to forward -packets to a neighboring router, multipath routes specifying several (possibly -weighted) neighboring routers, device routes specifying forwarding to hosts on a -directly connected network, recursive routes computing their nexthops by doing -route table lookups for a given IP, and special routes (sink, blackhole etc.) -which specify a special action to be done instead of forwarding the packet. 
+<p>There are four types of static routes: `classical' routes telling to forward +packets to a neighboring router (single path or multipath, possibly weighted), +device routes specifying forwarding to hosts on a directly connected network, +recursive routes computing their nexthops by doing route table lookups for a +given IP, and special routes (sink, blackhole etc.) which specify a special +action to be done instead of forwarding the packet. <p>When the particular destination is not available (the interface is down or the next hop of the route is not a neighbor at the moment), Static just @@ -4194,14 +4215,14 @@ definition of the protocol contains mainly a list of static routes. <p>Route definitions (each may also contain a block of per-route options): <descrip> - <tag><label id="static-route-via-ip">route <m/prefix/ via <m/ip/</tag> - Static route through a neighboring router. For link-local next hops, + <tag><label id="static-route-via-ip">route <m/prefix/ via <m/ip/ [mpls <m/num/[/<m/num/[/<m/num/[...]]]]</tag> + Static single path route through a neighboring router. For link-local next hops, interface can be specified as a part of the address (e.g., - <cf/via fe80::1234%eth0/). + <cf/via fe80::1234%eth0/). MPLS labels should be specified in outer-first order. - <tag><label id="static-route-via-mpath">route <m/prefix/ multipath via <m/ip/ [weight <m/num/] [bfd <m/switch/] [via <m/.../]</tag> + <tag><label id="static-route-via-mpath">route <m/prefix/ via <m/ip/ [mpls <m/num/[/<m/num/[/<m/num/[...]]]] [weight <m/num/] [bfd <m/switch/] [via ...]</tag> Static multipath route. Contains several nexthops (gateways), possibly - with their weights. + with their weights and MPLS labels. 
<tag><label id="static-route-via-iface">route <m/prefix/ via <m/"interface"/</tag> Static device route through an interface to hosts on a directly diff --git a/filter/config.Y b/filter/config.Y index 7b4178be..2864d290 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -393,13 +393,14 @@ CF_DECLS CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, ACCEPT, REJECT, ERROR, QUITBIRD, - INT, BOOL, IP, PREFIX, PAIR, QUAD, EC, LC, + INT, BOOL, IP, TYPE, PREFIX, RD, PAIR, QUAD, EC, LC, SET, STRING, BGPMASK, BGPPATH, CLIST, ECLIST, LCLIST, IF, THEN, ELSE, CASE, TRUE, FALSE, RT, RO, UNKNOWN, GENERIC, - FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, CAST, DEST, IFNAME, IFINDEX, + FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, DEST, IFNAME, IFINDEX, PREFERENCE, ROA_CHECK, ASN, + IS_V4, IS_V6, LEN, MAXLEN, DEFINED, ADD, DELETE, CONTAINS, RESET, @@ -460,6 +461,7 @@ type: INT { $$ = T_INT; } | BOOL { $$ = T_BOOL; } | IP { $$ = T_IP; } + | RD { $$ = T_RD; } | PREFIX { $$ = T_NET; } | PAIR { $$ = T_PAIR; } | QUAD { $$ = T_QUAD; } @@ -786,6 +788,7 @@ constant: | FALSE { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_BOOL; $$->a2.i = 0; } | TEXT { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_STRING; $$->a2.p = $1; } | fipa { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; $$->a1.p = val; *val = $1; } + | VPN_RD { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; val->type = T_RD; val->val.ec = $1; $$->a1.p = val; } | net_ { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; val->type = T_NET; val->val.net = $1; $$->a1.p = val; } | '[' set_items ']' { DBG( "We've got a set here..." 
); $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_SET; $$->a2.p = build_tree($2); DBG( "ook\n" ); } | '[' fprefix_set ']' { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_PREFIX_SET; $$->a2.p = $2; } @@ -854,7 +857,6 @@ static_attr: | PROTO { $$ = f_new_inst(); $$->aux = T_STRING; $$->a2.i = SA_PROTO; } | SOURCE { $$ = f_new_inst(); $$->aux = T_ENUM_RTS; $$->a2.i = SA_SOURCE; } | SCOPE { $$ = f_new_inst(); $$->aux = T_ENUM_SCOPE; $$->a2.i = SA_SCOPE; $$->a1.i = 1; } - | CAST { $$ = f_new_inst(); $$->aux = T_ENUM_RTC; $$->a2.i = SA_CAST; } | DEST { $$ = f_new_inst(); $$->aux = T_ENUM_RTD; $$->a2.i = SA_DEST; $$->a1.i = 1; } | IFNAME { $$ = f_new_inst(); $$->aux = T_STRING; $$->a2.i = SA_IFNAME; } | IFINDEX { $$ = f_new_inst(); $$->aux = T_INT; $$->a2.i = SA_IFINDEX; } @@ -889,7 +891,10 @@ term: | rtadot dynamic_attr { $$ = $2; $$->code = P('e','a'); } + | term '.' IS_V4 { $$ = f_new_inst(); $$->code = P('I','i'); $$->a1.p = $1; } + | term '.' TYPE { $$ = f_new_inst(); $$->code = 'T'; $$->a1.p = $1; } | term '.' IP { $$ = f_new_inst(); $$->code = P('c','p'); $$->a1.p = $1; $$->aux = T_IP; } + | term '.' RD { $$ = f_new_inst(); $$->code = P('R','D'); $$->a1.p = $1; $$->aux = T_RD; } | term '.' LEN { $$ = f_new_inst(); $$->code = 'L'; $$->a1.p = $1; } | term '.' MAXLEN { $$ = f_new_inst(); $$->code = P('R','m'); $$->a1.p = $1; } | term '.' 
ASN { $$ = f_new_inst(); $$->code = P('R','a'); $$->a1.p = $1; } diff --git a/filter/filter.c b/filter/filter.c index a6ef1e10..9f6c2cdd 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -151,6 +151,7 @@ val_compare(struct f_val v1, struct f_val v2) case T_QUAD: return uint_cmp(v1.val.i, v2.val.i); case T_EC: + case T_RD: return u64_cmp(v1.val.ec, v2.val.ec); case T_LC: return lcomm_cmp(v1.val.lc, v2.val.lc); @@ -515,6 +516,7 @@ val_format(struct f_val v, buffer *buf) case T_QUAD: buffer_print(buf, "%R", v.val.i); return; case T_EC: ec_format(buf2, v.val.ec); buffer_print(buf, "%s", buf2); return; case T_LC: lc_format(buf2, v.val.lc); buffer_print(buf, "%s", buf2); return; + case T_RD: rd_format(v.val.ec, buf2, 1024); buffer_print(buf, "%s", buf2); return; case T_PREFIX_SET: trie_format(v.val.ti, buf); return; case T_SET: tree_format(v.val.t, buf); return; case T_ENUM: buffer_print(buf, "(enum %x)%u", v.type, v.val.i); return; @@ -815,6 +817,25 @@ interpret(struct f_inst *what) res.type = T_BOOL; res.val.i = (v1.type != T_VOID); break; + case 'T': + ONEARG; + switch (v1.type) + { + case T_NET: + res.type = T_ENUM_NETTYPE; + res.val.i = v1.val.net->type; + break; + default: + runtime( "Can't determine type of this item" ); + } + break; + case P('I','i'): + ONEARG; + if (v1.type != T_IP) + runtime( "IP version check needs an IP address" ); + res.type = T_BOOL; + res.val.i = ipa_is_ip4(v1.val.ip); + break; /* Set to indirect value, a1 = variable, a2 = value */ case 's': @@ -900,15 +921,14 @@ interpret(struct f_inst *what) switch (what->a2.i) { case SA_FROM: res.val.ip = rta->from; break; - case SA_GW: res.val.ip = rta->gw; break; + case SA_GW: res.val.ip = rta->nh.gw; break; case SA_NET: res.val.net = (*f_rte)->net->n.addr; break; case SA_PROTO: res.val.s = rta->src->proto->name; break; case SA_SOURCE: res.val.i = rta->source; break; case SA_SCOPE: res.val.i = rta->scope; break; - case SA_CAST: res.val.i = rta->cast; break; case SA_DEST: res.val.i = rta->dest; 
break; - case SA_IFNAME: res.val.s = rta->iface ? rta->iface->name : ""; break; - case SA_IFINDEX: res.val.i = rta->iface ? rta->iface->index : 0; break; + case SA_IFNAME: res.val.s = rta->nh.iface ? rta->nh.iface->name : ""; break; + case SA_IFINDEX: res.val.i = rta->nh.iface ? rta->nh.iface->index : 0; break; default: bug("Invalid static attribute access (%x)", res.type); @@ -938,10 +958,10 @@ interpret(struct f_inst *what) if (!n || (n->scope == SCOPE_HOST)) runtime( "Invalid gw address" ); - rta->dest = RTD_ROUTER; - rta->gw = ip; - rta->iface = n->iface; - rta->nexthops = NULL; + rta->dest = RTD_UNICAST; + rta->nh.gw = ip; + rta->nh.iface = n->iface; + rta->nh.next = NULL; rta->hostentry = NULL; } break; @@ -956,9 +976,9 @@ interpret(struct f_inst *what) runtime( "Destination can be changed only to blackhole, unreachable or prohibit" ); rta->dest = i; - rta->gw = IPA_NONE; - rta->iface = NULL; - rta->nexthops = NULL; + rta->nh.gw = IPA_NONE; + rta->nh.iface = NULL; + rta->nh.next = NULL; rta->hostentry = NULL; break; @@ -1210,6 +1230,15 @@ interpret(struct f_inst *what) res.type = T_IP; res.val.ip = net_prefix(v1.val.net); break; + case P('R','D'): + ONEARG; + if (v1.type != T_NET) + runtime( "Prefix expected" ); + if (!net_is_vpn(v1.val.net)) + runtime( "VPN address expected" ); + res.type = T_RD; + res.val.ec = net_rd(v1.val.net); + break; case P('a','f'): /* Get first ASN from AS PATH */ ONEARG; if (v1.type != T_PATH) @@ -1582,6 +1611,8 @@ i_same(struct f_inst *f1, struct f_inst *f2) case P('!', '~'): case '~': TWOARGS; break; case P('d','e'): ONEARG; break; + case 'T': ONEARG; break; + case P('n','T'): break; case P('m','l'): TWOARGS; @@ -1647,6 +1678,7 @@ i_same(struct f_inst *f1, struct f_inst *f2) case 'r': ONEARG; break; case P('c','p'): ONEARG; break; + case P('R','D'): ONEARG; break; case P('c','a'): /* Call rewriting trickery to avoid exponential behaviour */ ONEARG; if (!i_same(f1->a2.p, f2->a2.p)) diff --git a/filter/filter.h b/filter/filter.h 
index a4808731..0beac679 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -146,6 +146,8 @@ void val_format(struct f_val v, buffer *buf); #define T_ENUM_RTC 0x33 #define T_ENUM_RTD 0x34 #define T_ENUM_ROA 0x35 +#define T_ENUM_NETTYPE 0x36 + /* new enums go here */ #define T_ENUM_EMPTY 0x3f /* Special hack for atomic_aggr */ @@ -162,6 +164,7 @@ void val_format(struct f_val v, buffer *buf); #define T_ECLIST 0x27 /* Extended community list */ #define T_LC 0x28 /* Large community value, lcomm */ #define T_LCLIST 0x29 /* Large community list */ +#define T_RD 0x2a /* Route distinguisher for VPN addresses */ #define T_RETURN 0x40 #define T_SET 0x80 @@ -174,10 +177,9 @@ void val_format(struct f_val v, buffer *buf); #define SA_PROTO 4 #define SA_SOURCE 5 #define SA_SCOPE 6 -#define SA_CAST 7 -#define SA_DEST 8 -#define SA_IFNAME 9 -#define SA_IFINDEX 10 +#define SA_DEST 7 +#define SA_IFNAME 8 +#define SA_IFINDEX 9 struct f_tree { diff --git a/filter/test.conf b/filter/test.conf index 7915e627..dedad1d8 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -312,15 +312,18 @@ function t_ip() ip p; { p = 127.1.2.3; + bt_assert(p.is_v4); bt_assert(p.mask(8) = 127.0.0.0); bt_assert(1.2.3.4 = 1.2.3.4); bt_assert(1.2.3.4 = onetwo); bt_assert(format(p) = "127.1.2.3"); p = ::fffe:6:c0c:936d:88c7:35d3; + bt_assert(!p.is_v4); bt_assert(format(p) = "::fffe:6:c0c:936d:88c7:35d3"); p = 1234:5678::; + bt_assert(!p.is_v4); bt_assert(p.mask(24) = 1234:5600::); } @@ -444,8 +447,8 @@ function test_pxset(prefix set pxs) function t_prefix_set() prefix set pxs; { - pxs = [ 1.2.0.0/16, 1.4.0.0/16+]; - bt_assert(format(pxs) = "[1.2.0.0/112{::0.1.0.0}, 1.4.0.0/112{::0.1.255.255}]"); + pxs = [ 1.2.0.0/16, 1.4.0.0/16+, 44.66.88.64/30{24,28}, 12.34.56.0/24{8,16} ]; + bt_assert(format(pxs) = "[1.2.0.0/112{::0.1.0.0}, 1.4.0.0/112{::0.1.255.255}, 12.34.0.0/112{::1.255.0.0}, 44.66.88.64/124{::1f0}]"); bt_assert(1.2.0.0/16 ~ pxs); bt_assert(1.4.0.0/16 ~ pxs); bt_assert(1.4.0.0/18 ~ pxs); @@ -1165,12 
+1168,20 @@ int j; accept "ok I take that"; } +filter roa_filter +{ + if net ~ [ 10.0.0.0/8{16,24}, 2000::/3{16,96} ] then { + accept; + } + reject; +} + roa4 table r4; roa6 table r6; protocol static { - roa4 { table r4; }; + roa4 { table r4; import filter roa_filter; }; route 10.110.0.0/16 max 16 as 1000 blackhole; route 10.120.0.0/16 max 24 as 1000 blackhole ; route 10.130.0.0/16 max 24 as 2000 blackhole; @@ -1179,66 +1190,124 @@ protocol static protocol static { - roa6 { table r6; }; + roa6 { table r6; import filter roa_filter; }; route 2001:0db8:85a3:8a2e::/64 max 96 as 1000 blackhole; } function test_roa_check() +prefix pfx; { # cannot be tested in __startup(), sorry - print "Should be true: ", roa_check(r4, 10.10.0.0/16, 1000) = ROA_UNKNOWN, - " ", roa_check(r4, 10.0.0.0/8, 1000) = ROA_UNKNOWN, - " ", roa_check(r4, 10.110.0.0/16, 1000) = ROA_VALID, - " ", roa_check(r4, 10.110.0.0/16, 2000) = ROA_INVALID, - " ", roa_check(r4, 10.110.32.0/20, 1000) = ROA_INVALID, - " ", roa_check(r4, 10.120.32.0/20, 1000) = ROA_VALID; - print "Should be true: ", roa_check(r4, 10.120.32.0/20, 2000) = ROA_INVALID, - " ", roa_check(r4, 10.120.32.32/28, 1000) = ROA_INVALID, - " ", roa_check(r4, 10.130.130.0/24, 1000) = ROA_INVALID, - " ", roa_check(r4, 10.130.130.0/24, 2000) = ROA_VALID, - " ", roa_check(r4, 10.130.30.0/24, 3000) = ROA_INVALID, - " ", roa_check(r4, 10.130.130.0/24, 3000) = ROA_VALID; - print "Should be true: ", roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID, - " ", roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID, - " ", roa_check(r6, 2001:0db8:85a3:8a2e::/64, 1000) = ROA_VALID, - " ", roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN; - - print "Should be true: ", roa_check(r4, 10.10.0.0/16, 1000) = ROA_UNKNOWN, - " ", roa_check(r4, 10.0.0.0/8, 1000) = ROA_UNKNOWN, - " ", roa_check(r4, 10.110.0.0/16, 1000) = ROA_VALID, - " ", roa_check(r4, 10.110.0.0/16, 2000) = ROA_INVALID, - " ", roa_check(r4, 10.110.32.0/20, 1000) = 
ROA_INVALID, - " ", roa_check(r4, 10.120.32.0/20, 1000) = ROA_VALID; - - print "Should be true: ", roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID, - " ", roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID, - " ", roa_check(r6, 2001:0db8:85a3:8a2e::/64, 1000) = ROA_VALID, - " ", roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN; - - print "Should be true: ", roa_check(r4, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID || - roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID; - - print "Should be false: ", roa_check(r4, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_INVALID || - roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_INVALID, - " ", roa_check(r4, 2001:0db8:85a3::/48, 1000) = ROA_INVALID || - roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_INVALID; - - print "Should be true: ", 10.130.130.0/24 ~ 0.0.0.0/0, - " ", 2001:0db8:85a3:8a2e::/64 ~ ::/0; - print "Should be false: ", 10.130.130.0/24 ~ ::/0, - " ", 2001:0db8:85a3:8a2e::/64 ~ 0.0.0.0/0; + bt_assert(roa_check(r4, 10.10.0.0/16, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.0.0.0/8, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.110.0.0/16, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 10.110.0.0/16, 2000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.110.32.0/20, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.120.32.0/20, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 10.120.32.0/20, 2000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.120.32.32/28, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.130.130.0/24, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.130.130.0/24, 2000) = ROA_VALID); + bt_assert(roa_check(r4, 10.130.30.0/24, 3000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.130.130.0/24, 3000) = ROA_VALID); + + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e::/64, 1000) = 
ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN); + + bt_assert(roa_check(r4, 10.10.0.0/16, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.0.0.0/8, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r4, 10.110.0.0/16, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 10.110.0.0/16, 2000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.110.32.0/20, 1000) = ROA_INVALID); + bt_assert(roa_check(r4, 10.120.32.0/20, 1000) = ROA_VALID); + + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e::/64, 1000) = ROA_VALID); + bt_assert(roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN); + + bt_assert(roa_check(r4, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/97, 1000) = ROA_INVALID); + + bt_assert(roa_check(r4, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r6, 2001:0db8:85a3:8a2e:1234::/80, 1000) = ROA_VALID); + bt_assert(roa_check(r4, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN); + bt_assert(roa_check(r6, 2001:0db8:85a3::/48, 1000) = ROA_UNKNOWN); + + bt_assert(10.130.130.0/24 ~ 0.0.0.0/0); + bt_assert(2001:0db8:85a3:8a2e::/64 ~ ::/0); + bt_assert(10.130.130.0/24 !~ ::/0); + bt_assert(2001:0db8:85a3:8a2e::/64 !~ 0.0.0.0/0); + + pfx = 12.13.0.0/16 max 24 as 1234; + bt_assert(pfx.len = 16); + bt_assert(pfx.maxlen = 24); + bt_assert(pfx.asn = 1234); + + pfx = 1000::/8 max 32 as 1234; + bt_assert(pfx.len = 8); + bt_assert(pfx.maxlen = 32); + bt_assert(pfx.asn = 1234); } -function roa_operators_test() -prefix pfx; +bt_test_suite(test_roa_check, "Testing ROA"); + +/* + * Testing Mixed Net Types + * ----------------------- + */ + +function t_mixed_prefix() +prefix set pxs; +prefix set pxt; { - print "Testing ROA prefix operators '.maxlen' and '.asn':"; + pxs = [ 98.45.0.0/16, 128.128.0.0/12+, 2200::/42-, 
::ffff:d000:0/100{98,102}]; + bt_assert(format(pxs) = "[::/0, ::/2{c000::}, 98.45.0.0/112{::0.1.0.0}, 128.128.0.0/108{::0.31.255.255}, 208.0.0.0/100{::124.0.0.0}, 2200::/42{ffff:ffff:ffc0::}]"); + bt_assert(::fe00:0:0/88 !~ pxs); + bt_assert(::fffe:0:0/95 !~ pxs); + bt_assert(::ffff:d800:0/101 ~ pxs); + bt_assert(216.0.0.0/5 ~ pxs); + bt_assert(212.0.0.0/6 ~ pxs); + bt_assert(212.0.0.0/7 !~ pxs); + bt_assert(::ffff:8080:8080/121 ~ pxs); + bt_assert(::/0 ~ pxs); + bt_assert(0.0.0.0/0 !~ pxs); + bt_assert(128.135.64.17/32 ~ pxs); + +# pxt = [ 0:1:2 10.1.10.0/24, 0:5:10000 10.1.10.0/24 ]; +# print pxt; + + bt_assert(format(NET_IP4) = "(enum 36)1"); ## if (net.type = NET_IP4) ... + bt_assert(format(NET_VPN6) = "(enum 36)4"); + bt_assert(format(0:1:2) = "0:1:2"); +} - pfx = 12.13.0.0/16 max 24 as 1234; - print pfx; - print "Should be true: ", pfx.len = 16, " ", pfx.maxlen = 24, " ", pfx.asn = 1234; +bt_test_suite(t_mixed_prefix, "Testing mixed net types"); - pfx = 1000::/8 max 32 as 1234; - print pfx; - print "Should be true: ", pfx.len = 8, " ", pfx.maxlen = 32, " ", pfx.asn = 1234; + +filter vpn_filter +{ + bt_assert(format(net) = "0:1:2 10.1.10.0/24"); + bt_assert(net.type = NET_VPN4); + bt_assert(net.type != NET_IP4); + bt_assert(net.type != NET_IP6); + bt_assert(net.rd = 0:1:2); + + case (net.type) { + NET_IP4: print "IPV4"; + NET_IP6: print "IPV6"; + } + + accept; +} + +vpn4 table v4; +vpn4 table v6; + +protocol static +{ + vpn4 { table v4; import filter vpn_filter; }; + route 0:1:2 10.1.10.0/24 unreachable; } diff --git a/lib/alloca.h b/lib/alloca.h index f0d61bb4..e5557cdb 100644 --- a/lib/alloca.h +++ b/lib/alloca.h @@ -15,4 +15,6 @@ #include <stdlib.h> #endif +#define allocz(len) ({ void *_x = alloca(len); memset(_x, 0, len); _x; }) + #endif diff --git a/lib/buffer.h b/lib/buffer.h index a8b11951..6fc18852 100644 --- a/lib/buffer.h +++ b/lib/buffer.h @@ -14,7 +14,7 @@ #include "sysdep/config.h" #define BUFFER(type) struct { type *data; uint used, size; } - 
+#define BUFFER_TYPE(v) typeof(* (v).data) #define BUFFER_SIZE(v) ((v).size * sizeof(* (v).data)) #define BUFFER_INIT(v,pool,isize) \ @@ -46,6 +46,9 @@ #define BUFFER_FLUSH(v) ({ (v).used = 0; }) +#define BUFFER_WALK(v,n) \ + for (BUFFER_TYPE(v) *_n = (v).data, n; _n < ((v).data + (v).used) && (n = *_n, 1); _n++) + #define BUFFER_SHALLOW_COPY(dst, src) \ ({ \ (dst).used = (src).used; \ diff --git a/lib/buffer_test.c b/lib/buffer_test.c index 55179e82..5b7de330 100644 --- a/lib/buffer_test.c +++ b/lib/buffer_test.c @@ -133,6 +133,25 @@ t_buffer_flush(void) return 1; } +static int +t_buffer_walk(void) +{ + int i; + + init_buffer(); + fill_expected_array(); + for (i = 0; i < MAX_NUM; i++) + BUFFER_PUSH(buf) = expected[i]; + + i = 0; + BUFFER_WALK(buf, v) + bt_assert(v == expected[i++]); + + bt_assert(i == MAX_NUM); + + return 1; +} + int main(int argc, char *argv[]) { @@ -142,6 +161,7 @@ main(int argc, char *argv[]) bt_test_suite(t_buffer_pop, "Fill whole buffer (PUSH), a half of elements POP and PUSH new elements"); bt_test_suite(t_buffer_resize, "Init a small buffer and try overfill"); bt_test_suite(t_buffer_flush, "Fill and flush all elements"); + bt_test_suite(t_buffer_walk, "Fill and walk through buffer"); return bt_exit_value(); } @@ -325,6 +325,33 @@ static inline ip6_addr ip6_hton(ip6_addr a) static inline ip6_addr ip6_ntoh(ip6_addr a) { return _MI6(ntohl(_I0(a)), ntohl(_I1(a)), ntohl(_I2(a)), ntohl(_I3(a))); } +#define MPLS_MAX_LABEL_STACK 8 +typedef struct mpls_label_stack { + uint len; + u32 stack[MPLS_MAX_LABEL_STACK]; +} mpls_label_stack; + +static inline int +mpls_get(const char *buf, int buflen, u32 *stack) +{ + for (int i=0; (i<MPLS_MAX_LABEL_STACK) && (i*4+3 < buflen); i++) + { + u32 s = get_u32(buf + i*4); + stack[i] = s >> 12; + if (s & 0x100) + return i+1; + } + return -1; +} + +static inline int +mpls_put(char *buf, int len, u32 *stack) +{ + for (int i=0; i<len; i++) + put_u32(buf + i*4, stack[i] << 12 | (i+1 == len ? 
0x100 : 0)); + + return len*4; +} /* * Unaligned data access (in network order) @@ -13,7 +13,8 @@ const char * const net_label[] = { [NET_ROA4] = "roa4", [NET_ROA6] = "roa6", [NET_FLOW4] = "flow4", - [NET_FLOW6] = "flow6" + [NET_FLOW6] = "flow6", + [NET_MPLS] = "mpls", }; const u16 net_addr_length[] = { @@ -24,7 +25,8 @@ const u16 net_addr_length[] = { [NET_ROA4] = sizeof(net_addr_roa4), [NET_ROA6] = sizeof(net_addr_roa6), [NET_FLOW4] = 0, - [NET_FLOW6] = 0 + [NET_FLOW6] = 0, + [NET_MPLS] = sizeof(net_addr_mpls), }; const u8 net_max_prefix_length[] = { @@ -35,7 +37,8 @@ const u8 net_max_prefix_length[] = { [NET_ROA4] = IP4_MAX_PREFIX_LENGTH, [NET_ROA6] = IP6_MAX_PREFIX_LENGTH, [NET_FLOW4] = IP4_MAX_PREFIX_LENGTH, - [NET_FLOW6] = IP6_MAX_PREFIX_LENGTH + [NET_FLOW6] = IP6_MAX_PREFIX_LENGTH, + [NET_MPLS] = 0, }; const u16 net_max_text_length[] = { @@ -46,14 +49,28 @@ const u16 net_max_text_length[] = { [NET_ROA4] = 34, /* "255.255.255.255/32-32 AS4294967295" */ [NET_ROA6] = 60, /* "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/128-128 AS4294967295" */ [NET_FLOW4] = 0, /* "flow4 { ... }" */ - [NET_FLOW6] = 0 /* "flow6 { ... }" */ + [NET_FLOW6] = 0, /* "flow6 { ... 
}" */ + [NET_MPLS] = 7, /* "1048575" */ }; int +rd_format(const u64 rd, char *buf, int buflen) +{ + switch (rd >> 48) + { + case 0: return bsnprintf(buf, buflen, "0:%u:%u", (u32) (rd >> 32), (u32) rd); + case 1: return bsnprintf(buf, buflen, "1:%I4:%u", ip4_from_u32(rd >> 16), (u32) (rd & 0xffff)); + case 2: return bsnprintf(buf, buflen, "2:%u:%u", (u32) (rd >> 16), (u32) (rd & 0xffff)); + default: return bsnprintf(buf, buflen, "X:%08x:%08x", (u32) (rd >> 32), (u32) rd); + } +} + +int net_format(const net_addr *N, char *buf, int buflen) { net_addr_union *n = (void *) N; + buf[0] = 0; switch (n->n.type) { @@ -62,9 +79,18 @@ net_format(const net_addr *N, char *buf, int buflen) case NET_IP6: return bsnprintf(buf, buflen, "%I6/%d", n->ip6.prefix, n->ip6.pxlen); case NET_VPN4: - return bsnprintf(buf, buflen, "%u:%u %I4/%d", (u32) (n->vpn4.rd >> 32), (u32) n->vpn4.rd, n->vpn4.prefix, n->vpn4.pxlen); + { + int c = rd_format(n->vpn4.rd, buf, buflen); + ADVANCE(buf, buflen, c); + return bsnprintf(buf, buflen, " %I4/%d", n->vpn4.prefix, n->vpn4.pxlen); + } case NET_VPN6: - return bsnprintf(buf, buflen, "%u:%u %I6/%d", (u32) (n->vpn6.rd >> 32), (u32) n->vpn6.rd, n->vpn6.prefix, n->vpn6.pxlen); + { + /* XXX: RD format is specified for VPN4; not found any for VPN6, reusing the same as for VPN4 */ + int c = rd_format(n->vpn6.rd, buf, buflen); + ADVANCE(buf, buflen, c); + return bsnprintf(buf, buflen, " %I6/%d", n->vpn6.prefix, n->vpn6.pxlen); + } case NET_ROA4: return bsnprintf(buf, buflen, "%I4/%u-%u AS%u", n->roa4.prefix, n->roa4.pxlen, n->roa4.max_pxlen, n->roa4.asn); case NET_ROA6: @@ -73,9 +99,11 @@ net_format(const net_addr *N, char *buf, int buflen) return flow4_net_format(buf, buflen, &n->flow4); case NET_FLOW6: return flow6_net_format(buf, buflen, &n->flow6); + case NET_MPLS: + return bsnprintf(buf, buflen, "%u", n->mpls.label); } - return 0; + bug("unknown network type"); } ip_addr @@ -95,6 +123,7 @@ net_pxmask(const net_addr *a) case NET_FLOW6: return 
ipa_from_ip6(ip6_mkmask(net6_pxlen(a))); + case NET_MPLS: default: return IPA_NONE; } @@ -124,6 +153,8 @@ net_compare(const net_addr *a, const net_addr *b) return net_compare_flow4((const net_addr_flow4 *) a, (const net_addr_flow4 *) b); case NET_FLOW6: return net_compare_flow6((const net_addr_flow6 *) a, (const net_addr_flow6 *) b); + case NET_MPLS: + return net_compare_mpls((const net_addr_mpls *) a, (const net_addr_mpls *) b); } return 0; } @@ -165,6 +196,9 @@ net_validate(const net_addr *N) case NET_FLOW6: return net_validate_ip6((net_addr_ip6 *) N); + case NET_MPLS: + return net_validate_mpls((net_addr_mpls *) N); + default: return 0; } @@ -188,6 +222,9 @@ net_normalize(net_addr *N) case NET_ROA6: case NET_FLOW6: return net_normalize_ip6(&n->ip6); + + case NET_MPLS: + return; } } @@ -209,6 +246,9 @@ net_classify(const net_addr *N) case NET_ROA6: case NET_FLOW6: return ip6_zero(n->ip6.prefix) ? (IADDR_HOST | SCOPE_UNIVERSE) : ip6_classify(&n->ip6.prefix); + + case NET_MPLS: + return IADDR_HOST | SCOPE_UNIVERSE; } return IADDR_INVALID; @@ -235,6 +275,7 @@ ipa_in_netX(const ip_addr a, const net_addr *n) return ip6_zero(ip6_and(ip6_xor(ipa_to_ip6(a), net6_prefix(n)), ip6_mkmask(net6_pxlen(n)))); + case NET_MPLS: default: return 0; } @@ -21,7 +21,8 @@ #define NET_ROA6 6 #define NET_FLOW4 7 #define NET_FLOW6 8 -#define NET_MAX 9 +#define NET_MPLS 9 +#define NET_MAX 10 #define NB_IP4 (1 << NET_IP4) #define NB_IP6 (1 << NET_IP6) @@ -31,6 +32,7 @@ #define NB_ROA6 (1 << NET_ROA6) #define NB_FLOW4 (1 << NET_FLOW4) #define NB_FLOW6 (1 << NET_FLOW6) +#define NB_MPLS (1 << NET_MPLS) #define NB_IP (NB_IP4 | NB_IP6) #define NB_ANY 0xffffffff @@ -108,6 +110,13 @@ typedef struct net_addr_flow6 { byte data[0]; } net_addr_flow6; +typedef struct net_addr_mpls { + u8 type; + u8 pxlen; + u16 length; + u32 label; +} net_addr_mpls; + typedef union net_addr_union { net_addr n; net_addr_ip4 ip4; @@ -118,6 +127,7 @@ typedef union net_addr_union { net_addr_roa6 roa6; net_addr_flow4 flow4; 
net_addr_flow6 flow6; + net_addr_mpls mpls; } net_addr_union; @@ -153,6 +163,8 @@ extern const u16 net_max_text_length[]; #define NET_ADDR_FLOW6(prefix,pxlen,dlen) \ ((net_addr_flow6) { NET_FLOW6, pxlen, sizeof(net_addr_ip6) + dlen, prefix }) +#define NET_ADDR_MPLS(label) \ + ((net_addr_mpls) { NET_MPLS, 20, sizeof(net_addr_mpls), label }) static inline void net_fill_ip4(net_addr *a, ip4_addr prefix, uint pxlen) @@ -173,6 +185,9 @@ static inline void net_fill_roa4(net_addr *a, ip4_addr prefix, uint pxlen, uint static inline void net_fill_roa6(net_addr *a, ip6_addr prefix, uint pxlen, uint max_pxlen, u32 asn) { *(net_addr_roa6 *)a = NET_ADDR_ROA6(prefix, pxlen, max_pxlen, asn); } +static inline void net_fill_mpls(net_addr *a, u32 label) +{ *(net_addr_mpls *)a = NET_ADDR_MPLS(label); } + static inline void net_fill_ipa(net_addr *a, ip_addr prefix, uint pxlen) { if (ipa_is_ip4(prefix)) @@ -215,6 +230,9 @@ static inline int net_is_ip(const net_addr *a) static inline int net_is_roa(const net_addr *a) { return (a->type == NET_ROA4) || (a->type == NET_ROA6); } +static inline int net_is_vpn(const net_addr *a) +{ return (a->type == NET_VPN4) || (a->type == NET_VPN6); } + static inline ip4_addr net4_prefix(const net_addr *a) { return ((net_addr_ip4 *) a)->prefix; } @@ -238,11 +256,20 @@ static inline ip_addr net_prefix(const net_addr *a) case NET_FLOW6: return ipa_from_ip6(net6_prefix(a)); + case NET_MPLS: default: return IPA_NONE; } } +static inline u32 net_mpls(const net_addr *a) +{ + if (a->type == NET_MPLS) + return ((net_addr_mpls *) a)->label; + + bug("Can't call net_mpls on non-mpls net_addr"); +} + static inline uint net4_pxlen(const net_addr *a) { return a->pxlen; } @@ -254,6 +281,18 @@ static inline uint net_pxlen(const net_addr *a) ip_addr net_pxmask(const net_addr *a); +static inline u64 net_rd(const net_addr *a) +{ + switch (a->type) + { + case NET_VPN4: + return ((net_addr_vpn4 *)a)->rd; + case NET_VPN6: + return ((net_addr_vpn6 *)a)->rd; + } + return 0; +} + 
static inline int net_equal(const net_addr *a, const net_addr *b) { return (a->length == b->length) && !memcmp(a, b, a->length); } @@ -282,6 +321,9 @@ static inline int net_equal_flow4(const net_addr_flow4 *a, const net_addr_flow4 static inline int net_equal_flow6(const net_addr_flow6 *a, const net_addr_flow6 *b) { return net_equal((const net_addr *) a, (const net_addr *) b); } +static inline int net_equal_mpls(const net_addr_mpls *a, const net_addr_mpls *b) +{ return !memcmp(a, b, sizeof(net_addr_mpls)); } + static inline int net_equal_prefix_roa4(const net_addr_roa4 *a, const net_addr_roa4 *b) { return ip4_equal(a->prefix, b->prefix) && (a->pxlen == b->pxlen); } @@ -314,6 +356,8 @@ static inline int net_zero_flow4(const net_addr_flow4 *a) static inline int net_zero_flow6(const net_addr_flow6 *a) { return !a->pxlen && ip6_zero(a->prefix) && !a->data; } +static inline int net_zero_mpls(const net_addr_mpls *a) +{ return !a->label; } static inline int net_compare_ip4(const net_addr_ip4 *a, const net_addr_ip4 *b) @@ -340,6 +384,9 @@ static inline int net_compare_flow4(const net_addr_flow4 *a, const net_addr_flow static inline int net_compare_flow6(const net_addr_flow6 *a, const net_addr_flow6 *b) { return ip6_compare(a->prefix, b->prefix) ?: uint_cmp(a->pxlen, b->pxlen) ?: uint_cmp(a->length, b->length) ?: memcmp(a->data, b->data, a->length - sizeof(net_addr_flow6)); } +static inline int net_compare_mpls(const net_addr_mpls *a, const net_addr_mpls *b) +{ return uint_cmp(a->label, b->label); } + int net_compare(const net_addr *a, const net_addr *b); @@ -370,6 +417,13 @@ static inline void net_copy_flow4(net_addr_flow4 *dst, const net_addr_flow4 *src static inline void net_copy_flow6(net_addr_flow6 *dst, const net_addr_flow6 *src) { memcpy(dst, src, src->length); } +static inline void net_copy_mpls(net_addr_mpls *dst, const net_addr_mpls *src) +{ memcpy(dst, src, sizeof(net_addr_mpls)); } + + +/* XXXX */ +static inline u32 u64_hash(u64 a) +{ return u32_hash(a); } static 
inline u32 net_hash_ip4(const net_addr_ip4 *n) { return ip4_hash(n->prefix) ^ ((u32) n->pxlen << 26); } @@ -377,10 +431,6 @@ static inline u32 net_hash_ip4(const net_addr_ip4 *n) static inline u32 net_hash_ip6(const net_addr_ip6 *n) { return ip6_hash(n->prefix) ^ ((u32) n->pxlen << 26); } -/* XXXX */ -static inline u32 u64_hash(u64 a) -{ return u32_hash(a); } - static inline u32 net_hash_vpn4(const net_addr_vpn4 *n) { return ip4_hash(n->prefix) ^ ((u32) n->pxlen << 26) ^ u64_hash(n->rd); } @@ -399,6 +449,9 @@ static inline u32 net_hash_flow4(const net_addr_flow4 *n) static inline u32 net_hash_flow6(const net_addr_flow6 *n) { return ip6_hash(n->prefix) ^ ((u32) n->pxlen << 26); } +static inline u32 net_hash_mpls(const net_addr_mpls *n) +{ return n->label; } + u32 net_hash(const net_addr *a); @@ -414,6 +467,11 @@ static inline int net_validate_ip6(const net_addr_ip6 *n) ip6_zero(ip6_and(n->prefix, ip6_not(ip6_mkmask(n->pxlen)))); } +static inline int net_validate_mpls(const net_addr_mpls *n) +{ + return n->label < (1 << 20); +} + int net_validate(const net_addr *N); @@ -428,7 +486,7 @@ void net_normalize(net_addr *N); int net_classify(const net_addr *N); int net_format(const net_addr *N, char *buf, int buflen); - +int rd_format(const u64 rd, char *buf, int buflen); int ipa_in_netX(const ip_addr A, const net_addr *N); int net_in_netX(const net_addr *A, const net_addr *N); diff --git a/nest/config.Y b/nest/config.Y index 23d6a452..e6b0927b 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -78,8 +78,7 @@ CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST) -CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH) +CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, 
BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type <i32> idval @@ -154,6 +153,8 @@ net_type: | FLOW6{ $$ = NET_FLOW6; } ; +CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6) + /* Creation of routing tables */ diff --git a/nest/iface.h b/nest/iface.h index d960b859..de5070d6 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -118,12 +118,15 @@ typedef struct neighbor { SCOPE_HOST when it's our own address */ } neighbor; -#define NEF_STICKY 1 -#define NEF_ONLINK 2 -#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_STICKY 1 +#define NEF_ONLINK 2 +#define NEF_BIND 4 /* Used internally for neighbors bound to an iface */ +#define NEF_IFACE 8 /* Neighbors bound to iface */ + neighbor *neigh_find(struct proto *, ip_addr *, unsigned flags); neighbor *neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags); +neighbor *neigh_find_iface(struct proto *p, struct iface *ifa); static inline int neigh_connected_to(struct proto *p, ip_addr *a, struct iface *i) { diff --git a/nest/neighbor.c b/nest/neighbor.c index 2c7f9b84..96475a50 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -48,7 +48,7 @@ #define NEIGH_HASH_OFFSET 24 static slab *neigh_slab; -static list sticky_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; +static list sticky_neigh_list, iface_neigh_list, neigh_hash_table[NEIGH_HASH_SIZE]; static inline uint neigh_hash(struct proto *p, ip_addr *a) @@ -166,6 +166,8 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return NULL; n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + n->addr = *a; if (scope >= 0) { @@ -187,6 +189,35 @@ neigh_find2(struct proto *p, ip_addr *a, struct iface *ifa, unsigned flags) return n; } +neighbor * +neigh_find_iface(struct proto *p, struct iface *ifa) +{ + neighbor *n; + node *nn; + + /* We keep neighbors with NEF_IFACE foremost in ifa->neighbors list */ + WALK_LIST2(n, nn, 
ifa->neighbors, if_n) + { + if (! (n->flags & NEF_IFACE)) + break; + + if (n->proto == p) + return n; + } + + n = sl_alloc(neigh_slab); + memset(n, 0, sizeof(neighbor)); + + add_tail(&iface_neigh_list, &n->n); + add_head(&ifa->neighbors, &n->if_n); + n->iface = ifa; + n->proto = p; + n->flags = NEF_IFACE; + n->scope = (ifa->flags & IF_UP) ? SCOPE_HOST : -1; + + return n; +} + /** * neigh_dump - dump specified neighbor entry. * @n: the entry to dump @@ -205,6 +236,8 @@ neigh_dump(neighbor *n) debug("%s %p %08x scope %s", n->proto->name, n->data, n->aux, ip_scope_text(n->scope)); if (n->flags & NEF_STICKY) debug(" STICKY"); + if (n->flags & NEF_IFACE) + debug(" IFACE"); debug("\n"); } @@ -223,6 +256,8 @@ neigh_dump_all(void) debug("Known neighbors:\n"); WALK_LIST(n, sticky_neigh_list) neigh_dump(n); + WALK_LIST(n, iface_neigh_list) + neigh_dump(n); for(i=0; i<NEIGH_HASH_SIZE; i++) WALK_LIST(n, neigh_hash_table[i]) neigh_dump(n); @@ -232,13 +267,18 @@ neigh_dump_all(void) static void neigh_up(neighbor *n, struct iface *i, int scope, struct ifa *a) { + DBG("Waking up sticky neighbor %I\n", n->addr); n->iface = i; n->ifa = a; n->scope = scope; - add_tail(&i->neighbors, &n->if_n); - rem_node(&n->n); - add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); - DBG("Waking up sticky neighbor %I\n", n->addr); + + if (! (n->flags & NEF_IFACE)) + { + add_tail(&i->neighbors, &n->if_n); + rem_node(&n->n); + add_tail(&neigh_hash_table[neigh_hash(n->proto, &n->addr)], &n->n); + } + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); } @@ -247,14 +287,20 @@ static void neigh_down(neighbor *n) { DBG("Flushing neighbor %I on %s\n", n->addr, n->iface->name); - rem_node(&n->if_n); - if (! (n->flags & NEF_BIND)) + if (! (n->flags & (NEF_BIND | NEF_IFACE))) n->iface = NULL; n->ifa = NULL; n->scope = -1; + + if (! 
(n->flags & NEF_IFACE)) + { + rem_node(&n->if_n); + rem_node(&n->n); + } + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) n->proto->neigh_notify(n); - rem_node(&n->n); + if (n->flags & NEF_STICKY) { add_tail(&sticky_neigh_list, &n->n); @@ -272,7 +318,8 @@ neigh_down(neighbor *n) return; } } - else + + if (! (n->flags & (NEF_STICKY | NEF_IFACE))) sl_free(neigh_slab, n); } @@ -290,10 +337,17 @@ void neigh_if_up(struct iface *i) { struct ifa *a; - neighbor *n, *next; + neighbor *n; + node *x, *y; int scope; - WALK_LIST_DELSAFE(n, next, sticky_neigh_list) + /* Wake up all iface neighbors */ + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if ((n->scope < 0) && (n->flags & NEF_IFACE)) + neigh_up(n, i, SCOPE_HOST, NULL); + + /* Wake up appropriate sticky neighbors */ + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) if ((!n->iface || n->iface == i) && ((scope = if_connected(&n->addr, i, &a)) >= 0)) neigh_up(n, i, scope, a); @@ -311,10 +365,11 @@ neigh_if_up(struct iface *i) void neigh_if_down(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - neigh_down(SKIP_BACK(neighbor, if_n, x)); + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + neigh_down(n); } /** @@ -328,14 +383,12 @@ neigh_if_down(struct iface *i) void neigh_if_link(struct iface *i) { + neighbor *n; node *x, *y; - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - neighbor *n = SKIP_BACK(neighbor, if_n, x); - if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) - n->proto->neigh_notify(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->proto->neigh_notify && (n->proto->proto_state != PS_STOP)) + n->proto->neigh_notify(n); } /** @@ -352,19 +405,21 @@ void neigh_ifa_update(struct ifa *a) { struct iface *i = a->iface; + struct ifa *aa; node *x, *y; - + neighbor *n; + int scope; + /* Remove all neighbors whose scope has changed */ - WALK_LIST_DELSAFE(x, y, i->neighbors) - { - struct ifa *aa; - neighbor *n = SKIP_BACK(neighbor, if_n, x); - 
if (if_connected(&n->addr, i, &aa) != n->scope) - neigh_down(n); - } + WALK_LIST2_DELSAFE(n, x, y, i->neighbors, if_n) + if (n->ifa && (if_connected(&n->addr, i, &aa) != n->scope)) + neigh_down(n); /* Wake up all sticky neighbors that are reachable now */ - neigh_if_up(i); + WALK_LIST_DELSAFE(n, x, sticky_neigh_list) + if ((!n->iface || n->iface == i) && + ((scope = if_connected(&n->addr, i, &aa)) >= 0)) + neigh_up(n, i, scope, aa); } static inline void @@ -373,7 +428,7 @@ neigh_prune_one(neighbor *n) if (n->proto->proto_state != PS_DOWN) return; rem_node(&n->n); - if (n->scope >= 0) + if (n->if_n.next) rem_node(&n->if_n); sl_free(neigh_slab, n); } @@ -398,6 +453,8 @@ neigh_prune(void) neigh_prune_one(n); WALK_LIST_DELSAFE(n, m, sticky_neigh_list) neigh_prune_one(n); + WALK_LIST_DELSAFE(n, m, iface_neigh_list) + neigh_prune_one(n); } /** @@ -410,10 +467,11 @@ neigh_prune(void) void neigh_init(pool *if_pool) { - int i; - neigh_slab = sl_new(if_pool, sizeof(neighbor)); + init_list(&sticky_neigh_list); - for(i=0; i<NEIGH_HASH_SIZE; i++) + init_list(&iface_neigh_list); + + for(int i = 0; i < NEIGH_HASH_SIZE; i++) init_list(&neigh_hash_table[i]); } diff --git a/nest/route.h b/nest/route.h index 12e67d61..546b04c4 100644 --- a/nest/route.h +++ b/nest/route.h @@ -168,6 +168,11 @@ typedef struct rtable { struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ } rtable; +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + typedef struct network { struct rte *routes; /* Available routes for this network */ struct fib_node n; /* FIB flags reserved for kernel syncer */ @@ -195,8 +200,8 @@ struct hostentry { unsigned hash_key; /* Hash key */ unsigned uc; /* Use count */ struct rta *src; /* Source rta entry */ - ip_addr gw; /* Chosen next hop */ byte dest; /* Chosen route destination type (RTD_...) 
*/ + byte nexthop_linkable; /* Nexthop list is completely non-device */ u32 igp_metric; /* Chosen route IGP metric */ }; @@ -333,12 +338,15 @@ void rt_show(struct rt_show_data *); * construction of BGP route attribute lists. */ -/* Multipath next-hop */ -struct mpnh { +/* Nexthop structure */ +struct nexthop { ip_addr gw; /* Next hop */ struct iface *iface; /* Outgoing interface */ - struct mpnh *next; + struct nexthop *next; byte weight; + byte labels_orig; /* Number of labels before hostentry was applied */ + byte labels; /* Number of all labels */ + u32 label[0]; }; struct rte_src { @@ -354,20 +362,16 @@ typedef struct rta { struct rta *next, **pprev; /* Hash chain */ u32 uc; /* Use count */ u32 hash_key; /* Hash over important fields */ - struct mpnh *nexthops; /* Next-hops for multipath routes */ struct ea_list *eattrs; /* Extended Attribute chain */ struct rte_src *src; /* Route source that created the route */ struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - struct iface *iface; /* Outgoing interface */ - ip_addr gw; /* Next hop */ ip_addr from; /* Advertising router */ u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - byte source; /* Route source (RTS_...) */ - byte scope; /* Route scope (SCOPE_... -- see ip.h) */ - byte cast; /* Casting type (RTC_...) */ - byte dest; /* Route destination type (RTD_...) */ - byte flags; /* Route flags (RTF_...), now unused */ - byte aflags; /* Attribute cache flags (RTAF_...) */ + u8 source; /* Route source (RTS_...) */ + u8 scope; /* Route scope (SCOPE_... -- see ip.h) */ + u8 dest; /* Route destination type (RTD_...) 
*/ + u8 aflags; + struct nexthop nh; /* Next hop */ } rta; #define RTS_DUMMY 0 /* Dummy route to be removed soon */ @@ -392,13 +396,12 @@ typedef struct rta { #define RTC_MULTICAST 2 #define RTC_ANYCAST 3 /* IPv6 Anycast */ -#define RTD_ROUTER 0 /* Next hop is neighbor router */ -#define RTD_DEVICE 1 /* Points to device */ +#define RTD_NONE 0 /* Undefined next hop */ +#define RTD_UNICAST 1 /* Next hop is neighbor router */ #define RTD_BLACKHOLE 2 /* Silently drop packets */ #define RTD_UNREACHABLE 3 /* Reject as unreachable */ #define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MULTIPATH 5 /* Multipath route (nexthops != NULL) */ -#define RTD_NONE 6 /* Invalid RTD */ +#define RTD_MAX 5 /* Flags for net->n.flags, used by kernel syncer */ #define KRF_INSTALLED 0x80 /* This route should be installed in the kernel */ @@ -410,9 +413,14 @@ typedef struct rta { protocol-specific metric is availabe */ +const char * rta_dest_names[RTD_MAX]; + +static inline const char *rta_dest_name(uint n) +{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } + /* Route has regular, reachable nexthop (i.e. 
not RTD_UNREACHABLE and like) */ static inline int rte_is_reachable(rte *r) -{ uint d = r->attrs->dest; return (d == RTD_ROUTER) || (d == RTD_DEVICE) || (d == RTD_MULTIPATH); } +{ return r->attrs->dest == RTD_UNICAST; } /* @@ -517,14 +525,22 @@ uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ ea_list *ea_append(ea_list *to, ea_list *what); void ea_format_bitfield(struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); -int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */ -static inline int mpnh_same(struct mpnh *x, struct mpnh *y) -{ return (x == y) || mpnh__same(x, y); } -struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp); -void mpnh_insert(struct mpnh **n, struct mpnh *y); -int mpnh_is_sorted(struct mpnh *x); +#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) + +static inline size_t nexthop_size(const struct nexthop *nh) +{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } +int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ +static inline int nexthop_same(struct nexthop *x, struct nexthop *y) +{ return (x == y) || nexthop__same(x, y); } +struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); +static inline void nexthop_link(struct rta *a, struct nexthop *from) +{ memcpy(&a->nh, from, nexthop_size(from)); } +void nexthop_insert(struct nexthop **n, struct nexthop *y); +int nexthop_is_sorted(struct nexthop *x); void rta_init(void); +static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } +#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } static inline rta *rta_clone(rta *r) { r->uc++; return r; } @@ -535,7 +551,7 @@ static inline 
rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta void rta_dump(rta *); void rta_dump_all(void); void rta_show(struct cli *, rta *, ea_list *); -void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll); +void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls); /* * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 94f25de8..1b7f5836 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -58,10 +58,18 @@ #include <stddef.h> +const char * rta_dest_names[RTD_MAX] = { + [RTD_NONE] = "", + [RTD_UNICAST] = "unicast", + [RTD_BLACKHOLE] = "blackhole", + [RTD_UNREACHABLE] = "unreachable", + [RTD_PROHIBIT] = "prohibited", +}; + pool *rta_pool; -static slab *rta_slab; -static slab *mpnh_slab; +static slab *rta_slab_[4]; +static slab *nexthop_slab_[4]; static slab *rte_src_slab; static struct idm src_ids; @@ -144,27 +152,38 @@ rt_prune_sources(void) */ static inline u32 -mpnh_hash(struct mpnh *x) +nexthop_hash(struct nexthop *x) { u32 h = 0; for (; x; x = x->next) - h ^= ipa_hash(x->gw); + { + h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); + + for (int i = 0; i < x->labels; i++) + h ^= x->label[i] ^ (h << 6) ^ (h >> 7); + } return h; } int -mpnh__same(struct mpnh *x, struct mpnh *y) +nexthop__same(struct nexthop *x, struct nexthop *y) { for (; x && y; x = x->next, y = y->next) - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight)) + { + if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight) || (x->labels != y->labels)) return 0; + for (int i = 0; i < x->labels; i++) + if (x->label[i] != y->label[i]) + return 0; + } + return x == y; } static int -mpnh_compare_node(struct mpnh *x, struct mpnh *y) +nexthop_compare_node(struct nexthop *x, struct nexthop *y) { int r; @@ -182,22 +201,33 @@ mpnh_compare_node(struct mpnh *x, struct mpnh *y) if (r) return 
r; + r = ((int) y->labels) - ((int) x->labels); + if (r) + return r; + + for (int i = 0; i < y->labels; i++) + { + r = ((int) y->label[i]) - ((int) x->label[i]); + if (r) + return r; + } + return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct mpnh * -mpnh_copy_node(const struct mpnh *src, linpool *lp) +static inline struct nexthop * +nexthop_copy_node(const struct nexthop *src, linpool *lp) { - struct mpnh *n = lp_alloc(lp, sizeof(struct mpnh)); - n->gw = src->gw; - n->iface = src->iface; + struct nexthop *n = lp_alloc(lp, nexthop_size(src)); + + memcpy(n, src, nexthop_size(src)); n->next = NULL; - n->weight = src->weight; + return n; } /** - * mpnh_merge - merge nexthop lists + * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 * @rx: reusability of list @x @@ -205,7 +235,7 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * @max: max number of nexthops * @lp: linpool for allocating nexthops * - * The mpnh_merge() function takes two nexthop lists @x and @y and merges them, + * The nexthop_merge() function takes two nexthop lists @x and @y and merges them, * eliminating possible duplicates. The input lists must be sorted and the * result is sorted too. The number of nexthops in result is limited by @max. * New nodes are allocated from linpool @lp. @@ -218,28 +248,28 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct mpnh * -mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) +struct nexthop * +nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) { - struct mpnh *root = NULL; - struct mpnh **n = &root; + struct nexthop *root = NULL; + struct nexthop **n = &root; while ((x || y) && max--) { - int cmp = mpnh_compare_node(x, y); + int cmp = nexthop_compare_node(x, y); if (cmp < 0) { - *n = rx ? 
x : mpnh_copy_node(x, lp); + *n = rx ? x : nexthop_copy_node(x, lp); x = x->next; } else if (cmp > 0) { - *n = ry ? y : mpnh_copy_node(y, lp); + *n = ry ? y : nexthop_copy_node(y, lp); y = y->next; } else { - *n = rx ? x : (ry ? y : mpnh_copy_node(x, lp)); + *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); x = x->next; y = y->next; } @@ -251,11 +281,11 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) } void -mpnh_insert(struct mpnh **n, struct mpnh *x) +nexthop_insert(struct nexthop **n, struct nexthop *x) { for (; *n; n = &((*n)->next)) { - int cmp = mpnh_compare_node(*n, x); + int cmp = nexthop_compare_node(*n, x); if (cmp < 0) continue; @@ -270,28 +300,37 @@ mpnh_insert(struct mpnh **n, struct mpnh *x) } int -mpnh_is_sorted(struct mpnh *x) +nexthop_is_sorted(struct nexthop *x) { for (; x && x->next; x = x->next) - if (mpnh_compare_node(x, x->next) >= 0) + if (nexthop_compare_node(x, x->next) >= 0) return 0; return 1; } -static struct mpnh * -mpnh_copy(struct mpnh *o) +static inline slab * +nexthop_slab(struct nexthop *nh) { - struct mpnh *first = NULL; - struct mpnh **last = &first; + return nexthop_slab_[MIN(nh->labels, 3)]; +} + +static struct nexthop * +nexthop_copy(struct nexthop *o) +{ + struct nexthop *first = NULL; + struct nexthop **last = &first; for (; o; o = o->next) { - struct mpnh *n = sl_alloc(mpnh_slab); + struct nexthop *n = sl_alloc(nexthop_slab(o)); n->gw = o->gw; n->iface = o->iface; n->next = NULL; n->weight = o->weight; + n->labels = o->labels; + for (int i=0; i<o->labels; i++) + n->label[i] = o->label[i]; *last = n; last = &(n->next); @@ -301,14 +340,14 @@ mpnh_copy(struct mpnh *o) } static void -mpnh_free(struct mpnh *o) +nexthop_free(struct nexthop *o) { - struct mpnh *n; + struct nexthop *n; while (o) { n = o->next; - sl_free(mpnh_slab, o); + sl_free(nexthop_slab(o), o); o = n; } } @@ -994,19 +1033,14 @@ rta_hash(rta *a) #define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); MIX(src); MIX(hostentry); 
- MIX(iface); - MIX(gw); MIX(from); MIX(igp_metric); MIX(source); MIX(scope); - MIX(cast); MIX(dest); - MIX(flags); - MIX(aflags); #undef MIX - return mem_hash_value(&h) ^ mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs); + return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); } static inline int @@ -1015,26 +1049,28 @@ rta_same(rta *x, rta *y) return (x->src == y->src && x->source == y->source && x->scope == y->scope && - x->cast == y->cast && x->dest == y->dest && - x->flags == y->flags && x->igp_metric == y->igp_metric && - ipa_equal(x->gw, y->gw) && ipa_equal(x->from, y->from) && - x->iface == y->iface && x->hostentry == y->hostentry && - mpnh_same(x->nexthops, y->nexthops) && + nexthop_same(&(x->nh), &(y->nh)) && ea_same(x->eattrs, y->eattrs)); } +static inline slab * +rta_slab(rta *a) +{ + return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; +} + static rta * rta_copy(rta *o) { - rta *r = sl_alloc(rta_slab); + rta *r = sl_alloc(rta_slab(o)); - memcpy(r, o, sizeof(rta)); + memcpy(r, o, rta_size(o)); r->uc = 1; - r->nexthops = mpnh_copy(o->nexthops); + r->nh.next = nexthop_copy(o->nh.next); r->eattrs = ea_list_copy(o->eattrs); return r; } @@ -1127,19 +1163,26 @@ rta__free(rta *a) *a->pprev = a->next; if (a->next) a->next->pprev = a->pprev; - a->aflags = 0; /* Poison the entry */ rt_unlock_hostentry(a->hostentry); rt_unlock_source(a->src); - mpnh_free(a->nexthops); + if (a->nh.next) + nexthop_free(a->nh.next); ea_free(a->eattrs); - sl_free(rta_slab, a); + a->aflags = 0; /* Poison the entry */ + sl_free(rta_slab(a), a); } rta * rta_do_cow(rta *o, linpool *lp) { - rta *r = lp_alloc(lp, sizeof(rta)); - memcpy(r, o, sizeof(rta)); + rta *r = lp_alloc(lp, rta_size(o)); + memcpy(r, o, rta_size(o)); + for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) + { + *nhn = lp_alloc(lp, nexthop_size(nho)); + memcpy(*nhn, nho, nexthop_size(nho)); + nhn = &((*nhn)->next); + } r->aflags = 0; r->uc = 0; return r; @@ -1158,19 +1201,23 
@@ rta_dump(rta *a) "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtc[] = { "", " BC", " MC", " AC" }; static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - debug("p=%s uc=%d %s %s%s%s h=%04x", - a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtc[a->cast], + debug("p=%s uc=%d %s %s%s h=%04x", + a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtd[a->dest], a->hash_key); if (!(a->aflags & RTAF_CACHED)) debug(" !CACHED"); debug(" <-%I", a->from); - if (a->dest == RTD_ROUTER) - debug(" ->%I", a->gw); - if (a->dest == RTD_DEVICE || a->dest == RTD_ROUTER) - debug(" [%s]", a->iface ? a->iface->name : "???" ); + if (a->dest == RTD_UNICAST) + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? 
nh->iface->name : "???"); + } if (a->eattrs) { debug(" EA: "); @@ -1206,10 +1253,9 @@ rta_show(struct cli *c, rta *a, ea_list *eal) { static char *src_names[] = { "dummy", "static", "inherit", "device", "static-device", "redirect", "RIP", "OSPF", "OSPF-IA", "OSPF-E1", "OSPF-E2", "BGP", "pipe" }; - static char *cast_names[] = { "unicast", "broadcast", "multicast", "anycast" }; int i; - cli_printf(c, -1008, "\tType: %s %s %s", src_names[a->source], cast_names[a->cast], ip_scope_text(a->scope)); + cli_printf(c, -1008, "\tType: %s %s", src_names[a->source], ip_scope_text(a->scope)); if (!eal) eal = a->eattrs; for(; eal; eal=eal->next) @@ -1227,8 +1273,17 @@ void rta_init(void) { rta_pool = rp_new(&root_pool, "Attributes"); - rta_slab = sl_new(rta_pool, sizeof(rta)); - mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh)); + + rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); + rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); + rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); + rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + + nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); + nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); + nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); + nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_alloc_hash(); rte_src_init(); } diff --git a/nest/rt-dev.c b/nest/rt-dev.c index d98cd79f..9993da24 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -78,9 +78,8 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) .src = src, .source = RTS_DEVICE, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = RTD_DEVICE, - .iface = ad->iface + .dest = RTD_UNICAST, + .nh.iface = ad->iface, }; a = rta_lookup(&a0); diff --git a/nest/rt-table.c b/nest/rt-table.c index 8c429874..f8baf572 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -50,7 +50,6 @@ static linpool 
*rte_update_pool; static list routing_tables; -static byte *rt_format_via(rte *e); static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); @@ -346,7 +345,7 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { - log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rt_format_via(e)); + log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rta_dest_name(e->attrs->dest)); } static inline void @@ -708,19 +707,17 @@ rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_chang } -static struct mpnh * -mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max) +static struct nexthop * +nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) { - struct mpnh nh = { .gw = a->gw, .iface = a->iface }; - struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh; - return mpnh_merge(nhs, nh2, 1, 0, max, pool); + return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); } rte * rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { // struct proto *p = c->proto; - struct mpnh *nhs = NULL; + struct nexthop *nhs = NULL; rte *best0, *best, *rt0, *rt, *tmp; best0 = net->routes; @@ -745,7 +742,7 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, lin continue; if (rte_is_reachable(rt)) - nhs = mpnh_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); if (tmp) rte_free(tmp); @@ -753,13 +750,12 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, lin if (nhs) { - nhs = mpnh_merge_rta(nhs, best->attrs, pool, c->merge_limit); + nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); if (nhs->next) { best = rte_cow_rta(best, pool); - best->attrs->dest = RTD_MULTIPATH; - best->attrs->nexthops = nhs; + 
nexthop_link(best->attrs, nhs); } } @@ -922,7 +918,7 @@ rte_validate(rte *e) return 0; } - if ((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops)) + if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) { log(L_WARN "Ignoring unsorted multipath route %N received via %s", n->n.addr, e->sender->proto->name); @@ -1564,11 +1560,14 @@ rt_schedule_hcu(rtable *tab) static inline void rt_schedule_nhu(rtable *tab) { - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) ev_schedule(tab->rt_event); - /* state change 0->1, 2->3 */ - tab->nhu_state |= 1; + /* state change: + * NHU_CLEAN -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_DIRTY + */ + tab->nhu_state |= NHU_SCHEDULED; } void @@ -1763,33 +1762,102 @@ rta_next_hop_outdated(rta *a) if (!he->src) return a->dest != RTD_UNREACHABLE; - return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) || - (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - !mpnh_same(a->nexthops, he->src->nexthops); + return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || + (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); } static inline void -rta_apply_hostentry(rta *a, struct hostentry *he) +rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) { a->hostentry = he; - a->iface = he->src ? he->src->iface : NULL; - a->gw = he->gw; a->dest = he->dest; a->igp_metric = he->igp_metric; - a->nexthops = he->src ? he->src->nexthops : NULL; + + if (a->dest != RTD_UNICAST) + { + /* No nexthop */ +no_nexthop: + a->nh = (struct nexthop) {}; + if (mls) + { /* Store the label stack for later changes */ + a->nh.labels_orig = a->nh.labels = mls->len; + memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); + } + return; + } + + if (((!mls) || (!mls->len)) && he->nexthop_linkable) + { /* Just link the nexthop chain, no label append happens. 
*/ + memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + return; + } + + struct nexthop *nhp = NULL, *nhr = NULL; + int skip_nexthop = 0; + + for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + { + if (skip_nexthop) + skip_nexthop--; + else + { + nhr = nhp; + nhp = (nhp ? (nhp->next = lp_allocz(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + } + + nhp->iface = nh->iface; + nhp->weight = nh->weight; + if (mls) + { + nhp->labels = nh->labels + mls->len; + nhp->labels_orig = mls->len; + if (nhp->labels <= MPLS_MAX_LABEL_STACK) + { + memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ + memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ + } + else + { + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); + skip_nexthop++; + continue; + } + } + if (ipa_nonzero(nh->gw)) + nhp->gw = nh->gw; /* Router nexthop */ + else if (ipa_nonzero(he->link)) + nhp->gw = he->link; /* Device nexthop with link-local address known */ + else + nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + } + + if (skip_nexthop) + if (nhr) + nhr->next = NULL; + else + { + a->dest = RTD_UNREACHABLE; + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + goto no_nexthop; + } } static inline rte * rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) { - rta a; - memcpy(&a, old->attrs, sizeof(rta)); - rta_apply_hostentry(&a, old->attrs->hostentry); - a.aflags = 0; + rta *a = alloca(RTA_MAX_SIZE); + memcpy(a, old->attrs, rta_size(old->attrs)); + + mpls_label_stack mls = { .len = a->nh.labels_orig }; + memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + + rta_apply_hostentry(a, old->attrs->hostentry, &mls); + a->aflags = 0; rte *e = sl_alloc(rte_slab); memcpy(e, old, sizeof(rte)); - e->attrs = rta_lookup(&a); + e->attrs = rta_lookup(a); 
return e; } @@ -1870,13 +1938,13 @@ rt_next_hop_update(rtable *tab) struct fib_iterator *fit = &tab->nhu_fit; int max_feed = 32; - if (tab->nhu_state == 0) + if (tab->nhu_state == NHU_CLEAN) return; - if (tab->nhu_state == 1) + if (tab->nhu_state == NHU_SCHEDULED) { FIB_ITERATE_INIT(fit, &tab->fib); - tab->nhu_state = 2; + tab->nhu_state = NHU_RUNNING; } FIB_ITERATE_START(&tab->fib, fit, net, n) @@ -1891,10 +1959,13 @@ rt_next_hop_update(rtable *tab) } FIB_ITERATE_END; - /* state change 2->0, 3->1 */ + /* State change: + * NHU_DIRTY -> NHU_SCHEDULED + * NHU_RUNNING -> NHU_CLEAN + */ tab->nhu_state &= 1; - if (tab->nhu_state > 0) + if (tab->nhu_state != NHU_CLEAN) ev_schedule(tab->rt_event); } @@ -2198,12 +2269,12 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig { struct hostentry *he = sl_alloc(hc->slab); - he->addr = a; - he->link = ll; - he->tab = dep; - he->hash_key = k; - he->uc = 0; - he->src = NULL; + *he = (struct hostentry) { + .addr = a, + .link = ll, + .tab = dep, + .hash_key = k, + }; add_tail(&hc->hostentries, &he->ln); hc_insert(hc, he); @@ -2310,8 +2381,7 @@ rt_get_igp_metric(rte *rt) return rt->u.rip.metric; #endif - /* Device routes */ - if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH)) + if (a->source == RTS_DEVICE) return 0; return IGP_METRIC_UNKNOWN; @@ -2325,7 +2395,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) /* Reset the hostentry */ he->src = NULL; - he->gw = IPA_NONE; + he->nexthop_linkable = 0; he->dest = RTD_UNREACHABLE; he->igp_metric = 0; @@ -2346,32 +2416,31 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } - if (a->dest == RTD_DEVICE) - { - if (if_local_addr(he->addr, a->iface)) - { - /* The host address is a local address, this is not valid */ - log(L_WARN "Next hop address %I is a local address of iface %s", - he->addr, a->iface->name); - goto done; - } - - /* The host is directly reachable, use link as a gateway */ - he->gw = he->link; - he->dest = 
RTD_ROUTER; - } - else + he->dest = a->dest; + he->nexthop_linkable = 1; + if (he->dest == RTD_UNICAST) { - /* The host is reachable through some route entry */ - he->gw = a->gw; - he->dest = a->dest; + for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + if (ipa_zero(nh->gw)) + { + if (if_local_addr(he->addr, nh->iface)) + { + /* The host address is a local address, this is not valid */ + log(L_WARN "Next hop address %I is a local address of iface %s", + he->addr, nh->iface->name); + goto done; + } + + he->nexthop_linkable = 0; + break; + } } he->src = rta_clone(a); he->igp_metric = rt_get_igp_metric(e); } - done: +done: /* Add a prefix range to the trie */ trie_add_prefix(tab->hostcache->trie, &he_addr, pxlen, he_addr.pxlen); @@ -2426,9 +2495,9 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) } void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll) +rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) { - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? gw : ll, dep)); + rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? 
gw : ll, dep), mls); } @@ -2436,27 +2505,6 @@ rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr * CLI commands */ -static byte * -rt_format_via(rte *e) -{ - rta *a = e->attrs; - - /* Max text length w/o IP addr and interface name is 16 */ - static byte via[IPA_MAX_TEXT_LENGTH+sizeof(a->iface->name)+16]; - - switch (a->dest) - { - case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break; - case RTD_DEVICE: bsprintf(via, "dev %s", a->iface->name); break; - case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break; - case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break; - case RTD_PROHIBIT: bsprintf(via, "prohibited"); break; - case RTD_MULTIPATH: bsprintf(via, "multipath"); break; - default: bsprintf(via, "???"); - } - return via; -} - static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) { @@ -2466,10 +2514,10 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm int primary = (e->net->routes == e); int sync_error = (e->net->n.flags & KRF_SYNC_ERROR); void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); - struct mpnh *nh; + struct nexthop *nh; tm_format_datetime(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw)) + if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) bsprintf(from, " from %I", a->from); else from[0] = 0; @@ -2488,10 +2536,29 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm get_route_info(e, info, tmpa); else bsprintf(info, " (%d)", e->pref); - cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name, - tm, from, primary ? (sync_error ? " !" 
: " *") : "", info); - for (nh = a->nexthops; nh; nh = nh->next) - cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1); + + cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), + a->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + + if (a->dest == RTD_UNICAST) + for (nh = &(a->nh); nh; nh = nh->next) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (a->nh.next) + cli_printf(c, -1007, "\tvia %I%s on %s weight %d", nh->gw, mpls, nh->iface->name, nh->weight + 1); + else + cli_printf(c, -1007, "\tvia %I%s on %s", nh->gw, mpls, nh->iface->name); + } + if (d->verbose) rta_show(c, a, tmpa); } diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 73cb5c3b..1b1d9f62 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -471,21 +471,20 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) if (r) { - rta a0 = { + rta *ap0 = allocz(RTA_MAX_SIZE); + *ap0 = (rta) { .src = p->p.main_source, .source = RTS_BABEL, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = r->metric == BABEL_INFINITY ? RTD_UNREACHABLE : RTD_ROUTER, - .flags = 0, + .dest = r->metric == BABEL_INFINITY ? 
RTD_UNREACHABLE : RTD_UNICAST, .from = r->neigh->addr, - .iface = r->neigh->ifa->iface, + .nh.iface = r->neigh->ifa->iface, }; if (r->metric < BABEL_INFINITY) - a0.gw = r->next_hop; + ap0->nh.gw = r->next_hop; - rta *a = rta_lookup(&a0); + rta *a = rta_lookup(ap0); rte *rte = rte_get_temp(a); rte->u.babel.metric = r->metric; rte->u.babel.router_id = r->router_id; diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 73318c6a..f2a8e8b5 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1491,8 +1491,7 @@ bgp_get_neighbor(rte *r) static inline int rte_resolvable(rte *rt) { - int rd = rt->attrs->dest; - return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH); + return rt->attrs->dest == RTD_UNICAST; } int diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 5d2539d5..e7647625 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -337,6 +337,8 @@ struct bgp_parse_state { u32 mp_reach_af; u32 mp_unreach_af; + mpls_label_stack mls; + uint attr_len; uint ip_reach_len; uint ip_unreach_len; diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 385d5a36..f7366804 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -743,9 +743,8 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) if (!nbr || (nbr->scope == SCOPE_HOST)) WITHDRAW(BAD_NEXT_HOP); - a->dest = RTD_ROUTER; - a->gw = nbr->addr; - a->iface = nbr->iface; + a->dest = RTD_UNICAST; + a->nh = (struct nexthop){ .gw = nbr->addr, .iface = nbr->iface }; a->hostentry = NULL; a->igp_metric = 0; } @@ -754,7 +753,7 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) if (ipa_zero(gw)) WITHDRAW(BAD_NEXT_HOP); - rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll); + rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll, &(s->mls)); } } @@ -792,8 +791,8 @@ bgp_use_gateway(struct bgp_export_state *s) if (s->channel->cf->next_hop_self) return 0; - /* We need valid global gateway */ - if ((ra->dest != RTD_ROUTER) || 
ipa_zero(ra->gw) || ipa_is_link_local(ra->gw)) + /* We need one valid global gateway */ + if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) return 0; /* Use it when exported to internal peers */ @@ -801,7 +800,7 @@ bgp_use_gateway(struct bgp_export_state *s) return 1; /* Use it when forwarded to single-hop BGP peer on on the same iface */ - return p->neigh && (p->neigh->iface == ra->iface); + return p->neigh && (p->neigh->iface == ra->nh.iface); } static void @@ -811,7 +810,7 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (bgp_use_gateway(s)) { - ip_addr nh[1] = { s->route->attrs->gw }; + ip_addr nh[1] = { s->route->attrs->nh.gw }; bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); } else @@ -1706,13 +1705,10 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis if (ea) { - a = alloca(sizeof(struct rta)); - memset(a, 0, sizeof(struct rta)); + a = allocz(RTA_MAX_SIZE); a->source = RTS_BGP; a->scope = SCOPE_UNIVERSE; - a->cast = RTC_UNICAST; - a->dest = RTD_UNREACHABLE; a->from = s->proto->cf->remote_ip; a->eattrs = ea; diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index d074600a..daf76ff2 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -235,7 +235,7 @@ ospf_start(struct proto *P) p->lsab_size = 256; p->lsab_used = 0; p->lsab = mb_alloc(P->pool, p->lsab_size); - p->nhpool = lp_new(P->pool, 12*sizeof(struct mpnh)); + p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop)); init_list(&(p->iface_list)); init_list(&(p->area_list)); fib_init(&p->rtf, P->pool, p->ospf2 ? 
NET_IP4 : NET_IP6, diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 49167ceb..df9eb75b 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -22,7 +22,7 @@ static inline void reset_ri(ort *ort) } static inline int -nh_is_vlink(struct mpnh *nhs) +nh_is_vlink(struct nexthop *nhs) { return !nhs->iface; } @@ -33,20 +33,19 @@ unresolved_vlink(ort *ort) return ort->n.nhs && nh_is_vlink(ort->n.nhs); } -static inline struct mpnh * +static inline struct nexthop * new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) { - struct mpnh *nh = lp_alloc(p->nhpool, sizeof(struct mpnh)); + struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop)); nh->gw = gw; nh->iface = iface; - nh->next = NULL; nh->weight = weight; return nh; } /* Returns true if there are device nexthops in n */ static inline int -has_device_nexthops(const struct mpnh *n) +has_device_nexthops(const struct nexthop *n) { for (; n; n = n->next) if (ipa_zero(n->gw)) @@ -56,13 +55,13 @@ has_device_nexthops(const struct mpnh *n) } /* Replace device nexthops with nexthops to gw */ -static struct mpnh * -fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) +static struct nexthop * +fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw) { - struct mpnh *root1 = NULL; - struct mpnh *root2 = NULL; - struct mpnh **nn1 = &root1; - struct mpnh **nn2 = &root2; + struct nexthop *root1 = NULL; + struct nexthop *root2 = NULL; + struct nexthop **nn1 = &root1; + struct nexthop **nn2 = &root2; if (!p->ecmp) return new_nexthop(p, gw, n->iface, n->weight); @@ -73,7 +72,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) for (; n; n = n->next) { - struct mpnh *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); + struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? 
gw : n->gw, n->iface, n->weight); if (ipa_zero(n->gw)) { @@ -87,7 +86,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw) } } - return mpnh_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); + return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); } @@ -283,7 +282,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -299,7 +298,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) if (old->nhs != new->nhs) { - old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, + old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, p->ecmp, p->nhpool); old->nhs_reuse = 1; } @@ -1674,18 +1673,18 @@ ospf_rt_spf(struct ospf_proto *p) static inline int -inherit_nexthops(struct mpnh *pn) +inherit_nexthops(struct nexthop *pn) { /* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */ return pn && (ipa_nonzero(pn->gw) || !pn->iface); } -static struct mpnh * +static struct nexthop * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, int pos) { struct ospf_proto *p = oa->po; - struct mpnh *pn = par->nhs; + struct nexthop *pn = par->nhs; struct ospf_iface *ifa; u32 rid = en->lsa.rt; @@ -1813,7 +1812,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, if (!link_back(oa, en, par)) return; - struct mpnh *nhs = calc_next_hop(oa, en, par, pos); + struct nexthop *nhs = calc_next_hop(oa, en, par, pos); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -1851,7 +1850,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, /* Merge old and new */ int new_reuse = (par->nhs != nhs); - en->nhs = mpnh_merge(en->nhs, nhs, 
en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); + en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); en->nhs_reuse = 1; return; } @@ -1907,8 +1906,7 @@ ort_changed(ort *nf, rta *nr) (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) || (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) || (nr->source != or->source) || (nr->dest != or->dest) || - (nr->iface != or->iface) || !ipa_equal(nr->gw, or->gw) || - !mpnh_same(nr->nexthops, or->nexthops); + !nexthop_same(&(nr->nh), &(or->nh)); } static void @@ -1932,7 +1930,7 @@ again1: /* Sanity check of next-hop addresses, failure should not happen */ if (nf->n.type) { - struct mpnh *nh; + struct nexthop *nh; for (nh = nf->n.nhs; nh; nh = nh->next) if (ipa_nonzero(nh->gw)) { @@ -1955,26 +1953,10 @@ again1: .src = p->p.main_source, .source = nf->n.type, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST + .dest = RTD_UNICAST, + .nh = *(nf->n.nhs), }; - if (nf->n.nhs->next) - { - a0.dest = RTD_MULTIPATH; - a0.nexthops = nf->n.nhs; - } - else if (ipa_nonzero(nf->n.nhs->gw)) - { - a0.dest = RTD_ROUTER; - a0.iface = nf->n.nhs->iface; - a0.gw = nf->n.nhs->gw; - } - else - { - a0.dest = RTD_DEVICE; - a0.iface = nf->n.nhs->iface; - } - if (reload || ort_changed(nf, &a0)) { rta *a = rta_lookup(&a0); diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 118d09b7..589d2bc5 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -53,7 +53,7 @@ typedef struct orta struct ospf_area *oa; struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(), NULL otherwise */ - struct mpnh *nhs; /* Next hops computed during SPF */ + struct nexthop *nhs; /* Next hops computed during SPF */ struct top_hash_entry *en; /* LSA responsible for this orta */ } orta; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index aaaf2e8e..ce77f57a 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -1288,8 +1288,8 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, 
net *n, rte *new, rte ip_addr fwd = IPA_NONE; - if ((a->dest == RTD_ROUTER) && use_gw_for_fwaddr(p, a->gw, a->iface)) - fwd = a->gw; + if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface)) + fwd = a->nh.gw; /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index 38447fdf..d1682c54 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -28,7 +28,7 @@ struct top_hash_entry u16 next_lsa_opts; /* For postponed LSA origination */ bird_clock_t inst_time; /* Time of installation into DB */ struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */ - struct mpnh *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ + struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 8924c200..310f3c01 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -51,7 +51,7 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *o struct rte_src *src; rte *e; - rta a; + rta *a; if (!new && !old) return; @@ -65,12 +65,13 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *o if (new) { - memcpy(&a, new->attrs, sizeof(rta)); + a = alloca(rta_size(new->attrs)); + memcpy(a, new->attrs, rta_size(new->attrs)); - a.aflags = 0; - a.eattrs = attrs; - a.hostentry = NULL; - e = rte_get_temp(&a); + a->aflags = 0; + a->eattrs = attrs; + a->hostentry = NULL; + e = rte_get_temp(a); e->pflags = 0; /* Copy protocol specific embedded attributes. 
*/ @@ -78,7 +79,7 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *o e->pref = new->pref; e->pflags = new->pflags; - src = a.src; + src = a->src; } else { diff --git a/proto/rip/rip.c b/proto/rip/rip.c index d87a078c..157093aa 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -147,21 +147,16 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) .src = p->p.main_source, .source = RTS_RIP, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST + .dest = RTD_UNICAST, }; u8 rt_metric = rt->metric; u16 rt_tag = rt->tag; - struct rip_rte *rt2 = rt->next; - /* Find second valid rte */ - while (rt2 && !rip_valid_rte(rt2)) - rt2 = rt2->next; - - if (p->ecmp && rt2) + if (p->ecmp) { /* ECMP route */ - struct mpnh *nhs = NULL; + struct nexthop *nhs = NULL; int num = 0; for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) @@ -169,33 +164,33 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (!rip_valid_rte(rt)) continue; - struct mpnh *nh = alloca(sizeof(struct mpnh)); + struct nexthop *nh = allocz(sizeof(struct nexthop)); + nh->gw = rt->next_hop; nh->iface = rt->from->nbr->iface; nh->weight = rt->from->ifa->cf->ecmp_weight; - mpnh_insert(&nhs, nh); + + nexthop_insert(&nhs, nh); num++; if (rt->tag != rt_tag) rt_tag = 0; } - a0.dest = RTD_MULTIPATH; - a0.nexthops = nhs; + a0.nh = *nhs; } else { /* Unipath route */ - a0.dest = RTD_ROUTER; - a0.gw = rt->next_hop; - a0.iface = rt->from->nbr->iface; a0.from = rt->from->nbr->addr; + a0.nh.gw = rt->next_hop; + a0.nh.iface = rt->from->nbr->iface; } rta *a = rta_lookup(&a0); rte *e = rte_get_temp(a); - e->u.rip.from = a0.iface; + e->u.rip.from = a0.nh.iface; e->u.rip.metric = rt_metric; e->u.rip.tag = rt_tag; @@ -345,8 +340,8 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s en->metric = rt_metric; en->tag = rt_tag; en->from = (new->attrs->src->proto == P) ? 
new->u.rip.from : NULL; - en->iface = new->attrs->iface; - en->next_hop = new->attrs->gw; + en->iface = new->attrs->nh.iface; + en->next_hop = new->attrs->nh.gw; } else { diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 0d4b1fd3..5459d9c3 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -124,8 +124,7 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_ .src = p->p.main_source, .source = RTS_RPKI, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST, - .dest = RTD_BLACKHOLE, + .dest = RTD_NONE, }; rta *a = rta_lookup(&a0); diff --git a/proto/static/config.Y b/proto/static/config.Y index 86359f0b..cd8bfcec 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -13,25 +13,35 @@ CF_HDR CF_DEFINES #define STATIC_CFG ((struct static_config *) this_proto) -static struct static_route *this_srt, *this_srt_nh, *last_srt_nh; +static struct static_route *this_srt, *this_snh; static struct f_inst **this_srt_last_cmd; -static void -static_route_finish(void) +static struct static_route * +static_nexthop_new(void) { - struct static_route *r; + struct static_route *nh = this_srt; + + if (this_snh) + { + /* Additional next hop */ + nh = cfg_allocz(sizeof(struct static_route)); + nh->net = this_srt->net; + this_snh->mp_next = nh; + } - /* Update undefined use_bfd entries in multipath nexthops */ - if (this_srt->dest == RTD_MULTIPATH) - for (r = this_srt->mp_next; r; r = r->mp_next) - if (r->use_bfd < 0) - r->use_bfd = this_srt->use_bfd; -} + nh->dest = RTD_UNICAST; + nh->mp_head = this_srt; + return nh; +}; + +static void +static_route_finish(void) +{ } CF_DECLS CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK) -CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD) +CF_KEYWORDS(WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD, MPLS) CF_GRAMMAR @@ -41,7 +51,7 @@ CF_ADDTO(proto, static_proto '}') static_proto_start: proto_start STATIC { this_proto = 
proto_config_new(&proto_static, $1); - static_init_config(STATIC_CFG); + init_list(&STATIC_CFG->routes); }; static_proto: @@ -53,58 +63,55 @@ static_proto: | static_proto stat_route stat_route_opt_list ';' { static_route_finish(); } ; +stat_nexthop: + VIA ipa ipa_scope { + this_snh = static_nexthop_new(); + this_snh->via = $2; + this_snh->iface = $3; + } + | VIA TEXT { + this_snh = static_nexthop_new(); + this_snh->via = IPA_NONE; + this_snh->iface = if_get_by_name($2); + } + | stat_nexthop MPLS label_stack { + this_snh->mls = $3; + } + | stat_nexthop WEIGHT expr { + this_snh->weight = $3 - 1; + if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256"); + } + | stat_nexthop BFD bool { + this_snh->use_bfd = $3; cf_check_bfd($3); + } +; + +stat_nexthops: + stat_nexthop + | stat_nexthops stat_nexthop +; + stat_route0: ROUTE net_any { this_srt = cfg_allocz(sizeof(struct static_route)); - add_tail(&STATIC_CFG->other_routes, &this_srt->n); + add_tail(&STATIC_CFG->routes, &this_srt->n); this_srt->net = $2; this_srt_last_cmd = &(this_srt->cmds); + this_srt->mp_next = NULL; + this_snh = NULL; } ; -stat_multipath1: - VIA ipa ipa_scope { - last_srt_nh = this_srt_nh; - this_srt_nh = cfg_allocz(sizeof(struct static_route)); - this_srt_nh->dest = RTD_NONE; - this_srt_nh->via = $2; - this_srt_nh->via_if = $3; - this_srt_nh->if_name = (void *) this_srt; /* really */ - this_srt_nh->use_bfd = -1; /* undefined */ - } - | stat_multipath1 WEIGHT expr { - this_srt_nh->weight = $3 - 1; - if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256"); - } - | stat_multipath1 BFD bool { - this_srt_nh->use_bfd = $3; cf_check_bfd($3); - } - ; - -stat_multipath: - stat_multipath1 { this_srt->mp_next = this_srt_nh; } - | stat_multipath stat_multipath1 { last_srt_nh->mp_next = this_srt_nh; } - ; - stat_route: - stat_route0 VIA ipa ipa_scope { - this_srt->dest = RTD_ROUTER; + stat_route0 stat_nexthops + | stat_route0 RECURSIVE ipa { + this_srt->dest = RTDX_RECURSIVE; this_srt->via = 
$3; - this_srt->via_if = $4; - } - | stat_route0 VIA TEXT { - this_srt->dest = RTD_DEVICE; - this_srt->if_name = $3; - rem_node(&this_srt->n); - add_tail(&STATIC_CFG->iface_routes, &this_srt->n); } - | stat_route0 MULTIPATH stat_multipath { - this_srt->dest = RTD_MULTIPATH; - } - | stat_route0 RECURSIVE ipa { + | stat_route0 RECURSIVE ipa MPLS label_stack { this_srt->dest = RTDX_RECURSIVE; this_srt->via = $3; + this_srt->mls = $5; } - | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; } | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; } | stat_route0 BLACKHOLE { this_srt->dest = RTD_BLACKHOLE; } @@ -114,7 +121,6 @@ stat_route: stat_route_item: cmd { *this_srt_last_cmd = $1; this_srt_last_cmd = &($1->next); } - | BFD bool ';' { this_srt->use_bfd = $2; cf_check_bfd($2); } ; stat_route_opts: diff --git a/proto/static/static.c b/proto/static/static.c index fb547537..adefa0b2 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -9,33 +9,32 @@ /** * DOC: Static * - * The Static protocol is implemented in a straightforward way. It keeps - * two lists of static routes: one containing interface routes and one - * holding the remaining ones. Interface routes are inserted and removed according - * to interface events received from the core via the if_notify() hook. Routes - * pointing to a neighboring router use a sticky node in the neighbor cache - * to be notified about gaining or losing the neighbor. Special - * routes like black holes or rejects are inserted all the time. + * The Static protocol is implemented in a straightforward way. It keeps a list + * of static routes. Routes of dest RTD_UNICAST have associated sticky node in + * the neighbor cache to be notified about gaining or losing the neighbor and + * about interface-related events (e.g. link down). They may also have a BFD + * request if associated with a BFD session. When a route is notified, + * static_decide() is used to see whether the route activeness is changed. 
In + * such case, the route is marked as dirty and scheduled to be announced or + * withdrawn, which is done asynchronously from event hook. Routes of other + * types (e.g. black holes) are announced all the time. * - * Multipath routes are tricky. Because these routes depends on - * several neighbors we need to integrate that to the neighbor - * notification handling, we use dummy static_route nodes, one for - * each nexthop. Therefore, a multipath route consists of a master - * static_route node (of dest RTD_MULTIPATH), which specifies prefix - * and is used in most circumstances, and a list of dummy static_route - * nodes (of dest RTD_NONE), which stores info about nexthops and are - * connected to neighbor entries and neighbor notifications. Dummy - * nodes are chained using mp_next, they aren't in other_routes list, - * and abuse if_name field for other purposes. + * Multipath routes are a bit tricky. To represent additional next hops, dummy + * static_route nodes are used, which are chained using @mp_next field and link + * to the master node by @mp_head field. Each next hop has a separate neighbor + * entry and an activeness state, but the master node is used for most purposes. + * Note that most functions DO NOT accept dummy nodes as arguments. * * The only other thing worth mentioning is that when asked for reconfiguration, * Static not only compares the two configurations, but it also calculates - * difference between the lists of static routes and it just inserts the - * newly added routes and removes the obsolete ones. + * difference between the lists of static routes and it just inserts the newly + * added routes, removes the obsolete ones and reannounces changed ones. 
*/ #undef LOCAL_DEBUG +#include <stdlib.h> + #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" @@ -51,100 +50,119 @@ static linpool *static_lp; static inline rtable * -p_igp_table(struct proto *p) +p_igp_table(struct static_proto *p) { - struct static_config *cf = (void *) p->cf; - return cf->igp_table ? cf->igp_table->table : p->main_channel->table; + struct static_config *cf = (void *) p->p.cf; + return cf->igp_table ? cf->igp_table->table : p->p.main_channel->table; } static void -static_install(struct proto *p, struct static_route *r, struct iface *ifa) +static_announce_rte(struct static_proto *p, struct static_route *r) { - rta a; - rte *e; + rta *a = allocz(RTA_MAX_SIZE); + a->src = p->p.main_source; + a->source = RTS_STATIC; + a->scope = SCOPE_UNIVERSE; + a->dest = r->dest; - if (r->installed > 0) - return; + if (r->dest == RTD_UNICAST) + { + struct static_route *r2; + struct nexthop *nhs = NULL; - DBG("Installing static route %N, rtd=%d\n", r->net, r->dest); - bzero(&a, sizeof(a)); - a.src = p->main_source; - a.source = (r->dest == RTD_DEVICE) ? 
RTS_STATIC_DEVICE : RTS_STATIC; - a.scope = SCOPE_UNIVERSE; - a.cast = RTC_UNICAST; - a.dest = r->dest; - a.gw = r->via; - a.iface = ifa; - - if (r->dest == RTD_MULTIPATH) + for (r2 = r; r2; r2 = r2->mp_next) { - struct static_route *r2; - struct mpnh *nhs = NULL; - - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - if (r2->installed) - { - struct mpnh *nh = alloca(sizeof(struct mpnh)); - nh->gw = r2->via; - nh->iface = r2->neigh->iface; - nh->weight = r2->weight; - mpnh_insert(&nhs, nh); - } - - /* There is at least one nexthop */ - if (!nhs->next) - { - /* Fallback to unipath route for exactly one nexthop */ - a.dest = RTD_ROUTER; - a.gw = nhs->gw; - a.iface = nhs->iface; - } - else - a.nexthops = nhs; + if (!r2->active) + continue; + + struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE); + nh->gw = r2->via; + nh->iface = r2->neigh->iface; + nh->weight = r2->weight; + if (r2->mls) + { + nh->labels = r2->mls->len; + memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32)); + } + + nexthop_insert(&nhs, nh); } + if (!nhs) + goto withdraw; + + nexthop_link(a, nhs); + } + if (r->dest == RTDX_RECURSIVE) - rta_set_recursive_next_hop(p->main_channel->table, &a, p_igp_table(p), r->via, IPA_NONE); + rta_set_recursive_next_hop(p->p.main_channel->table, a, p_igp_table(p), r->via, IPA_NONE, r->mls); - /* We skip rta_lookup() here */ + /* Already announced */ + if (r->state == SRS_CLEAN) + return; - e = rte_get_temp(&a); + /* We skip rta_lookup() here */ + rte *e = rte_get_temp(a); e->pflags = 0; if (r->cmds) f_eval_rte(r->cmds, &e, static_lp); - rte_update(p, r->net, e); - r->installed = 1; + rte_update(&p->p, r->net, e); + r->state = SRS_CLEAN; if (r->cmds) lp_flush(static_lp); + + return; + +withdraw: + if (r->state == SRS_DOWN) + return; + + rte_update(&p->p, r->net, NULL); + r->state = SRS_DOWN; } static void -static_remove(struct proto *p, struct static_route *r) +static_mark_rte(struct static_proto *p, struct static_route *r) { - if (!r->installed) + if (r->state == 
SRS_DIRTY) return; - DBG("Removing static route %N via %I\n", r->net, r->via); - rte_update(p, r->net, NULL); - r->installed = 0; + r->state = SRS_DIRTY; + BUFFER_PUSH(p->marked) = r; + + if (!ev_active(p->event)) + ev_schedule(p->event); +} + +static void +static_announce_marked(void *P) +{ + struct static_proto *p = P; + + BUFFER_WALK(p->marked, r) + static_announce_rte(P, r); + + BUFFER_FLUSH(p->marked); } static void static_bfd_notify(struct bfd_request *req); static void -static_update_bfd(struct proto *p, struct static_route *r) +static_update_bfd(struct static_proto *p, struct static_route *r) { + /* The @r is a RTD_UNICAST next hop, may be a dummy node */ + struct neighbor *nb = r->neigh; int bfd_up = (nb->scope > 0) && r->use_bfd; if (bfd_up && !r->bfd_req) { // ip_addr local = ipa_nonzero(r->local) ? r->local : nb->ifa->ip; - r->bfd_req = bfd_request_session(p->pool, r->via, nb->ifa->ip, nb->iface, + r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip, nb->iface, static_bfd_notify, r); } @@ -156,223 +174,171 @@ static_update_bfd(struct proto *p, struct static_route *r) } static int -static_decide(struct static_config *cf, struct static_route *r) +static_decide(struct static_proto *p, struct static_route *r) { - /* r->dest != RTD_MULTIPATH, but may be RTD_NONE (part of multipath route) - the route also have to be valid (r->neigh != NULL) */ + /* The @r is a RTD_UNICAST next hop, may be a dummy node */ + + struct static_config *cf = (void *) p->p.cf; + uint old_active = r->active; if (r->neigh->scope < 0) - return 0; + goto fail; if (cf->check_link && !(r->neigh->iface->flags & IF_LINK_UP)) - return 0; + goto fail; - if (r->bfd_req && r->bfd_req->state != BFD_STATE_UP) - return 0; + if (r->bfd_req && (r->bfd_req->state != BFD_STATE_UP)) + goto fail; - return 1; -} + r->active = 1; + return !old_active; +fail: + r->active = 0; + return old_active; +} static void -static_add(struct proto *p, struct static_config *cf, struct static_route *r) 
+static_add_rte(struct static_proto *p, struct static_route *r) { - DBG("static_add(%N,%d)\n", r->net, r->dest); - switch (r->dest) - { - case RTD_ROUTER: - { - struct neighbor *n = neigh_find2(p, &r->via, r->via_if, NEF_STICKY); - if (n) - { - r->chain = n->data; - n->data = r; - r->neigh = n; - - static_update_bfd(p, r); - if (static_decide(cf, r)) - static_install(p, r, n->iface); - else - static_remove(p, r); - } - else - { - log(L_ERR "Static route destination %I is invalid. Ignoring.", r->via); - static_remove(p, r); - } - break; - } + if (r->dest == RTD_UNICAST) + { + struct static_route *r2; + struct neighbor *n; - case RTD_DEVICE: - break; + for (r2 = r; r2; r2 = r2->mp_next) + { + n = ipa_nonzero(r2->via) ? + neigh_find2(&p->p, &r2->via, r2->iface, NEF_STICKY) : + neigh_find_iface(&p->p, r2->iface); - case RTD_MULTIPATH: + if (!n) { - int count = 0; - struct static_route *r2; - - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - { - struct neighbor *n = neigh_find2(p, &r2->via, r2->via_if, NEF_STICKY); - if (n) - { - r2->chain = n->data; - n->data = r2; - r2->neigh = n; - - static_update_bfd(p, r2); - r2->installed = static_decide(cf, r2); - count += r2->installed; - } - else - { - log(L_ERR "Static route destination %I is invalid. 
Ignoring.", r2->via); - r2->installed = 0; - } - } - - if (count) - static_install(p, r, NULL); - else - static_remove(p, r); - break; + log(L_WARN "Invalid next hop %I of static route %N", r2->via, r2->net); + continue; } - default: - static_install(p, r, NULL); + r2->neigh = n; + r2->chain = n->data; + n->data = r2; + + static_update_bfd(p, r2); + static_decide(p, r2); } + } + + static_announce_rte(p, r); } static void -static_rte_cleanup(struct proto *p UNUSED, struct static_route *r) +static_reset_rte(struct static_proto *p UNUSED, struct static_route *r) { struct static_route *r2; - if (r->bfd_req) + for (r2 = r; r2; r2 = r2->mp_next) { - rfree(r->bfd_req); - r->bfd_req = NULL; - } + r2->neigh = NULL; + r2->chain = NULL; - if (r->dest == RTD_MULTIPATH) - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - if (r2->bfd_req) - { - rfree(r2->bfd_req); - r2->bfd_req = NULL; - } + r2->state = 0; + r2->active = 0; + + rfree(r2->bfd_req); + r2->bfd_req = NULL; + } } -static int -static_start(struct proto *p) +static void +static_remove_rte(struct static_proto *p, struct static_route *r) { - struct static_config *cf = (void *) p->cf; - struct static_route *r; + if (r->state) + rte_update(&p->p, r->net, NULL); - DBG("Static: take off!\n"); + static_reset_rte(p, r); +} - if (!static_lp) - static_lp = lp_new(&root_pool, 1008); - if (cf->igp_table) - rt_lock_table(cf->igp_table->table); +static inline int +static_same_dest(struct static_route *x, struct static_route *y) +{ + if (x->dest != y->dest) + return 0; - /* We have to go UP before routes could be installed */ - proto_notify_state(p, PS_UP); + switch (x->dest) + { + case RTD_UNICAST: + for (; x && y; x = x->mp_next, y = y->mp_next) + { + if (!ipa_equal(x->via, y->via) || + (x->iface != y->iface) || + (x->use_bfd != y->use_bfd) || + (x->weight != y->weight) || + (!x->mls != !y->mls) || + ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + return 0; + + if (!x->mls) + continue; + + for (uint i = 0; i < x->mls->len; 
i++) + if (x->mls->stack[i] != y->mls->stack[i]) + return 0; + } + return !x && !y; - WALK_LIST(r, cf->other_routes) - static_add(p, cf, r); - return PS_UP; -} + case RTDX_RECURSIVE: + if (!ipa_equal(x->via, y->via) || + (!x->mls != !y->mls) || + ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + return 0; -static int -static_shutdown(struct proto *p) -{ - struct static_config *cf = (void *) p->cf; - struct static_route *r; + if (!x->mls) + return 1; - /* Just reset the flag, the routes will be flushed by the nest */ - WALK_LIST(r, cf->iface_routes) - r->installed = 0; - WALK_LIST(r, cf->other_routes) - { - static_rte_cleanup(p, r); - r->installed = 0; - } + for (uint i = 0; i < x->mls->len; i++) + if (x->mls->stack[i] != y->mls->stack[i]) + return 0; - /* Handle failure during channel reconfigure */ - /* FIXME: This should be handled in a better way */ - cf = (void *) p->cf_new; - if (cf) - { - WALK_LIST(r, cf->iface_routes) - r->installed = 0; - WALK_LIST(r, cf->other_routes) - r->installed = 0; - } + return 1; - return PS_DOWN; + default: + return 1; + } } -static void -static_cleanup(struct proto *p) +static inline int +static_same_rte(struct static_route *or, struct static_route *nr) { - struct static_config *cf = (void *) p->cf; - - if (cf->igp_table) - rt_unlock_table(cf->igp_table->table); + /* Note that i_same() requires arguments in (new, old) order */ + return static_same_dest(or, nr) && i_same(nr->cmds, or->cmds); } static void -static_update_rte(struct proto *p, struct static_route *r) +static_reconfigure_rte(struct static_proto *p, struct static_route *or, struct static_route *nr) { - switch (r->dest) - { - case RTD_ROUTER: - if (static_decide((struct static_config *) p->cf, r)) - static_install(p, r, r->neigh->iface); - else - static_remove(p, r); - break; - - case RTD_NONE: /* a part of multipath route */ - { - int decision = static_decide((struct static_config *) p->cf, r); - if (decision == r->installed) - break; /* no change */ - 
r->installed = decision; - - struct static_route *r1, *r2; - int count = 0; - r1 = (void *) r->if_name; /* really */ - for (r2 = r1->mp_next; r2; r2 = r2->mp_next) - count += r2->installed; - - if (count) - { - /* Set of nexthops changed - force reinstall */ - r1->installed = 0; - static_install(p, r1, NULL); - } - else - static_remove(p, r1); + if ((or->state == SRS_CLEAN) && !static_same_rte(or, nr)) + nr->state = SRS_DIRTY; + else + nr->state = or->state; - break; - } - } + static_add_rte(p, nr); + static_reset_rte(p, or); } + static void static_neigh_notify(struct neighbor *n) { - struct proto *p = n->proto; + struct static_proto *p = (void *) n->proto; struct static_route *r; DBG("Static: neighbor notify for %I: iface %p\n", n->addr, n->iface); - for(r=n->data; r; r=r->chain) + for (r = n->data; r; r = r->chain) { static_update_bfd(p, r); - static_update_rte(p, r); + + if (static_decide(p, r)) + static_mark_rte(p, r->mp_head); } } @@ -380,77 +346,20 @@ static void static_bfd_notify(struct bfd_request *req) { struct static_route *r = req->data; - struct proto *p = r->neigh->proto; + struct static_proto *p = (void *) r->neigh->proto; // if (req->down) TRACE(D_EVENTS, "BFD session down for nbr %I on %s", XXXX); - static_update_rte(p, r); -} - -static void -static_dump_rt(struct static_route *r) -{ - debug("%-1N: ", r->net); - switch (r->dest) - { - case RTD_ROUTER: - debug("via %I\n", r->via); - break; - case RTD_DEVICE: - debug("dev %s\n", r->if_name); - break; - default: - debug("rtd %d\n", r->dest); - break; - } -} - -static void -static_dump(struct proto *p) -{ - struct static_config *c = (void *) p->cf; - struct static_route *r; - - debug("Independent static routes:\n"); - WALK_LIST(r, c->other_routes) - static_dump_rt(r); - debug("Device static routes:\n"); - WALK_LIST(r, c->iface_routes) - static_dump_rt(r); + if (static_decide(p, r)) + static_mark_rte(p, r->mp_head); } -static void -static_if_notify(struct proto *p, unsigned flags, struct iface *i) -{ - 
struct static_route *r; - struct static_config *c = (void *) p->cf; - - if (flags & IF_CHANGE_UP) - { - WALK_LIST(r, c->iface_routes) - if (!strcmp(r->if_name, i->name)) - static_install(p, r, i); - } - else if (flags & IF_CHANGE_DOWN) - { - WALK_LIST(r, c->iface_routes) - if (!strcmp(r->if_name, i->name)) - static_remove(p, r); - } -} - -int +static int static_rte_mergable(rte *pri UNUSED, rte *sec UNUSED) { return 1; } -void -static_init_config(struct static_config *c) -{ - init_list(&c->iface_routes); - init_list(&c->other_routes); -} static void static_postconfig(struct proto_config *CF) @@ -461,17 +370,11 @@ static_postconfig(struct proto_config *CF) if (EMPTY_LIST(CF->channels)) cf_error("Channel not specified"); - - WALK_LIST(r, cf->iface_routes) - if (r->net->type != CF->net_type) - cf_error("Route %N incompatible with channel type", r->net); - - WALK_LIST(r, cf->other_routes) - if (r->net->type != CF->net_type) + WALK_LIST(r, cf->routes) + if (r->net && (r->net->type != CF->net_type)) cf_error("Route %N incompatible with channel type", r->net); } - static struct proto * static_init(struct proto_config *CF) { @@ -482,84 +385,84 @@ static_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); P->neigh_notify = static_neigh_notify; - P->if_notify = static_if_notify; P->rte_mergable = static_rte_mergable; return P; } -static inline int -static_same_dest(struct static_route *x, struct static_route *y) +static int +static_start(struct proto *P) { - if (x->dest != y->dest) - return 0; + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) P->cf; + struct static_route *r; - switch (x->dest) - { - case RTD_ROUTER: - return ipa_equal(x->via, y->via) && (x->via_if == y->via_if); - - case RTD_DEVICE: - return !strcmp(x->if_name, y->if_name); - - case RTD_MULTIPATH: - for (x = x->mp_next, y = y->mp_next; - x && y; - x = x->mp_next, y = y->mp_next) - if (!ipa_equal(x->via, y->via) || - (x->via_if != 
y->via_if) || - (x->use_bfd != y->use_bfd) || - (x->weight != y->weight)) - return 0; - return !x && !y; + if (!static_lp) + static_lp = lp_new(&root_pool, 1008); - case RTDX_RECURSIVE: - return ipa_equal(x->via, y->via); + if (cf->igp_table) + rt_lock_table(cf->igp_table->table); - default: - return 1; - } + p->event = ev_new(p->p.pool); + p->event->hook = static_announce_marked; + p->event->data = p; + + BUFFER_INIT(p->marked, p->p.pool, 4); + + /* We have to go UP before routes could be installed */ + proto_notify_state(P, PS_UP); + + WALK_LIST(r, cf->routes) + static_add_rte(p, r); + + return PS_UP; } -static inline int -static_same_rte(struct static_route *x, struct static_route *y) +static int +static_shutdown(struct proto *P) { - return static_same_dest(x, y) && i_same(x->cmds, y->cmds); -} + struct static_proto *p = (void *) P; + struct static_config *cf = (void *) P->cf; + struct static_route *r; + + /* Just reset the flag, the routes will be flushed by the nest */ + WALK_LIST(r, cf->routes) + static_reset_rte(p, r); + return PS_DOWN; +} static void -static_match(struct proto *p, struct static_route *r, struct static_config *n) +static_cleanup(struct proto *P) { - struct static_route *t; - - /* - * For given old route *r we find whether a route to the same - * network is also in the new route list. In that case, we keep the - * route and possibly update the route later if destination changed. - * Otherwise, we remove the route. 
- */ - - if (r->neigh) - r->neigh->data = NULL; + struct static_config *cf = (void *) P->cf; - WALK_LIST(t, n->iface_routes) - if (net_equal(r->net, t->net)) - goto found; + if (cf->igp_table) + rt_unlock_table(cf->igp_table->table); +} - WALK_LIST(t, n->other_routes) - if (net_equal(r->net, t->net)) - goto found; +static void +static_dump_rte(struct static_route *r) +{ + debug("%-1N: ", r->net); + if (r->dest == RTD_UNICAST) + if (r->iface && ipa_zero(r->via)) + debug("dev %s\n", r->iface->name); + else + debug("via %I%J\n", r->via, r->iface); + else + debug("rtd %d\n", r->dest); +} - static_remove(p, r); - return; +static void +static_dump(struct proto *P) +{ + struct static_config *c = (void *) P->cf; + struct static_route *r; - found: - /* If destination is different, force reinstall */ - if ((r->installed > 0) && !static_same_rte(r, t)) - t->installed = -1; - else - t->installed = r->installed; + debug("Static routes:\n"); + WALK_LIST(r, c->routes) + static_dump_rte(r); } static inline rtable * @@ -568,76 +471,87 @@ cf_igp_table(struct static_config *cf) return cf->igp_table ? 
cf->igp_table->table : NULL; } +static inline int +static_cmp_rte(const void *X, const void *Y) +{ + struct static_route *x = *(void **)X, *y = *(void **)Y; + return net_compare(x->net, y->net); +} + static int -static_reconfigure(struct proto *p, struct proto_config *CF) +static_reconfigure(struct proto *P, struct proto_config *CF) { - struct static_config *o = (void *) p->cf; + struct static_proto *p = (void *) P; + struct static_config *o = (void *) P->cf; struct static_config *n = (void *) CF; - struct static_route *r; + struct static_route *r, *r2, *or, *nr; if (cf_igp_table(o) != cf_igp_table(n)) return 0; - if (!proto_configure_channel(p, &p->main_channel, proto_cf_main_channel(CF))) + if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF))) return 0; - /* Delete all obsolete routes and reset neighbor entries */ - WALK_LIST(r, o->iface_routes) - static_match(p, r, n); - WALK_LIST(r, o->other_routes) - static_match(p, r, n); + p->p.cf = CF; - /* Now add all new routes, those not changed will be ignored by static_install() */ - WALK_LIST(r, n->iface_routes) - { - struct iface *ifa; - if ((ifa = if_find_by_name(r->if_name)) && (ifa->flags & IF_UP)) - static_install(p, r, ifa); - } - WALK_LIST(r, n->other_routes) - static_add(p, n, r); + /* Reset route lists in neighbor entries */ + WALK_LIST(r, o->routes) + for (r2 = r; r2; r2 = r2->mp_next) + if (r2->neigh) + r2->neigh->data = NULL; - WALK_LIST(r, o->other_routes) - static_rte_cleanup(p, r); + /* Reconfigure initial matching sequence */ + for (or = HEAD(o->routes), nr = HEAD(n->routes); + NODE_VALID(or) && NODE_VALID(nr) && net_equal(or->net, nr->net); + or = NODE_NEXT(or), nr = NODE_NEXT(nr)) + static_reconfigure_rte(p, or, nr); - return 1; -} + if (!NODE_VALID(or) && !NODE_VALID(nr)) + return 1; -static void -static_copy_routes(list *dlst, list *slst) -{ - struct static_route *dr, *sr; + /* Reconfigure remaining routes, sort them to find matching pairs */ + struct static_route *or2, 
*nr2, **orbuf, **nrbuf; + uint ornum = 0, nrnum = 0, orpos = 0, nrpos = 0, i; - init_list(dlst); - WALK_LIST(sr, *slst) - { - /* copy one route */ - dr = cfg_alloc(sizeof(struct static_route)); - memcpy(dr, sr, sizeof(struct static_route)); - - /* This fn is supposed to be called on fresh src routes, which have 'live' - fields (like .chain, .neigh or .installed) zero, so no need to zero them */ - - /* We need to copy multipath chain, because there are backptrs in 'if_name' */ - if (dr->dest == RTD_MULTIPATH) - { - struct static_route *md, *ms, **mp_last; - - mp_last = &(dr->mp_next); - for (ms = sr->mp_next; ms; ms = ms->mp_next) - { - md = cfg_alloc(sizeof(struct static_route)); - memcpy(md, ms, sizeof(struct static_route)); - md->if_name = (void *) dr; /* really */ - - *mp_last = md; - mp_last = &(md->mp_next); - } - *mp_last = NULL; - } - - add_tail(dlst, (node *) dr); - } + for (or2 = or; NODE_VALID(or2); or2 = NODE_NEXT(or2)) + ornum++; + + for (nr2 = nr; NODE_VALID(nr2); nr2 = NODE_NEXT(nr2)) + nrnum++; + + orbuf = xmalloc(ornum * sizeof(void *)); + nrbuf = xmalloc(nrnum * sizeof(void *)); + + for (i = 0, or2 = or; i < ornum; i++, or2 = NODE_NEXT(or2)) + orbuf[i] = or2; + + for (i = 0, nr2 = nr; i < nrnum; i++, nr2 = NODE_NEXT(nr2)) + nrbuf[i] = nr2; + + qsort(orbuf, ornum, sizeof(struct static_route *), static_cmp_rte); + qsort(nrbuf, nrnum, sizeof(struct static_route *), static_cmp_rte); + + while ((orpos < ornum) && (nrpos < nrnum)) + { + int x = net_compare(orbuf[orpos]->net, nrbuf[nrpos]->net); + if (x < 0) + static_remove_rte(p, orbuf[orpos++]); + else if (x > 0) + static_add_rte(p, nrbuf[nrpos++]); + else + static_reconfigure_rte(p, orbuf[orpos++], nrbuf[nrpos++]); + } + + while (orpos < ornum) + static_remove_rte(p, orbuf[orpos++]); + + while (nrpos < nrnum) + static_add_rte(p, nrbuf[nrpos++]); + + xfree(orbuf); + xfree(nrbuf); + + return 1; } static void @@ -646,52 +560,64 @@ static_copy_config(struct proto_config *dest, struct proto_config *src) 
struct static_config *d = (struct static_config *) dest; struct static_config *s = (struct static_config *) src; - /* Copy route lists */ - static_copy_routes(&d->iface_routes, &s->iface_routes); - static_copy_routes(&d->other_routes, &s->other_routes); -} + struct static_route *srt, *snh; -struct protocol proto_static = { - .name = "Static", - .template = "static%d", - .preference = DEF_PREF_STATIC, - .channel_mask = NB_ANY, - .proto_size = sizeof(struct proto), - .config_size = sizeof(struct static_config), - .postconfig = static_postconfig, - .init = static_init, - .dump = static_dump, - .start = static_start, - .shutdown = static_shutdown, - .cleanup = static_cleanup, - .reconfigure = static_reconfigure, - .copy_config = static_copy_config -}; + /* Copy route list */ + init_list(&d->routes); + WALK_LIST(srt, s->routes) + { + struct static_route *drt = NULL, *dnh = NULL, **dnp = &drt; + + for (snh = srt; snh; snh = snh->mp_next) + { + dnh = cfg_alloc(sizeof(struct static_route)); + memcpy(dnh, snh, sizeof(struct static_route)); + + if (!drt) + add_tail(&d->routes, &(dnh->n)); + + *dnp = dnh; + dnp = &(dnh->mp_next); + + if (snh->mp_head) + dnh->mp_head = drt; + } + } +} static void static_show_rt(struct static_route *r) { - byte via[IPA_MAX_TEXT_LENGTH + 25]; - switch (r->dest) + { + case RTD_UNICAST: + { + struct static_route *r2; + + cli_msg(-1009, "%N", r->net); + for (r2 = r; r2; r2 = r2->mp_next) { - case RTD_ROUTER: bsprintf(via, "via %I%J", r->via, r->via_if); break; - case RTD_DEVICE: bsprintf(via, "dev %s", r->if_name); break; - case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break; - case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break; - case RTD_PROHIBIT: bsprintf(via, "prohibited"); break; - case RTD_MULTIPATH: bsprintf(via, "multipath"); break; - case RTDX_RECURSIVE: bsprintf(via, "recursive %I", r->via); break; - default: bsprintf(via, "???"); + if (r2->iface && ipa_zero(r2->via)) + cli_msg(-1009, "\tdev %s%s%s", r2->iface->name, + 
r2->bfd_req ? " (bfd)" : "", r2->active ? "" : " (dormant)"); + else + cli_msg(-1009, "\tvia %I%J%s%s", r2->via, r2->iface, + r2->bfd_req ? " (bfd)" : "", r2->active ? "" : " (dormant)"); } - cli_msg(-1009, "%N %s%s%s", r->net, via, - r->bfd_req ? " (bfd)" : "", r->installed ? "" : " (dormant)"); + break; + } - struct static_route *r2; - if (r->dest == RTD_MULTIPATH) - for (r2 = r->mp_next; r2; r2 = r2->mp_next) - cli_msg(-1009, "\tvia %I%J weight %d%s%s", r2->via, r2->via_if, r2->weight + 1, - r2->bfd_req ? " (bfd)" : "", r2->installed ? "" : " (dormant)"); + case RTD_NONE: + case RTD_BLACKHOLE: + case RTD_UNREACHABLE: + case RTD_PROHIBIT: + cli_msg(-1009, "%N\t%s", r->net, rta_dest_names[r->dest]); + break; + + case RTDX_RECURSIVE: + cli_msg(-1009, "%N\trecursive %I", r->net, r->via); + break; + } } void @@ -700,9 +626,25 @@ static_show(struct proto *P) struct static_config *c = (void *) P->cf; struct static_route *r; - WALK_LIST(r, c->other_routes) - static_show_rt(r); - WALK_LIST(r, c->iface_routes) + WALK_LIST(r, c->routes) static_show_rt(r); cli_msg(0, ""); } + + +struct protocol proto_static = { + .name = "Static", + .template = "static%d", + .preference = DEF_PREF_STATIC, + .channel_mask = NB_ANY, + .proto_size = sizeof(struct static_proto), + .config_size = sizeof(struct static_config), + .postconfig = static_postconfig, + .init = static_init, + .dump = static_dump, + .start = static_start, + .shutdown = static_shutdown, + .cleanup = static_cleanup, + .reconfigure = static_reconfigure, + .copy_config = static_copy_config +}; diff --git a/proto/static/static.h b/proto/static/static.h index 51486e83..0976a9c9 100644 --- a/proto/static/static.h +++ b/proto/static/static.h @@ -11,41 +11,57 @@ #include "nest/route.h" #include "nest/bfd.h" +#include "lib/buffer.h" struct static_config { struct proto_config c; - list iface_routes; /* Routes to search on interface events */ - list other_routes; /* Routes hooked to neighbor cache and reject routes */ + list routes; 
/* List of static routes (struct static_route) */ int check_link; /* Whether iface link state is used */ struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ }; +struct static_proto { + struct proto p; -void static_init_config(struct static_config *); + struct event *event; /* Event for announcing updated routes */ + BUFFER(struct static_route *) marked; /* Routes marked for reannouncement */ +}; struct static_route { node n; - struct static_route *chain; /* Next for the same neighbor */ net_addr *net; /* Network we route */ - int dest; /* Destination type (RTD_*) */ ip_addr via; /* Destination router */ - struct iface *via_if; /* Destination iface, for link-local vias */ - struct neighbor *neigh; - byte *if_name; /* Name for RTD_DEVICE routes */ - struct static_route *mp_next; /* Nexthops for RTD_MULTIPATH routes */ + struct iface *iface; /* Destination iface, for link-local vias or device routes */ + struct neighbor *neigh; /* Associated neighbor entry */ + struct static_route *chain; /* Next for the same neighbor */ + struct static_route *mp_head; /* First nexthop of this route */ + struct static_route *mp_next; /* Nexthops for multipath routes */ struct f_inst *cmds; /* List of commands for setting attributes */ - int installed; /* Installed in rt table, -1 for reinstall */ - int use_bfd; /* Configured to use BFD */ - int weight; /* Multipath next hop weight */ + byte dest; /* Destination type (RTD_*) */ + byte state; /* State of route announcement (SRS_*) */ + byte active; /* Next hop is active (nbr/iface/BFD available) */ + byte weight; /* Multipath next hop weight */ + byte use_bfd; /* Configured to use BFD */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ + mpls_label_stack *mls; /* MPLS label stack; may be NULL */ }; -/* Dummy nodes (parts of multipath route) abuses masklen field for weight - and if_name field for a ptr to the master (RTD_MULTIPATH) node. 
*/ - +/* + * Note that data fields neigh, chain, state, active and bfd_req are runtime + * data, not real configuration data. Must be handled carefully. + * + * Regular (i.e. dest == RTD_UNICAST) routes use static_route structure for + * additional next hops (fields mp_head, mp_next). Note that 'state' is for + * whole route, while 'active' is for each next hop. Also note that fields + * mp_head, mp_next, active are zero for other kinds of routes. + */ #define RTDX_RECURSIVE 0x7f /* Phony dest value for recursive routes */ +#define SRS_DOWN 0 /* Route is not announced */ +#define SRS_CLEAN 1 /* Route is active and announced */ +#define SRS_DIRTY 2 /* Route changed since announcement */ + void static_show(struct proto *); #endif diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index d2372a3d..c65cba65 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -147,9 +147,7 @@ krt_capable(rte *e) rta *a = e->attrs; return - a->cast == RTC_UNICAST && - (a->dest == RTD_ROUTER - || a->dest == RTD_DEVICE + ((a->dest == RTD_UNICAST && !a->nh.next) /* No multipath support */ #ifdef RTF_REJECT || a->dest == RTD_UNREACHABLE #endif @@ -190,12 +188,11 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) net *net = e->net; rta *a = e->attrs; static int msg_seq; - struct iface *j, *i = a->iface; + struct iface *j, *i = a->nh.iface; int l; struct ks_msg msg; char *body = (char *)msg.buf; sockaddr gate, mask, dst; - ip_addr gw; DBG("krt-sock: send %I/%d via %I\n", net->n.prefix, net->n.pxlen, a->gw); @@ -225,14 +222,12 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) msg.rtm.rtm_flags |= RTF_BLACKHOLE; #endif - /* This is really very nasty, but I'm not able - * to add "(reject|blackhole)" route without - * gateway set + /* + * This is really very nasty, but I'm not able to add reject/blackhole route + * without gateway address. 
*/ - if(!i) + if (!i) { - i = HEAD(iface_list); - WALK_LIST(j, iface_list) { if (j->flags & IF_LOOPBACK) @@ -241,13 +236,13 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) break; } } - } - - gw = a->gw; - /* Embed interface ID to link-local address */ - if (ipa_is_link_local(gw)) - _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); + if (!i) + { + log(L_ERR "KRT: Cannot find loopback iface"); + return -1; + } + } int af = AF_UNSPEC; @@ -263,43 +258,51 @@ krt_send_route(struct krt_proto *p, int cmd, rte *e) return -1; } - sockaddr_fill(&dst, af, net_prefix(net->n.addr), NULL, 0); sockaddr_fill(&mask, af, net_pxmask(net->n.addr), NULL, 0); - sockaddr_fill(&gate, af, gw, NULL, 0); switch (a->dest) { - case RTD_ROUTER: + case RTD_UNICAST: + if (ipa_nonzero(a->nh.gw)) + { + ip_addr gw = a->nh.gw; + + /* Embed interface ID to link-local address */ + if (ipa_is_link_local(gw)) + _I0(gw) = 0xfe800000 | (i->index & 0x0000ffff); + + sockaddr_fill(&gate, af, gw, NULL, 0); msg.rtm.rtm_flags |= RTF_GATEWAY; msg.rtm.rtm_addrs |= RTA_GATEWAY; break; + } #ifdef RTF_REJECT - case RTD_UNREACHABLE: + case RTD_UNREACHABLE: #endif #ifdef RTF_BLACKHOLE - case RTD_BLACKHOLE: + case RTD_BLACKHOLE: #endif - case RTD_DEVICE: - if(i) - { + { + /* Fallback for all other valid cases */ + if (!i->addr) + { + log(L_ERR "KRT: interface %s has no IP addess", i->name); + return -1; + } + #ifdef RTF_CLONING - if (cmd == RTM_ADD && (i->flags & IF_MULTIACCESS) != IF_MULTIACCESS) /* PTP */ - msg.rtm.rtm_flags |= RTF_CLONING; + if (cmd == RTM_ADD && (i->flags & IF_MULTIACCESS) != IF_MULTIACCESS) /* PTP */ + msg.rtm.rtm_flags |= RTF_CLONING; #endif - if(!i->addr) { - log(L_ERR "KRT: interface %s has no IP addess", i->name); - return -1; - } + sockaddr_fill(&gate, ipa_is_ip4(i->addr->ip) ? AF_INET : AF_INET6, i->addr->ip, NULL, 0); + msg.rtm.rtm_addrs |= RTA_GATEWAY; + } - sockaddr_fill(&gate, ipa_is_ip4(i->addr->ip) ? 
AF_INET : AF_INET6, i->addr->ip, NULL, 0); - msg.rtm.rtm_addrs |= RTA_GATEWAY; - } - break; - default: - bug("krt-sock: unknown flags, but not filtered"); + default: + bug("krt-sock: unknown flags, but not filtered"); } msg.rtm.rtm_index = i->index; @@ -469,7 +472,6 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) .src = p->p.main_source, .source = RTS_INHERIT, .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST }; /* reject/blackhole routes have also set RTF_GATEWAY, @@ -489,39 +491,37 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) } #endif - a.iface = if_find_by_index(msg->rtm.rtm_index); - if (!a.iface) + a.nh.iface = if_find_by_index(msg->rtm.rtm_index); + if (!a.nh.iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, msg->rtm.rtm_index); return; } + a.dest = RTD_UNICAST; if (flags & RTF_GATEWAY) { neighbor *ng; - a.dest = RTD_ROUTER; - a.gw = igate; + a.nh.gw = igate; /* Clean up embedded interface ID returned in link-local address */ - if (ipa_is_link_local(a.gw)) - _I0(a.gw) = 0xfe800000; + if (ipa_is_link_local(a.nh.gw)) + _I0(a.nh.gw) = 0xfe800000; - ng = neigh_find2(&p->p, &a.gw, a.iface, 0); + ng = neigh_find2(&p->p, &a.nh.gw, a.nh.iface, 0); if (!ng || (ng->scope == SCOPE_HOST)) { /* Ignore routes with next-hop 127.0.0.1, host routes with such next-hop appear on OpenBSD for address aliases. 
*/ - if (ipa_classify(a.gw) == (IADDR_HOST | SCOPE_HOST)) + if (ipa_classify(a.nh.gw) == (IADDR_HOST | SCOPE_HOST)) return; log(L_ERR "KRT: Received route %N with strange next-hop %I", - net->n.addr, a.gw); + net->n.addr, a.nh.gw); return; } } - else - a.dest = RTD_DEVICE; done: e = rte_get_temp(&a); diff --git a/sysdep/cf/linux.h b/sysdep/cf/linux.h index cec9499c..3a3a15da 100644 --- a/sysdep/cf/linux.h +++ b/sysdep/cf/linux.h @@ -21,6 +21,10 @@ #define CONFIG_INCLUDE_SYSPRIV_H "sysdep/linux/syspriv.h" +#ifndef AF_MPLS +#define AF_MPLS 28 +#endif + /* Link: sysdep/linux Link: sysdep/unix diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 554f2c97..bb85a38b 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -20,6 +20,7 @@ #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" +#include "lib/alloca.h" #include "sysdep/unix/timer.h" #include "sysdep/unix/unix.h" #include "sysdep/unix/krt.h" @@ -30,6 +31,7 @@ #include <asm/types.h> #include <linux/if.h> +#include <linux/lwtunnel.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> @@ -50,6 +52,21 @@ #define RTA_TABLE 15 #endif +#ifndef RTA_VIA +#define RTA_VIA 18 +#endif + +#ifndef RTA_NEWDST +#define RTA_NEWDST 19 +#endif + +#ifndef RTA_ENCAP_TYPE +#define RTA_ENCAP_TYPE 21 +#endif + +#ifndef RTA_ENCAP +#define RTA_ENCAP 22 +#endif #define krt_ecmp6(p) ((p)->af == AF_INET6) @@ -303,7 +320,7 @@ static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) }, [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) }, - [IFA_FLAGS] = { 1, 1, sizeof(u32) }, + [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = { @@ -313,10 +330,16 @@ static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = { }; -#define BIRD_RTA_MAX (RTA_TABLE+1) +#define BIRD_RTA_MAX (RTA_ENCAP+1) -static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = { +static struct 
nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = { [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, +}; + +static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = { + [RTA_DST] = { 1, 0, 0 }, }; static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = { @@ -329,6 +352,8 @@ static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = { [RTA_MULTIPATH] = { 1, 0, 0 }, [RTA_FLOW] = { 1, 1, sizeof(u32) }, [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, }; static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = { @@ -341,6 +366,20 @@ static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = { [RTA_METRICS] = { 1, 0, 0 }, [RTA_FLOW] = { 1, 1, sizeof(u32) }, [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) }, + [RTA_ENCAP] = { 1, 0, 0 }, +}; + +static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = { + [RTA_DST] = { 1, 1, sizeof(u32) }, + [RTA_IIF] = { 1, 1, sizeof(u32) }, + [RTA_OIF] = { 1, 1, sizeof(u32) }, + [RTA_PRIORITY] = { 1, 1, sizeof(u32) }, + [RTA_METRICS] = { 1, 0, 0 }, + [RTA_FLOW] = { 1, 1, sizeof(u32) }, + [RTA_TABLE] = { 1, 1, sizeof(u32) }, + [RTA_VIA] = { 1, 0, 0 }, + [RTA_NEWDST] = { 1, 0, 0 }, }; @@ -373,6 +412,9 @@ nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, return 1; } +static inline u16 rta_get_u16(struct rtattr *a) +{ return *(u16 *) RTA_DATA(a); } + static inline u32 rta_get_u32(struct rtattr *a) { return *(u32 *) RTA_DATA(a); } @@ -390,6 +432,25 @@ static inline ip_addr rta_get_ipa(struct rtattr *a) return ipa_from_ip6(rta_get_ip6(a)); } +static inline ip_addr rta_get_via(struct rtattr *a) +{ + struct rtvia *v = RTA_DATA(a); + switch(v->rtvia_family) { + case AF_INET: return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr)); + case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr)); + } + return IPA_NONE; +} + +static u32 
rta_mpls_stack[MPLS_MAX_LABEL_STACK]; +static inline int rta_get_mpls(struct rtattr *a, u32 *stack) +{ + if (RTA_PAYLOAD(a) % 4) + log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a)); + + return mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack); +} + struct rtattr * nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen) { @@ -410,6 +471,24 @@ nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint return a; } +static inline struct rtattr * +nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code) +{ + return nl_add_attr(h, bufsize, code, NULL, 0); +} + +static inline void +nl_close_attr(struct nlmsghdr *h, struct rtattr *a) +{ + a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a; +} + +static inline void +nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data) +{ + nl_add_attr(h, bufsize, code, &data, 2); +} + static inline void nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data) { @@ -439,16 +518,46 @@ nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa) nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa)); } -static inline struct rtattr * -nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code) +static inline void +nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack) { - return nl_add_attr(h, bufsize, code, NULL, 0); + char buf[len*4]; + mpls_put(buf, len, stack); + nl_add_attr(h, bufsize, code, buf, len*4); } static inline void -nl_close_attr(struct nlmsghdr *h, struct rtattr *a) +nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack) { - a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a; + nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS); + + struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP); + nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack); + nl_close_attr(h, nest); +} + +static inline void +nl_add_attr_via(struct nlmsghdr 
*h, uint bufsize, ip_addr ipa) +{ + struct rtattr *nest = nl_open_attr(h, bufsize, RTA_VIA); + struct rtvia *via = RTA_DATA(nest); + + h->nlmsg_len += sizeof(*via); + + if (ipa_is_ip4(ipa)) + { + via->rtvia_family = AF_INET; + put_ip4(via->rtvia_addr, ipa_to_ip4(ipa)); + h->nlmsg_len += sizeof(ip4_addr); + } + else + { + via->rtvia_family = AF_INET6; + put_ip6(via->rtvia_addr, ipa_to_ip6(ipa)); + h->nlmsg_len += sizeof(ip6_addr); + } + + nl_close_attr(h, nest); } static inline struct rtnexthop * @@ -471,8 +580,24 @@ nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh) nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh; } +static inline void +nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af) +{ + if (nh->labels > 0) + if (af == AF_MPLS) + nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label); + else + nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label); + + if (ipa_nonzero(nh->gw)) + if (af == AF_MPLS) + nl_add_attr_via(h, bufsize, nh->gw); + else + nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); +} + static void -nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct mpnh *nh) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); @@ -484,7 +609,7 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct mpnh *nh) rtnh->rtnh_hops = nh->weight; rtnh->rtnh_ifindex = nh->iface->index; - nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw); + nl_add_nexthop(h, bufsize, nh, af); nl_close_nexthop(h, rtnh); } @@ -492,17 +617,17 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct mpnh *nh) nl_close_attr(h, a); } -static struct mpnh * +static struct nexthop * nl_parse_multipath(struct krt_proto *p, struct rtattr *ra) { /* Temporary buffer for multicast nexthops */ - static struct mpnh *nh_buffer; + static struct nexthop *nh_buffer; static int nh_buf_size; /* in number of structures */ static int nh_buf_used; 
struct rtattr *a[BIRD_RTA_MAX]; struct rtnexthop *nh = RTA_DATA(ra); - struct mpnh *rv, *first, **last; + struct nexthop *rv, *first, **last; unsigned len = RTA_PAYLOAD(ra); first = NULL; @@ -518,7 +643,7 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra) if (nh_buf_used == nh_buf_size) { nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4; - nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh)); + nh_buffer = xrealloc(nh_buffer, nh_buf_size * NEXTHOP_MAX_SIZE); } *last = rv = nh_buffer + nh_buf_used++; rv->next = NULL; @@ -531,7 +656,7 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra) /* Nonexistent RTNH_PAYLOAD ?? */ nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0); - nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a)); + nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)); if (a[RTA_GATEWAY]) { rv->gw = rta_get_ipa(a[RTA_GATEWAY]); @@ -543,7 +668,22 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra) return NULL; } else - return NULL; + rv->gw = IPA_NONE; + + if (a[RTA_ENCAP_TYPE]) + { + if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) { + log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE])); + return NULL; + } + + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + rv->labels = rta_get_mpls(enca[RTA_DST], rv->label); + break; + } + len -= NLMSG_ALIGN(nh->rtnh_len); nh = RTNH_NEXT(nh); @@ -952,28 +1092,21 @@ krt_capable(rte *e) { rta *a = e->attrs; - if (a->cast != RTC_UNICAST) - return 0; - switch (a->dest) - { - case RTD_ROUTER: - case RTD_DEVICE: - if (a->iface == NULL) - return 0; + { + case RTD_UNICAST: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: - case RTD_MULTIPATH: - break; + return 1; + default: return 0; - } - return 1; + } } static inline int -nh_bufsize(struct mpnh *nh) +nh_bufsize(struct nexthop *nh) { int rv = 0; 
for (; nh != NULL; nh = nh->next) @@ -982,12 +1115,12 @@ nh_bufsize(struct mpnh *nh) } static int -nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface) +nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, struct nexthop *nh) { eattr *ea; net *net = e->net; rta *a = e->attrs; - int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops); + int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); u32 priority = 0; struct { @@ -1011,7 +1144,13 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d r->r.rtm_dst_len = net_pxlen(net->n.addr); r->r.rtm_protocol = RTPROT_BIRD; r->r.rtm_scope = RT_SCOPE_UNIVERSE; - nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); + if (p->af == AF_MPLS) + { + u32 label = net_mpls(net->n.addr); + nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label); + } + else + nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); /* * Strange behavior for RTM_DELROUTE: @@ -1043,7 +1182,7 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d if (ea = ea_find(eattrs, EA_KRT_SCOPE)) r->r.rtm_scope = ea->u.data; else - r->r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? 
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); @@ -1068,17 +1207,17 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d dest: - /* a->iface != NULL checked in krt_capable() for router and device routes */ switch (dest) { - case RTD_ROUTER: - r->r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index); - nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, gw); - break; - case RTD_DEVICE: + case RTD_UNICAST: r->r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index); + if (nh->next && !krt_ecmp6(p)) + nl_add_multipath(&r->h, rsize, nh, p->af); + else + { + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); + nl_add_nexthop(&r->h, rsize, nh, p->af); + } break; case RTD_BLACKHOLE: r->r.rtm_type = RTN_BLACKHOLE; @@ -1089,10 +1228,6 @@ dest: case RTD_PROHIBIT: r->r.rtm_type = RTN_PROHIBIT; break; - case RTD_MULTIPATH: - r->r.rtm_type = RTN_UNICAST; - nl_add_multipath(&r->h, rsize, a->nexthops); - break; case RTD_NONE: break; default: @@ -1109,21 +1244,21 @@ nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) rta *a = e->attrs; int err = 0; - if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH)) + if (krt_ecmp6(p) && a->nh.next) { - struct mpnh *nh = a->nexthops; + struct nexthop *nh = &(a->nh); - err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface); + err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_UNICAST, nh); if (err < 0) return err; for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface); + err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_UNICAST, nh); return err; } - return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface); + return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, &(a->nh)); } static inline int @@ -1133,7 +1268,7 @@ nl_delete_rte(struct krt_proto *p, rte *e, 
struct ea_list *eattrs) /* For IPv6, we just repeatedly request DELETE until we get error */ do - err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL); + err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, NULL); while (krt_ecmp6(p) && !err); return err; @@ -1168,10 +1303,10 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list } -static inline struct mpnh * -nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight) +static inline struct nexthop * +nl_alloc_nexthop(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight) { - struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh)); + struct nexthop *nh = lp_alloc(s->pool, sizeof(struct nexthop)); nh->gw = gw; nh->iface = iface; @@ -1280,6 +1415,19 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) net_fill_ip6(&dst, IP6_NONE, 0); break; + case AF_MPLS: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a))) + return; + + if (a[RTA_DST]) + if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) == 1) + net_fill_mpls(&dst, rta_mpls_stack[0]); + else + log(L_WARN "KRT: Got multi-label MPLS RTA_DST"); + else + return; /* No support for MPLS routes without RTA_DST */ + break; + default: return; } @@ -1342,60 +1490,58 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type)) nl_announce_route(s); - rta *ra = lp_allocz(s->pool, sizeof(rta)); + rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); ra->src = p->p.main_source; ra->source = RTS_INHERIT; ra->scope = SCOPE_UNIVERSE; - ra->cast = RTC_UNICAST; switch (i->rtm_type) { case RTN_UNICAST: + ra->dest = RTD_UNICAST; if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET)) { - ra->dest = RTD_MULTIPATH; - ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]); - if (!ra->nexthops) + struct nexthop *nh = nl_parse_multipath(p, a[RTA_MULTIPATH]); + if (!nh) { log(L_ERR "KRT: Received strange multipath 
route %N", net->n.addr); return; } + ra->nh = *nh; break; } - ra->iface = if_find_by_index(oif); - if (!ra->iface) + ra->nh.iface = if_find_by_index(oif); + if (!ra->nh.iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); return; } - if (a[RTA_GATEWAY]) + if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] || (i->rtm_family == AF_MPLS) && a[RTA_VIA]) { - ra->dest = RTD_ROUTER; - ra->gw = rta_get_ipa(a[RTA_GATEWAY]); + if (i->rtm_family == AF_MPLS) + ra->nh.gw = rta_get_via(a[RTA_VIA]); + else + ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); /* Silently skip strange 6to4 routes */ const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->gw, (net_addr *) &sit)) + if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) return; neighbor *nbr; - nbr = neigh_find2(&p->p, &ra->gw, ra->iface, + nbr = neigh_find2(&p->p, &(ra->nh.gw), ra->nh.iface, (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, ra->gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, + ra->nh.gw); return; } } - else - { - ra->dest = RTD_DEVICE; - def_scope = RT_SCOPE_LINK; - } break; case RTN_BLACKHOLE: @@ -1413,6 +1559,44 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; } + int labels = 0; + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) + labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); + + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) + { + switch (rta_get_u16(a[RTA_ENCAP_TYPE])) + { + case LWTUNNEL_ENCAP_MPLS: + { + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); + break; + } + default: + SKIP("unknown encapsulation method %d\n", 
rta_get_u16(a[RTA_ENCAP_TYPE])); + break; + } + } + + if (labels < 0) + { + log(L_WARN "KRT: Too long MPLS stack received, ignoring."); + ra->nh.labels = 0; + } + else + ra->nh.labels = labels; + + rte *e = rte_get_temp(ra); + e->net = net; + e->u.krt.src = src; + e->u.krt.proto = i->rtm_protocol; + e->u.krt.seen = 0; + e->u.krt.best = 0; + e->u.krt.metric = 0; + if (i->rtm_scope != def_scope) { ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); @@ -1426,6 +1610,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) ea->attrs[0].u.data = i->rtm_scope; } + if (a[RTA_PRIORITY]) + e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]); + if (a[RTA_PREFSRC]) { ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); @@ -1508,15 +1695,20 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *a = s->attrs; + rta *oa = s->attrs; + + struct nexthop *nhs = &oa->nh; + nexthop_insert(&nhs, &ra->nh); - if (a->dest != RTD_MULTIPATH) + /* Perhaps new nexthop is inserted at the first position */ + if (nhs == &ra->nh) { - a->dest = RTD_MULTIPATH; - a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0); - } + /* Swap rtas */ + s->attrs = ra; - mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0)); + /* Keep old eattrs */ + ra->eattrs = oa->eattrs; + } } } @@ -1543,6 +1735,15 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); nl_parse_end(&s); + + nl_parse_begin(&s, 1, 1); + nl_request_dump(AF_MPLS, RTM_GETROUTE); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) + nl_parse_route(&s, h); + else + log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); } /* diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index e899671d..c6ff6275 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -645,17 
+645,11 @@ krt_same_dest(rte *k, rte *e) if (ka->dest != ea->dest) return 0; - switch (ka->dest) - { - case RTD_ROUTER: - return ipa_equal(ka->gw, ea->gw); - case RTD_DEVICE: - return !strcmp(ka->iface->name, ea->iface->name); - case RTD_MULTIPATH: - return mpnh_same(ka->nexthops, ea->nexthops); - default: - return 1; - } + + if (ka->dest == RTD_UNICAST) + return nexthop_same(&(ka->nh), &(ea->nh)); + + return 1; } /* @@ -990,7 +984,7 @@ krt_store_tmp_attrs(rte *rt, struct ea_list *attrs) static int krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) { - struct krt_proto *p = (struct krt_proto *) P; + // struct krt_proto *p = (struct krt_proto *) P; rte *e = *new; if (e->attrs->src->proto == P) @@ -1011,11 +1005,6 @@ krt_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct li return -1; } - if (!KRT_CF->devroutes && - (e->attrs->dest == RTD_DEVICE) && - (e->attrs->source != RTS_STATIC_DEVICE)) - return -1; - if (!krt_capable(e)) return -1; @@ -1153,7 +1142,8 @@ krt_start(struct proto *P) { case NET_IP4: p->af = AF_INET; break; case NET_IP6: p->af = AF_INET6; break; - default: ASSERT(0); + case NET_MPLS: p->af = AF_MPLS; break; + default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break; } add_tail(&krt_proto_list, &p->krt_node); @@ -1264,7 +1254,7 @@ struct protocol proto_unix_kernel = { .template = "kernel%d", .attr_class = EAP_KRT, .preference = DEF_PREF_INHERITED, - .channel_mask = NB_IP, + .channel_mask = NB_IP | NB_MPLS, .proto_size = sizeof(struct krt_proto), .config_size = sizeof(struct krt_config), .preconfig = krt_preconfig, |