diff options
author | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2016-11-08 17:03:31 +0100 |
---|---|---|
committer | Ondrej Zajicek (work) <santiago@crfreenet.org> | 2016-11-08 17:04:29 +0100 |
commit | cc5b93f72db80abd1262a0a5e1d8400ceef54385 (patch) | |
tree | 42d75cb7898c6b6077e9cfbb04074cfc84e38930 | |
parent | 5de0e848de06a9187046dbc380d9ce6a6f8b21a2 (diff) | |
parent | f51b1f556595108d53b9f4580bfcb96bfbc85442 (diff) |
Merge tag 'v1.6.2' into int-new
40 files changed, 985 insertions, 335 deletions
diff --git a/.cvsignore b/.cvsignore deleted file mode 100644 index dc557db3..00000000 --- a/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -bird.conf diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..52510836 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +/autom4te.cache/ +/doc/*.html +/obj/ +/Makefile +/bird +/birdc +/birdcl +/config.log +/config.status +/configure +/bird.conf +/bird.log @@ -1,3 +1,19 @@ +Version 1.6.2 (2016-09-29) + o Fixes serious bug introduced in the previous version + +Version 1.6.1 (2016-09-22) + o Support for IPv6 ECMP + o Better handling of IPv6 tentative addresses + o Several updates and fixes in Babel protocol + o Filter: New !~ operator + o Filter: ASN ranges in bgpmask + o KRT: New kernel protocol option 'metric' + o KRT: New route attribute 'krt_scope' + o Improved BIRD help messages + o Fixes memory leak in BGP multipath + o Fixes handling of empty path segments in BGP AS_PATH + o Several bug fixes + Version 1.6.0 (2016-04-29) o Major RIP protocol redesign o New Babel routing protocol @@ -1,82 +1,114 @@ - BIRD Internet Routing Daemon + BIRD Internet Routing Daemon - (c) 1998--2008 Martin Mares <mj@ucw.cz> + Home page http://bird.network.cz/ + Mailing list bird-users@network.cz + + (c) 1998--2008 Martin Mares <mj@ucw.cz> (c) 1998--2000 Pavel Machek <pavel@ucw.cz> (c) 1998--2008 Ondrej Filip <feela@network.cz> - (c) 2009--2013 CZ.NIC z.s.p.o. + (c) 2009--2016 CZ.NIC z.s.p.o. ================================================================================ -The BIRD project is an attempt to create a routing daemon running on UNIX-like -systems (but not necessarily limited to them) with full support of all modern -routing protocols, easy to use configuration interface and powerful route -filtering language. 
- -If you want to help us debugging, enhancing and porting BIRD or just lurk -around to see what's going to develop from this strange creature, feel free -to subscribe to the BIRD users mailing list (bird-users@bird.network.cz), -send subscribes to majordomo at the same machine). Bug reports, suggestions, -feature requests (: and code :) are welcome. +The BIRD project aims to develop a dynamic IP routing daemon with full support +of all modern routing protocols, easy to use configuration interface and +powerful route filtering language, primarily targeted on (but not limited to) +Linux and other UNIX-like systems and distributed under the GNU General +Public License. -You can download the latest version from ftp://bird.network.cz/pub/bird/ -and look at the BIRD home page at http://bird.network.cz/. +What do we support +================== -BIRD development started as a student project at the Faculty of Math -and Physics, Charles University, Prague, Czech Republic under supervision -of RNDr. Libor Forst <forst@cuni.cz>. BIRD has been developed and supported -by CZ.NIC z.s.p.o. http://www.nic.cz/ since 2009. 
+ o Both IPv4 and IPv6 (use --enable-ipv6 when configuring) + o Multiple routing tables + o Border Gateway Protocol (BGPv4) + o Routing Information Protocol (RIPv2, RIPng) + o Open Shortest Path First protocol (OSPFv2, OSPFv3) + o Babel Routing Protocol (Babel) + o Bidirectional Forwarding Detection (BFD) + o IPv6 router advertisements + o Static routes + o Inter-table protocol + o Command-line interface allowing on-line control and inspection of + status of the daemon + o Soft reconfiguration, no need to use complex online commands to + change the configuration, just edit the configuration file and notify + BIRD to re-read it and it will smoothly switch itself to the new + configuration, not disturbing routing protocols unless they are + affected by the configuration changes + o Powerful language for route filtering, see doc/bird.conf.example + o Linux, FreeBSD, NetBSD and OpenBSD ports + +How to install BIRD +=================== + + o From standard distribution package of your OS (recommended) + o From official binary packages for Debian and Red Hat Linux + ftp://bird.network.cz/pub/bird/debian/ + ftp://bird.network.cz/pub/bird/redhat/ + o From source code of the latest stable release version + ftp://bird.network.cz/pub/bird/ + o From source code of the actual development version + git://git.nic.cz/bird.git + https://gitlab.labs.nic.cz/labs/bird/ + +How to install BIRD from source code +------------------------------------ + + $ ./configure + $ make + $ su + # make install + # vi /usr/local/etc/bird.conf + +See the file INSTALL for more information about installation from source code. + +Documentation +============= + +Online documentation is available at http://bird.network.cz/?get_doc or as HTML +files in the doc directory, you can install it by `make install-docs' and +rebuild it by `make docs', but you'll need SGMLtools and LaTeX to be installed +on your machine. 
You can also download a neatly formatted PostScript version as +a separate archive (bird-doc-*.tar.gz) from ftp://bird.network.cz/pub/bird/ + +User support +============ - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +If you want to help us debugging, enhancing and porting BIRD or just lurk +around to see what's going to develop, feel free to subscribe to the BIRD +users mailing list bird-users@network.cz, just send `subscribe' to +bird-request@network.cz. Bug reports, suggestions, feature requests and +code are welcome! We don't use gitlab issues for reporting, sorry. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +Subscribe: http://bird.network.cz/mailman/listinfo/bird-users/ +Archive: http://bird.network.cz/pipermail/bird-users/ - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Licence +======= +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. -How to install BIRD: +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
- ./configure - make - make install - vi /usr/local/etc/bird.conf +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -Online documentation is available as HTML files in the doc directory, -you can install it by `make install-docs' and rebuild it by `make docs', -but for the latter you need SGMLtools and LaTeX to be installed on your -machine. You can also download a neatly formatted PostScript version -as a separate archive (bird-doc-*.tar.gz). +History +======= -What do we support: +BIRD development started as a student project at the Faculty of Math +and Physics, Charles University, Prague, Czech Republic under supervision +of RNDr. Libor Forst <forst@cuni.cz>. BIRD has been developed and supported +by CZ.NIC z.s.p.o. http://www.nic.cz/ since 2009. - o Both IPv4 and IPv6 (use --enable-ipv6 when configuring) - o Multiple routing tables - o BGP - o RIP - o OSPF - o Static routes - o Inter-table protocol - o IPv6 router advertisements - o Bidirectional Forwarding Detection (BFD) - o Command-line interface (using the `birdc' client; to get - some help, just press `?') - o Soft reconfiguration -- no online commands for changing the - configuration in very limited ways, just edit the configuration - file and issue a `configure' command or send SIGHUP and BIRD - will start using the new configuration, possibly restarting - protocols affected by the configuration changes. - o Powerful language for route filtering (see doc/bird.conf.example). - -What is missing: - - o See the TODO list Good Luck and enjoy the BIRD :) The BIRD Team diff --git a/client/util.c b/client/util.c index c35cf8f4..2d6c074d 100644 --- a/client/util.c +++ b/client/util.c @@ -40,6 +40,7 @@ bug(const char *msg, ...) 
fputs("Internal error: ", stderr); vlog(msg, args); vfprintf(stderr, msg, args); + va_end(args); exit(1); } @@ -51,5 +52,6 @@ die(const char *msg, ...) va_start(args, msg); cleanup(); vlog(msg, args); + va_end(args); exit(1); } diff --git a/conf/cf-lex.l b/conf/cf-lex.l index dd99b497..9eb4f116 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -30,6 +30,7 @@ #include <errno.h> #include <stdlib.h> #include <stdarg.h> +#include <stdint.h> #include <unistd.h> #include <libgen.h> #include <glob.h> @@ -233,6 +234,7 @@ else: { <CCOMM>. \!\= return NEQ; +\!\~ return NMA; \<\= return LEQ; \>\= return GEQ; \&\& return AND; diff --git a/conf/conf.c b/conf/conf.c index 8d4d28e3..175e223b 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -512,6 +512,7 @@ cf_error(const char *msg, ...) va_start(args, msg); if (bvsnprintf(buf, sizeof(buf), msg, args) < 0) strcpy(buf, "<bug: error message too long>"); + va_end(args); new_config->err_msg = cfg_strdup(buf); new_config->err_lino = ifs->lino; new_config->err_file_name = ifs->file_name; diff --git a/conf/confbase.Y b/conf/confbase.Y index e64d7593..22edbdfd 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -87,7 +87,7 @@ CF_DECLS %nonassoc PREFIX_DUMMY %left AND OR -%nonassoc '=' '<' '>' '~' GEQ LEQ NEQ PO PC +%nonassoc '=' '<' '>' '~' GEQ LEQ NEQ NMA PO PC %left '+' '-' %left '*' '/' '%' %left '!' diff --git a/doc/bird.sgml b/doc/bird.sgml index 014225d1..26673f03 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -610,8 +610,8 @@ agreement"). options, in that case for given interface the first matching interface option is used. - This option is allowed in BFD, Direct, OSPF, RAdv and RIP protocols, but - in OSPF protocol it is used in the <cf/area/ subsection. + This option is allowed in Babel, BFD, Direct, OSPF, RAdv and RIP + protocols, but in OSPF protocol it is used in the <cf/area/ subsection. Default: none. @@ -667,7 +667,7 @@ agreement"). <descrip> <tag>id <M>num</M></tag> - ID of the password, (0-255). 
If it's not used, BIRD will choose ID based + ID of the password, (1-255). If it is not used, BIRD will choose ID based on an order of the password item in the interface. For example, second password item in one interface will have default ID 2. ID is used by some routing protocols to identify which password was used to @@ -1016,9 +1016,9 @@ foot). of type <cf/string/, print such variables, use standard string comparison operations (e.g. <cf/=, !=, <, >, <=, >=/), but you can't concatenate two strings. String literals are written as - <cf/"This is a string constant"/. Additionally matching <cf/˜/ - operator could be used to match a string value against a shell pattern - (represented also as a string). + <cf/"This is a string constant"/. Additionally matching (<cf/˜, + !˜/) operators could be used to match a string value against + a shell pattern (represented also as a string). <tag/ip/ This type can hold a single IP address. Depending on the compile-time @@ -1035,7 +1035,7 @@ foot). or <cf><m>ipaddress</m>/<m>netmask</m></cf>. There are two special operators on prefixes: <cf/.ip/ which extracts the IP address from the pair, and <cf/.len/, which separates prefix length from the pair. - So <cf>1.2.0.0/16.pxlen = 16</cf> is true. + So <cf>1.2.0.0/16.len = 16</cf> is true. <tag/ec/ This is a specialized type used to represent BGP extended community @@ -1165,8 +1165,10 @@ foot). is 4 3 2 1, then: <tt>bgp_path ˜ [= * 4 3 * =]</tt> is true, but <tt>bgp_path ˜ [= * 4 5 * =]</tt> is false. BGP mask expressions can also contain integer expressions enclosed in parenthesis - and integer variables, for example <tt>[= * 4 (1+2) a =]</tt>. There is - also old syntax that uses / .. / instead of [= .. =] and ? instead of *. + and integer variables, for example <tt>[= * 4 (1+2) a =]</tt>. You can + also use ranges, for example <tt>[= * 3..5 2 100..200 * =]</tt>. + There is also old (deprecated) syntax that uses / .. / instead of [= .. =] + and ? instead of *. 
<tag/clist/ Clist is similar to a set, except that unlike other sets, it can be @@ -1200,9 +1202,9 @@ foot). <tag/eclist/ Eclist is a data type used for BGP extended community lists. Eclists are very similar to clists, but they are sets of ECs instead of pairs. - The same operations (like <cf/add/, <cf/delete/, or <cf/˜/ - membership operator) can be used to modify or test eclists, with ECs - instead of pairs as arguments. + The same operations (like <cf/add/, <cf/delete/ or <cf/˜/ and + <cf/!˜/ membership operators) can be used to modify or test + eclists, with ECs instead of pairs as arguments. </descrip> @@ -1211,19 +1213,19 @@ foot). <p>The filter language supports common integer operators <cf>(+,-,*,/)</cf>, parentheses <cf/(a*(b+c))/, comparison <cf/(a=b, a!=b, a<b, a>=b)/. Logical operations include unary not (<cf/!/), and (<cf/&&/) and or -(<cf/||/). Special operators include <cf/˜/ for "is element -of a set" operation - it can be used on element and set of elements of the same -type (returning true if element is contained in the given set), or on two -strings (returning true if first string matches a shell-like pattern stored in -second string) or on IP and prefix (returning true if IP is within the range -defined by that prefix), or on prefix and prefix (returning true if first prefix -is more specific than second one) or on bgppath and bgpmask (returning true if -the path matches the mask) or on number and bgppath (returning true if the -number is in the path) or on bgppath and int (number) set (returning true if any -ASN from the path is in the set) or on pair/quad and clist (returning true if -the pair/quad is element of the clist) or on clist and pair/quad set (returning -true if there is an element of the clist that is also a member of the pair/quad -set). +(<cf/||/). 
Special operators include (<cf/˜/, +<cf/!˜/) for "is (not) element of a set" operation - it can be used on +element and set of elements of the same type (returning true if element is +contained in the given set), or on two strings (returning true if first string +matches a shell-like pattern stored in second string) or on IP and prefix +(returning true if IP is within the range defined by that prefix), or on prefix +and prefix (returning true if first prefix is more specific than second one) or +on bgppath and bgpmask (returning true if the path matches the mask) or on +number and bgppath (returning true if the number is in the path) or on bgppath +and int (number) set (returning true if any ASN from the path is in the set) or +on pair/quad and clist (returning true if the pair/quad is element of the +clist) or on clist and pair/quad set (returning true if there is an element of +the clist that is also a member of the pair/quad set). <p>There is one operator related to ROA infrastructure - <cf/roa_check()/. It examines a ROA table and does RFC 6483 route origin validation for a given @@ -1312,7 +1314,7 @@ clist for most purposes. <cf/RTS_DUMMY/, <cf/RTS_STATIC/, <cf/RTS_INHERIT/, <cf/RTS_DEVICE/, <cf/RTS_STATIC_DEVICE/, <cf/RTS_REDIRECT/, <cf/RTS_RIP/, <cf/RTS_OSPF/, <cf/RTS_OSPF_IA/, <cf/RTS_OSPF_EXT1/, <cf/RTS_OSPF_EXT2/, <cf/RTS_BGP/, - <cf/RTS_PIPE/. + <cf/RTS_PIPE/, <cf/RTS_BABEL/. <tag><m/enum/ cast</tag> Route type (Currently <cf/RTC_UNICAST/ for normal routes, @@ -1475,8 +1477,37 @@ protocol babel [<name>] { yes. </descrip> +<sect1>Attributes + +<p>Babel defines just one attribute: the internal babel metric of the route. It +is exposed as the <cf/babel_metric/ attribute and has range from 1 to infinity +(65535). 
+ +<sect1>Example + +<p><code> +protocol babel { + interface "eth*" { + type wired; + }; + interface "wlan0", "wlan1" { + type wireless; + hello interval 1; + rxcost 512; + }; + interface "tap0"; + + # This matches the default of babeld: redistribute all addresses + # configured on local interfaces, plus re-distribute all routes received + # from other babel peers. + + export where (source = RTS_DEVICE) || (source = RTS_BABEL); +} +</code> + -<sect><label id="sect-bfd">BFD +<sect>BFD +<label id="sect-bfd"> <sect1>Introduction @@ -2358,6 +2389,17 @@ limitations can be overcome using another routing table and the pipe protocol. protocol work with. Available only on systems supporting multiple routing tables. + <tag>metric <m/number/</tag> (Linux) + Use specified value as a kernel metric (priority) for all routes sent to + the kernel. When multiple routes for the same network are in the kernel + routing table, the Linux kernel chooses one with lower metric. Also, + routes with different metrics do not clash with each other, therefore + using dedicated metric value is a reliable way to avoid overwriting + routes from other sources (e.g. kernel device routes). Metric 0 has a + special meaning of undefined metric, in which either OS default is used, + or per-route metric can be set using <cf/krt_metric/ attribute. Default: + 0 (undefined). + <tag>graceful restart <m/switch/</tag> Participate in graceful restart recovery. If this option is enabled and a graceful restart recovery is active, the Kernel protocol will defer @@ -2390,9 +2432,11 @@ these attributes: route. See /etc/iproute2/rt_protos for common values. On BSD, it is based on STATIC and PROTOx flags. The attribute is read-only. - <tag>int <cf/krt_metric/</tag> + <tag>int <cf/krt_metric/</tag> (Linux) The kernel metric of the route. When multiple same routes are in a kernel routing table, the Linux kernel chooses one with lower metric. 
+ Note that preferred way to set kernel metric is to use protocol option + <cf/metric/, unless per-route metric values are needed. <tag>ip <cf/krt_prefsrc/</tag> (Linux) The preferred source address. Used in source address selection for @@ -2400,6 +2444,15 @@ these attributes: <tag>int <cf/krt_realm/</tag> (Linux) The realm of the route. Can be used for traffic classification. + + <tag>int <cf/krt_scope/</tag> (Linux IPv4) + The scope of the route. Valid values are 0-254, although Linux kernel + may reject some values depending on route type and nexthop. It is + supposed to represent `indirectness' of the route, where nexthops of + routes are resolved through routes with a higher scope, but in current + kernels anything below <it/link/ (253) is treated as <it/global/ (0). + When not present, global scope is implied for all routes except device + routes, where link scope is used by default. </descrip> <p>In Linux, there is also a plenty of obscure route attributes mostly focused diff --git a/filter/config.Y b/filter/config.Y index 3e70a63e..3eb5b08f 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -618,16 +618,17 @@ bgp_path: ; bgp_path_tail1: - NUM bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } - | '*' bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; $$->val = 0; } - | '?' 
bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_QUESTION; $$->val = 0; } - | bgp_path_expr bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN_EXPR; $$->val = (uintptr_t) $1; } - | { $$ = NULL; } + NUM bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } + | NUM DDOT NUM bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $4; $$->kind = PM_ASN_RANGE; $$->val = $1; $$->val2 = $3; } + | '*' bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; } + | '?' bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_QUESTION; } + | bgp_path_expr bgp_path_tail1 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN_EXPR; $$->val = (uintptr_t) $1; } + | { $$ = NULL; } ; bgp_path_tail2: - NUM bgp_path_tail2 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } - | '?' bgp_path_tail2 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; $$->val = 0; } + NUM bgp_path_tail2 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } + | '?' bgp_path_tail2 { $$ = cfg_allocz(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; } | { $$ = NULL; } ; @@ -725,6 +726,7 @@ term: | term '>' term { $$ = f_new_inst(); $$->code = '<'; $$->a1.p = $3; $$->a2.p = $1; } | term GEQ term { $$ = f_new_inst(); $$->code = P('<','='); $$->a1.p = $3; $$->a2.p = $1; } | term '~' term { $$ = f_new_inst(); $$->code = '~'; $$->a1.p = $1; $$->a2.p = $3; } + | term NMA term { $$ = f_new_inst(); $$->code = P('!','~'); $$->a1.p = $1; $$->a2.p = $3; } | '!' 
term { $$ = f_new_inst(); $$->code = '!'; $$->a1.p = $2; } | DEFINED '(' term ')' { $$ = f_new_inst(); $$->code = P('d','e'); $$->a1.p = $3; } diff --git a/filter/filter.c b/filter/filter.c index 7b3e550f..3282bd50 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -81,6 +81,10 @@ pm_format(struct f_path_mask *p, buffer *buf) buffer_puts(buf, "* "); break; + case PM_ASN_RANGE: + buffer_print(buf, "%u..%u ", p->val, p->val2); + break; + case PM_ASN_EXPR: buffer_print(buf, "%u ", f_eval_asn((struct f_inst *) p->val)); break; @@ -146,18 +150,29 @@ val_compare(struct f_val v1, struct f_val v2) } static int -pm_path_same(struct f_path_mask *m1, struct f_path_mask *m2) +pm_same(struct f_path_mask *m1, struct f_path_mask *m2) { while (m1 && m2) { - if ((m1->kind != m2->kind) || (m1->val != m2->val)) + if (m1->kind != m2->kind) return 0; + if (m1->kind == PM_ASN_EXPR) + { + if (!i_same((struct f_inst *) m1->val, (struct f_inst *) m2->val)) + return 0; + } + else + { + if ((m1->val != m2->val) || (m1->val2 != m2->val2)) + return 0; + } + m1 = m1->next; m2 = m2->next; } - return !m1 && !m2; + return !m1 && !m2; } /** @@ -182,7 +197,7 @@ val_same(struct f_val v1, struct f_val v2) switch (v1.type) { case T_PATH_MASK: - return pm_path_same(v1.val.path_mask, v2.val.path_mask); + return pm_same(v1.val.path_mask, v2.val.path_mask); case T_PATH: case T_CLIST: case T_ECLIST: @@ -673,6 +688,16 @@ interpret(struct f_inst *what) runtime( "~ applied on unknown type pair" ); res.val.i = !!res.val.i; break; + + case P('!','~'): + TWOARGS; + res.type = T_BOOL; + res.val.i = val_in_range(v1, v2); + if (res.val.i == CMP_ERROR) + runtime( "!~ applied on unknown type pair" ); + res.val.i = !res.val.i; + break; + case P('d','e'): ONEARG; res.type = T_BOOL; @@ -1415,7 +1440,8 @@ i_same(struct f_inst *f1, struct f_inst *f2) case P('A','p'): TWOARGS; break; case P('C','a'): TWOARGS; break; case P('a','f'): - case P('a','l'): ONEARG; break; + case P('a','l'): + case P('a','L'): ONEARG; break; 
#if 0 case P('R','C'): TWOARGS; diff --git a/filter/test.conf b/filter/test.conf index 5cafe551..a8c3a508 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -96,22 +96,28 @@ clist l2; eclist el; eclist el2; { + print "Entering path test..."; pm1 = / 4 3 2 1 /; - pm2 = [= 4 3 2 1 =]; + pm2 = [= 3..6 3 2 1..2 =]; print "Testing path masks: ", pm1, " ", pm2; p2 = prepend( + empty +, 1 ); p2 = prepend( p2, 2 ); p2 = prepend( p2, 3 ); p2 = prepend( p2, 4 ); - print "Testing paths: ", p2; + print "Testing path: (4 3 2 1) = ", p2; print "Should be true: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 3 ~ p2, " ", p2 ~ [2, 10..20], " ", p2 ~ [4, 10..20]; print "4 = ", p2.len; p2 = prepend( p2, 5 ); - print "Should be false: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 10 ~ p2, " ", p2 ~ [8, ten..(2*ten)]; + print "Testing path: (5 4 3 2 1) = ", p2; + print "Should be false: ", p2 ~ pm1, " ", p2 ~ pm2, " ", 10 ~ p2, " ", p2 ~ [8, ten..(2*ten)], " ", p2 ~ [= 1..4 4 3 2 1 =], " ", p2 ~ [= 5 4 4..100 2 1 =]; print "Should be true: ", p2 ~ / ? 4 3 2 1 /, " ", p2, " ", / ? 
4 3 2 1 /; print "Should be true: ", p2 ~ [= * 4 3 * 1 =], " ", p2, " ", [= * 4 3 * 1 =]; + print "Should be true: ", p2 ~ [= 5..6 4..10 1..3 1..3 1..65536 =]; print "Should be true: ", p2 ~ [= (3+2) (2*2) 3 2 1 =], " ", p2 ~ mkpath(5, 4); print "Should be true: ", p2.len = 5, " ", p2.first = 5, " ", p2.last = 1; + print "Should be true: ", pm1 = [= 4 3 2 1 =], " ", pm1 != [= 4 3 1 2 =], " ", + pm2 = [= 3..6 3 2 1..2 =], " ", pm2 != [= 3..6 3 2 1..3 =], " ", + [= 1 2 (1+2) =] = [= 1 2 (1+2) =], " ", [= 1 2 (1+2) =] != [= 1 2 (2+1) =]; print "5 = ", p2.len; print "Delete 3: ", delete(p2, 3); print "Filter 1-3: ", filter(p2, [1..3]); diff --git a/filter/tree.c b/filter/tree.c index ee9f448a..328c7184 100644 --- a/filter/tree.c +++ b/filter/tree.c @@ -82,7 +82,7 @@ build_tree(struct f_tree *from) if (len <= 1024) buf = alloca(len * sizeof(struct f_tree *)); else - buf = malloc(len * sizeof(struct f_tree *)); + buf = xmalloc(len * sizeof(struct f_tree *)); /* Convert a degenerated tree into an sorted array */ i = 0; @@ -94,7 +94,7 @@ build_tree(struct f_tree *from) root = build_tree_rec(buf, 0, len); if (len > 1024) - free(buf); + xfree(buf); return root; } @@ -163,9 +163,9 @@ tree_format(struct f_tree *t, buffer *buf) { buffer_puts(buf, "["); - tree_node_format(t, buf); + tree_node_format(t, buf); - /* Undo last separator */ + /* Undo last separator */ if (buf->pos[-1] != '[') buf->pos -= 2; diff --git a/lib/printf.c b/lib/printf.c index 844f5969..1632b5f3 100644 --- a/lib/printf.c +++ b/lib/printf.c @@ -169,7 +169,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) case ' ': flags |= SPACE; goto repeat; case '#': flags |= SPECIAL; goto repeat; case '0': flags |= ZEROPAD; goto repeat; - } + } /* get field width */ field_width = -1; diff --git a/misc/bird.spec b/misc/bird.spec index 857f03e5..ebc0c942 100644 --- a/misc/bird.spec +++ b/misc/bird.spec @@ -1,6 +1,6 @@ Summary: BIRD Internet Routing Daemon Name: bird -Version: 1.6.0 +Version: 1.6.2 
Release: 1 Copyright: GPL Group: Networking/Daemons diff --git a/nest/a-path.c b/nest/a-path.c index 32e2d27e..e1031b7b 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -435,18 +435,23 @@ parse_path(struct adata *path, struct pm_pos *pos) static int -pm_match(struct pm_pos *pos, u32 asn) +pm_match(struct pm_pos *pos, u32 asn, u32 asn2) { + u32 gas; if (! pos->set) - return pos->val.asn == asn; + return ((pos->val.asn >= asn) && (pos->val.asn <= asn2)); u8 *p = pos->val.sp; int len = *p++; int i; for (i = 0; i < len; i++) - if (get_as(p + i * BS) == asn) + { + gas = get_as(p + i * BS); + + if ((gas >= asn) && (gas <= asn2)) return 1; + } return 0; } @@ -490,7 +495,7 @@ pm_mark(struct pm_pos *pos, int i, int plen, int *nl, int *nh) * next part of mask, we advance each marked state. * We start with marked first position, when we * run out of marked positions, we reject. When - * we process the whole mask, we accept iff final position + * we process the whole mask, we accept if final position * (auxiliary position after last real position in AS path) * is marked. 
*/ @@ -502,6 +507,7 @@ as_path_match(struct adata *path, struct f_path_mask *mask) int plen = parse_path(path, pos); int l, h, i, nh, nl; u32 val = 0; + u32 val2 = 0; /* l and h are bound of interval of positions where are marked states */ @@ -525,12 +531,16 @@ as_path_match(struct adata *path, struct f_path_mask *mask) h = plen; break; - case PM_ASN: - val = mask->val; + case PM_ASN: /* Define single ASN as ASN..ASN - very narrow interval */ + val2 = val = mask->val; goto step; case PM_ASN_EXPR: - val = f_eval_asn((struct f_inst *) mask->val); + val2 = val = f_eval_asn((struct f_inst *) mask->val); goto step; + case PM_ASN_RANGE: + val = mask->val; + val2 = mask->val2; + goto step; case PM_QUESTION: step: nh = nl = -1; @@ -538,7 +548,7 @@ as_path_match(struct adata *path, struct f_path_mask *mask) if (pos[i].mark) { pos[i].mark = 0; - if ((mask->kind == PM_QUESTION) || pm_match(pos + i, val)) + if ((mask->kind == PM_QUESTION) || pm_match(pos + i, val, val2)) pm_mark(pos, i, plen, &nl, &nh); } diff --git a/nest/attrs.h b/nest/attrs.h index 0171c6a8..670b048f 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -45,11 +45,13 @@ struct adata *as_path_filter(struct linpool *pool, struct adata *path, struct f_ #define PM_QUESTION 1 #define PM_ASTERISK 2 #define PM_ASN_EXPR 3 +#define PM_ASN_RANGE 4 struct f_path_mask { struct f_path_mask *next; int kind; uintptr_t val; + uintptr_t val2; }; int as_path_match(struct adata *path, struct f_path_mask *mask); diff --git a/nest/config.Y b/nest/config.Y index 2961dafb..2a746657 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -74,7 +74,7 @@ CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CL CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, - RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE) + RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, 
UNIVERSE, UNDEFINED) CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST) CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH) diff --git a/nest/route.h b/nest/route.h index b5885ee3..a536def7 100644 --- a/nest/route.h +++ b/nest/route.h @@ -287,7 +287,7 @@ void rte_update2(struct channel *c, net_addr *n, rte *new, struct rte_src *src); /* rte_update() moved to protocol.h to avoid dependency conflicts */ void rte_discard(rtable *tab, rte *old); int rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter); -rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, struct ea_list **tmpa, int silent); +rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); void rt_refresh_begin(rtable *t, struct channel *c); void rt_refresh_end(rtable *t, struct channel *c); void rt_schedule_prune(rtable *t); @@ -509,6 +509,8 @@ int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */ static inline int mpnh_same(struct mpnh *x, struct mpnh *y) { return (x == y) || mpnh__same(x, y); } struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp); +void mpnh_insert(struct mpnh **n, struct mpnh *y); +int mpnh_is_sorted(struct mpnh *x); void rta_init(void); rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 167bfc44..bb2b3561 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -250,6 +250,34 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp) return root; } +void +mpnh_insert(struct mpnh **n, struct mpnh *x) +{ + for (; *n; n = &((*n)->next)) + { + int cmp = mpnh_compare_node(*n, x); + + if (cmp < 0) + continue; + else if (cmp > 0) + break; + else + return; + } + + x->next = *n; + *n = x; +} + +int +mpnh_is_sorted(struct mpnh *x) +{ + for (; x && x->next; x = x->next) + if (mpnh_compare_node(x, x->next) 
>= 0) + return 0; + + return 1; +} static struct mpnh * mpnh_copy(struct mpnh *o) @@ -1114,7 +1142,7 @@ rta_dump(rta *a) static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP" }; + "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; static char *rtc[] = { "", " BC", " MC", " AC" }; static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; diff --git a/nest/rt-table.c b/nest/rt-table.c index 9e9d4c7a..eb9dc3a5 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -50,7 +50,7 @@ static linpool *rte_update_pool; static list routing_tables; -static void rt_format_via(rte *e, byte *via); +static byte *rt_format_via(rte *e); static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); @@ -346,10 +346,7 @@ rte_mergable(rte *pri, rte *sec) static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { - byte via[IPA_MAX_TEXT_LENGTH+32]; - - rt_format_via(e, via); - log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, via); + log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rt_format_via(e)); } static inline void @@ -367,7 +364,7 @@ rte_trace_out(uint flag, struct proto *p, rte *e, char *msg) } static rte * -export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) +export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { struct proto *p = c->proto; struct filter *filter = c->out_filter; @@ -382,9 +379,9 @@ export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int si if (!tmpa) tmpa = &tmpb; - *tmpa = make_tmp_attrs(rt, rte_update_pool); + *tmpa = make_tmp_attrs(rt, pool); - v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0; + v = p->import_control ? 
p->import_control(p, &rt, tmpa, pool) : 0; if (v < 0) { if (silent) @@ -403,7 +400,7 @@ export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int si } v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)); + (f_run(filter, &rt, tmpa, pool, FF_FORCE_TMPATTR) > F_ACCEPT)); if (v) { if (silent) @@ -426,6 +423,12 @@ export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int si return NULL; } +static inline rte * +export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) +{ + return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent); +} + static void do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) { @@ -706,7 +709,7 @@ rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_chang static struct mpnh * -mpnh_merge_rta(struct mpnh *nhs, rta *a, int max) +mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max) { struct mpnh nh = { .gw = a->gw, .iface = a->iface }; struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? 
a->nexthops : &nh; @@ -714,7 +717,7 @@ mpnh_merge_rta(struct mpnh *nhs, rta *a, int max) } rte * -rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, int silent) +rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { // struct proto *p = c->proto; struct mpnh *nhs = NULL; @@ -726,7 +729,7 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, int if (!rte_is_valid(best0)) return NULL; - best = export_filter(c, best0, rt_free, tmpa, silent); + best = export_filter_(c, best0, rt_free, tmpa, pool, silent); if (!best || !rte_is_reachable(best)) return best; @@ -736,13 +739,13 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, int if (!rte_mergable(best0, rt0)) continue; - rt = export_filter(c, rt0, &tmp, NULL, 1); + rt = export_filter_(c, rt0, &tmp, NULL, pool, 1); if (!rt) continue; if (rte_is_reachable(rt)) - nhs = mpnh_merge_rta(nhs, rt->attrs, c->merge_limit); + nhs = mpnh_merge_rta(nhs, rt->attrs, pool, c->merge_limit); if (tmp) rte_free(tmp); @@ -750,11 +753,11 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, int if (nhs) { - nhs = mpnh_merge_rta(nhs, best->attrs, c->merge_limit); + nhs = mpnh_merge_rta(nhs, best->attrs, pool, c->merge_limit); if (nhs->next) { - best = rte_cow_rta(best, rte_update_pool); + best = rte_cow_rta(best, pool); best->attrs->dest = RTD_MULTIPATH; best->attrs->nexthops = nhs; } @@ -805,7 +808,7 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed /* Prepare new merged route */ if (new_best) - new_best = rt_export_merged(c, net, &new_best_free, &tmpa, 0); + new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0); /* Prepare old merged route (without proper merged next hops) */ /* There are some issues with running filter on old route - see rt_notify_basic() */ @@ -919,6 +922,13 @@ rte_validate(rte *e) return 0; } + if 
((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } + return 1; } @@ -1848,7 +1858,7 @@ rt_next_hop_update_net(rtable *tab, net *n) /* FIXME: Better announcement of merged routes */ rte_announce_i(tab, RA_MERGED, n, new, old_best, new, old_best); - if (free_old_best) + if (free_old_best) rte_free_quick(old_best); return count; @@ -2426,11 +2436,14 @@ rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, ip_add * CLI commands */ -static void -rt_format_via(rte *e, byte *via) +static byte * +rt_format_via(rte *e) { rta *a = e->attrs; + /* Max text length w/o IP addr and interface name is 16 */ + static byte via[IPA_MAX_TEXT_LENGTH+sizeof(a->iface->name)+16]; + switch (a->dest) { case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break; @@ -2441,12 +2454,12 @@ rt_format_via(rte *e, byte *via) case RTD_MULTIPATH: bsprintf(via, "multipath"); break; default: bsprintf(via, "???"); } + return via; } static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) { - byte via[IPA_MAX_TEXT_LENGTH+32]; byte from[IPA_MAX_TEXT_LENGTH+8]; byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; rta *a = e->attrs; @@ -2455,7 +2468,6 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); struct mpnh *nh; - rt_format_via(e, via); tm_format_datetime(tm, &config->tf_route, e->lastmod); if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->gw)) bsprintf(from, " from %I", a->from); @@ -2476,7 +2488,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm get_route_info(e, info, tmpa); else bsprintf(info, " (%d)", e->pref); - cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->src->proto->name, + cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, 
rt_format_via(e), a->src->proto->name, + tm, from, primary ? (sync_error ? " !" : " *") : "", info); for (nh = a->nexthops; nh; nh = nh->next) cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1); @@ -2517,7 +2529,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED)) { rte *rt_free; - e = rt_export_merged(ec, n, &rt_free, &tmpa, 1); + e = rt_export_merged(ec, n, &rt_free, &tmpa, rte_update_pool, 1); pass = 1; if (!e) diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 8e104d60..9d73a264 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -565,6 +565,11 @@ babel_select_route(struct babel_entry *e) babel_send_seqno_request(e); babel_announce_rte(p, e); + + /* Section 3.6 of the RFC forbids an infeasible route from being selected. This + is cleared after announcing the route to the core to make sure an + unreachable route is propagated first. */ + e->selected_in = NULL; } else { @@ -783,16 +788,21 @@ babel_send_update(struct babel_iface *ifa, bird_clock_t changed) msg.update.prefix = e->n.prefix; msg.update.router_id = r->router_id; - /* Update feasibility distance */ - struct babel_source *s = babel_get_source(e, r->router_id); - s->expires = now + BABEL_GARBAGE_INTERVAL; - if ((msg.update.seqno > s->seqno) || - ((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric))) + babel_enqueue(&msg, ifa); + + /* Update feasibility distance for redistributed routes */ + if (!OUR_ROUTE(r)) { - s->seqno = msg.update.seqno; - s->metric = msg.update.metric; + struct babel_source *s = babel_get_source(e, r->router_id); + s->expires = now + BABEL_GARBAGE_INTERVAL; + + if ((msg.update.seqno > s->seqno) || + ((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric))) + { + s->seqno = msg.update.seqno; + s->metric = msg.update.metric; + } } - babel_enqueue(&msg, ifa); } FIB_WALK_END; } @@ -834,8 +844,8 @@ babel_send_retraction(struct 
babel_iface *ifa, ip_addr prefix, int plen) struct babel_proto *p = ifa->proto; union babel_msg msg = {}; - TRACE(D_PACKETS, "Sending retraction for %I/%d router-id %lR seqno %d", - prefix, plen, p->router_id, p->update_seqno); + TRACE(D_PACKETS, "Sending retraction for %I/%d seqno %d", + prefix, plen, p->update_seqno); msg.type = BABEL_TLV_UPDATE; msg.update.plen = plen; @@ -843,7 +853,23 @@ babel_send_retraction(struct babel_iface *ifa, ip_addr prefix, int plen) msg.update.seqno = p->update_seqno; msg.update.metric = BABEL_INFINITY; msg.update.prefix = prefix; - msg.update.router_id = p->router_id; + + babel_enqueue(&msg, ifa); +} + +static void +babel_send_wildcard_retraction(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending wildcard retraction on %s", ifa->ifname); + + msg.type = BABEL_TLV_UPDATE; + msg.update.wildcard = 1; + msg.update.interval = ifa->cf->update_interval; + msg.update.seqno = p->update_seqno; + msg.update.metric = BABEL_INFINITY; babel_enqueue(&msg, ifa); } @@ -1040,17 +1066,18 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) struct babel_proto *p = ifa->proto; struct babel_msg_update *msg = &m->update; - struct babel_neighbor *n; + struct babel_neighbor *nbr; struct babel_entry *e; struct babel_source *s; struct babel_route *r; + node *n; int feasible; TRACE(D_PACKETS, "Handling update for %I/%d with seqno %d metric %d", msg->prefix, msg->plen, msg->seqno, msg->metric); - n = babel_find_neighbor(ifa, msg->sender); - if (!n) + nbr = babel_find_neighbor(ifa, msg->sender); + if (!nbr) { DBG("Babel: Haven't heard from neighbor %I; ignoring update.\n", msg->sender); return; @@ -1095,55 +1122,88 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) * of the Interval value included in the update. 
*/ + /* Retraction */ if (msg->metric == BABEL_INFINITY) - e = babel_find_entry(p, msg->prefix, msg->plen); - else - e = babel_get_entry(p, msg->prefix, msg->plen); + { + if (msg->wildcard) + { + /* + * Special case: This is a retraction of all prefixes announced by this + * neighbour (see second-to-last paragraph of section 4.4.9 in the RFC). + */ + WALK_LIST(n, nbr->routes) + { + r = SKIP_BACK(struct babel_route, neigh_route, n); + r->metric = BABEL_INFINITY; + babel_select_route(r->e); + } + } + else + { + e = babel_find_entry(p, msg->prefix, msg->plen); - if (!e) + if (!e) + return; + + /* The route entry indexed by neighbour */ + r = babel_find_route(e, nbr); + + if (!r) + return; + + r->metric = BABEL_INFINITY; + babel_select_route(e); + } + + /* Done with retractions */ return; + } + e = babel_get_entry(p, msg->prefix, msg->plen); + r = babel_find_route(e, nbr); /* the route entry indexed by neighbour */ s = babel_find_source(e, msg->router_id); /* for feasibility */ - r = babel_find_route(e, n); /* the route entry indexed by neighbour */ feasible = babel_is_feasible(s, msg->seqno, msg->metric); if (!r) { - if (!feasible || (msg->metric == BABEL_INFINITY)) + if (!feasible) return; - r = babel_get_route(e, n); + r = babel_get_route(e, nbr); r->advert_metric = msg->metric; r->router_id = msg->router_id; - r->metric = babel_compute_metric(n, msg->metric); + r->metric = babel_compute_metric(nbr, msg->metric); r->next_hop = msg->next_hop; r->seqno = msg->seqno; } else if (r == r->e->selected_in && !feasible) { - /* Route is installed and update is infeasible - we may lose the route, so - send a unicast seqno request (section 3.8.2.2 second paragraph). */ + /* + * Route is installed and update is infeasible - we may lose the route, + * so send a unicast seqno request (section 3.8.2.2 second paragraph). 
+ */ babel_unicast_seqno_request(r); - if (msg->router_id == r->router_id) return; - r->metric = BABEL_INFINITY; /* retraction */ + if (msg->router_id == r->router_id) + return; + + /* Treat as retraction */ + r->metric = BABEL_INFINITY; } else { /* Last paragraph above - update the entry */ r->advert_metric = msg->metric; - r->metric = babel_compute_metric(n, msg->metric); - r->router_id = msg->router_id; + r->metric = babel_compute_metric(nbr, msg->metric); r->next_hop = msg->next_hop; + + r->router_id = msg->router_id; r->seqno = msg->seqno; - if (msg->metric != BABEL_INFINITY) - { - r->expiry_interval = BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); - r->expires = now + r->expiry_interval; - if (r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL) - r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL; - } + r->expiry_interval = BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); + r->expires = now + r->expiry_interval; + if (r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL) + r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL; /* If the route is not feasible at this point, it means it is from another neighbour than the one currently selected; so send a unicast seqno @@ -1313,6 +1373,7 @@ babel_iface_start(struct babel_iface *ifa) ifa->up = 1; babel_send_hello(ifa, 0); + babel_send_wildcard_retraction(ifa); babel_send_wildcard_request(ifa); babel_send_update(ifa, 0); /* Full update */ } @@ -1529,6 +1590,9 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b ifa->cf = new; + if (ifa->next_hello > (now + new->hello_interval)) + ifa->next_hello = now + (random() % new->hello_interval) + 1; + if (ifa->next_regular > (now + new->update_interval)) ifa->next_regular = now + (random() % new->update_interval) + 1; @@ -2022,6 +2086,30 @@ babel_start(struct proto *P) return PS_UP; } +static inline void +babel_iface_shutdown(struct babel_iface *ifa) +{ + if (ifa->sk) + { + babel_send_wildcard_retraction(ifa); + 
babel_send_queue(ifa); + } +} + +static int +babel_shutdown(struct proto *P) +{ + struct babel_proto *p = (void *) P; + struct babel_iface *ifa; + + TRACE(D_EVENTS, "Shutdown requested"); + + WALK_LIST(ifa, p->interfaces) + babel_iface_shutdown(ifa); + + return PS_DOWN; +} + static int babel_reconfigure(struct proto *P, struct proto_config *c) { @@ -2049,6 +2137,7 @@ struct protocol proto_babel = { .init = babel_init, .dump = babel_dump, .start = babel_start, + .shutdown = babel_shutdown, .reconfigure = babel_reconfigure, .get_route_info = babel_get_route_info, .get_attr = babel_get_attr diff --git a/proto/babel/babel.h b/proto/babel/babel.h index aea0dd88..481c88a7 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -50,10 +50,12 @@ #define BABEL_INITIAL_HOP_COUNT 255 #define BABEL_MAX_SEND_INTERVAL 5 #define BABEL_TIME_UNITS 100 /* On-wire times are counted in centiseconds */ - #define BABEL_SEQNO_REQUEST_EXPIRY 60 #define BABEL_GARBAGE_INTERVAL 300 +/* Max interval that will not overflow when carried as 16-bit centiseconds */ +#define BABEL_MAX_INTERVAL (0xFFFF/BABEL_TIME_UNITS) + #define BABEL_OVERHEAD (SIZE_OF_IP_HEADER+UDP_HEADER_LENGTH) #define BABEL_MIN_MTU (512 + BABEL_OVERHEAD) @@ -266,7 +268,7 @@ struct babel_msg_ihu { struct babel_msg_update { u8 type; - u8 ae; + u8 wildcard; u8 plen; u16 interval; u16 seqno; diff --git a/proto/babel/config.Y b/proto/babel/config.Y index e7ce6a93..b6170852 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -77,17 +77,18 @@ babel_iface_finish: BABEL_IFACE->rxcost = BABEL_RXCOST_WIRED; } + /* Make sure we do not overflow the 16-bit centisec fields */ if (!BABEL_IFACE->update_interval) - BABEL_IFACE->update_interval = BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR; - BABEL_IFACE->ihu_interval = BABEL_IFACE->hello_interval*BABEL_IHU_INTERVAL_FACTOR; + BABEL_IFACE->update_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR, BABEL_MAX_INTERVAL); + BABEL_IFACE->ihu_interval 
= MIN_(BABEL_IFACE->hello_interval*BABEL_IHU_INTERVAL_FACTOR, BABEL_MAX_INTERVAL); }; babel_iface_item: | PORT expr { BABEL_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); } | RXCOST expr { BABEL_IFACE->rxcost = $2; if (($2<1) || ($2>65535)) cf_error("Invalid rxcost"); } - | HELLO INTERVAL expr { BABEL_IFACE->hello_interval = $3; if (($3<1) || ($3>65535)) cf_error("Invalid hello interval"); } - | UPDATE INTERVAL expr { BABEL_IFACE->update_interval = $3; if (($3<1) || ($3>65535)) cf_error("Invalid hello interval"); } + | HELLO INTERVAL expr { BABEL_IFACE->hello_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid hello interval"); } + | UPDATE INTERVAL expr { BABEL_IFACE->update_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid update interval"); } | TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; } | TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; } | RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); } diff --git a/proto/babel/packets.c b/proto/babel/packets.c index be47aa75..65dd6853 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -462,7 +462,6 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, struct babel_msg_update *msg = &m->update; msg->type = BABEL_TLV_UPDATE; - msg->ae = tlv->ae; msg->interval = get_time16(&tlv->interval); msg->seqno = get_u16(&tlv->seqno); msg->metric = get_u16(&tlv->metric); @@ -480,7 +479,7 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, if (tlv->plen > 0) return PARSE_ERROR; - msg->prefix = IPA_NONE; + msg->wildcard = 1; break; case BABEL_AE_IP4: @@ -523,7 +522,8 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, return PARSE_IGNORE; } - if (!state->router_id_seen) + /* Update must have Router ID, unless it is retraction */ + if (!state->router_id_seen && (msg->metric != BABEL_INFINITY)) { DBG("Babel: No router 
ID seen before update\n"); return PARSE_ERROR; @@ -548,8 +548,11 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m, * When needed, we write Router-ID TLV before Update TLV and return size of * both of them. There is enough space for the Router-ID TLV, because * sizeof(struct babel_tlv_router_id) == sizeof(struct babel_tlv_update). + * + * Router ID is not used for retractions, so do not use it in such a case. */ - if (!state->router_id_seen || (msg->router_id != state->router_id)) + if ((msg->metric < BABEL_INFINITY) && + (!state->router_id_seen || (msg->router_id != state->router_id))) { len0 = babel_write_router_id(hdr, msg->router_id, state, max_len); tlv = (struct babel_tlv_update *) NEXT_TLV(tlv); @@ -562,12 +565,22 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m, memset(tlv, 0, sizeof(struct babel_tlv_update)); TLV_HDR(tlv, BABEL_TLV_UPDATE, len); - tlv->ae = BABEL_AE_IP6; - tlv->plen = msg->plen; + + if (msg->wildcard) + { + tlv->ae = BABEL_AE_WILDCARD; + tlv->plen = 0; + } + else + { + tlv->ae = BABEL_AE_IP6; + tlv->plen = msg->plen; + put_ip6_px(tlv->addr, msg->prefix, msg->plen); + } + put_time16(&tlv->interval, msg->interval); put_u16(&tlv->seqno, msg->seqno); put_u16(&tlv->metric, msg->metric); - put_ip6_px(tlv->addr, msg->prefix, msg->plen); return len0 + len; } diff --git a/proto/bfd/io.c b/proto/bfd/io.c index 79ed9af7..8f4f5007 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -589,7 +589,7 @@ sockets_fire(struct birdloop *loop) times_update(loop); /* Last fd is internal wakeup fd */ - if (pfd[loop->sock_num].revents & POLLIN) + if (pfd[poll_num].revents & POLLIN) wakeup_drain(loop); int i; diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index d85afa8f..b8371f32 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -118,7 +118,7 @@ validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ileng { int res = 0; u8 *a, *dst; - int len, plen, copy; + int len, plen; dst = a = idata; len = *ilength; @@ 
-132,15 +132,20 @@ validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ileng if (len < plen) return -1; + if (a[1] == 0) + { + log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it", + p->p.name, as_path ? "AS" : "AS4"); + goto skip; + } + switch (a[0]) { case AS_PATH_SET: - copy = 1; res++; break; case AS_PATH_SEQUENCE: - copy = 1; res += a[1]; break; @@ -154,20 +159,17 @@ validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ileng log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment", p->p.name, as_path ? "AS" : "AS4"); - copy = 0; - break; + goto skip; default: return -1; } - if (copy) - { - if (dst != a) - memmove(dst, a, plen); - dst += plen; - } + if (dst != a) + memmove(dst, a, plen); + dst += plen; + skip: len -= plen; a += plen; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 72ca3728..0cf38edf 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -369,7 +369,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) } put_u16(buf, wd_size); - if (remains >= 3072) + if (!wd_size) { while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) { @@ -382,7 +382,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) } DBG("Processing bucket %p\n", buck); - a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048); + a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024); if (a_size < 0) { @@ -461,8 +461,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) w += size; remains -= size; } - - if (remains >= 3072) + else { while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next) { @@ -478,7 +477,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) rem_stored = remains; w_stored = w; - size = bgp_encode_attrs(p, w, buck->eattrs, 2048); + size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024); if (size < 0) { log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name); diff --git 
a/proto/rip/rip.c b/proto/rip/rip.c index 131c09ce..74d472c9 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -162,7 +162,6 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) { /* ECMP route */ struct mpnh *nhs = NULL; - struct mpnh **nhp = &nhs; int num = 0; for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) @@ -174,9 +173,7 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) nh->gw = rt->next_hop; nh->iface = rt->from->nbr->iface; nh->weight = rt->from->ifa->cf->ecmp_weight; - nh->next = NULL; - *nhp = nh; - nhp = &(nh->next); + mpnh_insert(&nhs, nh); num++; if (rt->tag != rt_tag) diff --git a/proto/static/static.c b/proto/static/static.c index 28cb1e77..a8abdef0 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -80,7 +80,6 @@ static_install(struct proto *p, struct static_route *r, struct iface *ifa) { struct static_route *r2; struct mpnh *nhs = NULL; - struct mpnh **nhp = &nhs; for (r2 = r->mp_next; r2; r2 = r2->mp_next) if (r2->installed) @@ -89,9 +88,7 @@ static_install(struct proto *p, struct static_route *r, struct iface *ifa) nh->gw = r2->via; nh->iface = r2->neigh->iface; nh->weight = r2->weight; - nh->next = NULL; - *nhp = nh; - nhp = &(nh->next); + mpnh_insert(&nhs, nh); } /* There is at least one nexthop */ diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 56026bdd..3440ed63 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -946,6 +946,12 @@ krt_sock_hook(sock *sk, int size UNUSED) return 0; } +static void +krt_sock_err_hook(sock *sk, int e UNUSED) +{ + krt_sock_hook(sk, 0); +} + static sock * krt_sock_open(pool *pool, void *data, int table_id) { @@ -967,6 +973,7 @@ krt_sock_open(pool *pool, void *data, int table_id) sk = sk_new(pool); sk->type = SK_MAGIC; sk->rx_hook = krt_sock_hook; + sk->err_hook = krt_sock_err_hook; sk->fd = fd; sk->data = data; diff --git a/sysdep/config.h b/sysdep/config.h index a8d58349..c7f63e69 100644 --- a/sysdep/config.h +++ b/sysdep/config.h 
@@ -7,7 +7,7 @@ #define _BIRD_CONFIG_H_ /* BIRD version */ -#define BIRD_VERSION "1.6.0" +#define BIRD_VERSION "1.6.2" /* Include parameters determined by configure script */ #include "sysdep/autoconf.h" diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h index 7fd5f139..6d6586d1 100644 --- a/sysdep/linux/krt-sys.h +++ b/sysdep/linux/krt-sys.h @@ -32,8 +32,11 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i) { return NULL; } /* Kernel routes */ +#define KRT_ALLOW_MERGE_PATHS 1 + #define EA_KRT_PREFSRC EA_CODE(EAP_KRT, 0x10) #define EA_KRT_REALM EA_CODE(EAP_KRT, 0x11) +#define EA_KRT_SCOPE EA_CODE(EAP_KRT, 0x12) #define KRT_METRICS_MAX 0x10 /* RTAX_QUICKACK+1 */ @@ -86,6 +89,7 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i) { return NULL; } struct krt_params { u32 table_id; /* Kernel table ID we sync with */ + u32 metric; /* Kernel metric used for all routes */ }; struct krt_state { diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y index e9c225a2..f577244d 100644 --- a/sysdep/linux/netlink.Y +++ b/sysdep/linux/netlink.Y @@ -10,8 +10,8 @@ CF_HDR CF_DECLS -CF_KEYWORDS(KERNEL, TABLE, KRT_PREFSRC, KRT_REALM, KRT_MTU, KRT_WINDOW, KRT_RTT, - KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING, +CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW, + KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING, KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK, KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR, KRT_LOCK_SSTRESH, KRT_LOCK_CWND, KRT_LOCK_ADVMSS, KRT_LOCK_REORDERING, @@ -22,13 +22,13 @@ CF_GRAMMAR CF_ADDTO(kern_proto, kern_proto kern_sys_item ';') kern_sys_item: - KERNEL TABLE expr { - THIS_KRT->sys.table_id = $3; - } + KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; } + | METRIC expr { THIS_KRT->sys.metric = $2; } ; CF_ADDTO(dynamic_attr, KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); 
}) CF_ADDTO(dynamic_attr, KRT_REALM { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REALM); }) +CF_ADDTO(dynamic_attr, KRT_SCOPE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SCOPE); }) CF_ADDTO(dynamic_attr, KRT_MTU { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_MTU); }) CF_ADDTO(dynamic_attr, KRT_WINDOW { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_WINDOW); }) diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 8146072b..7af575a7 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -20,7 +20,6 @@ #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "lib/alloca.h" #include "sysdep/unix/timer.h" #include "sysdep/unix/unix.h" #include "sysdep/unix/krt.h" @@ -39,6 +38,10 @@ #define MSG_TRUNC 0x20 #endif +#ifndef IFA_FLAGS +#define IFA_FLAGS 8 +#endif + #ifndef IFF_LOWER_UP #define IFF_LOWER_UP 0x10000 #endif @@ -48,6 +51,45 @@ #endif +#define krt_ecmp6(p) ((p)->af == AF_INET6) + +/* + * Structure nl_parse_state keeps state of received route processing. Ideally, + * we could just independently parse received Netlink messages and immediately + * propagate received routes to the rest of BIRD, but Linux kernel represents + * and announces IPv6 ECMP routes not as one route with multiple next hops (like + * RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix. + * + * Therefore, BIRD keeps currently processed route in nl_parse_state structure + * and postpones its propagation until we expect it to be final; i.e., when + * non-matching route is received or when the scan ends. When another matching + * route is received, it is merged with the already processed route to form an + * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the + * postponing is done in both cases (for simplicity). All IPv4 routes are just + * considered non-matching. 
+ * + * This is ignored for asynchronous notifications (every notification is handled + * as a separate route). It is not an issue for our routes, as we ignore such + * notifications anyways. But importing alien IPv6 ECMP routes does not work + * properly. + */ + +struct nl_parse_state +{ + struct linpool *pool; + int scan; + int merge; + + net *net; + rta *attrs; + struct krt_proto *proto; + s8 new; + s8 krt_src; + u8 krt_type; + u8 krt_proto; + u32 krt_metric; +}; + /* * Synchronous Netlink interface */ @@ -63,6 +105,13 @@ struct nl_sock #define NL_RX_SIZE 8192 +#define NL_OP_DELETE 0 +#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL) +#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE) +#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND) + +static linpool *nl_linpool; + static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */ static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */ @@ -166,7 +215,7 @@ nl_get_reply(struct nl_sock *nl) static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS; static int -nl_error(struct nlmsghdr *h) +nl_error(struct nlmsghdr *h, int ignore_esrch) { struct nlmsgerr *e; int ec; @@ -178,7 +227,7 @@ nl_error(struct nlmsghdr *h) } e = (struct nlmsgerr *) NLMSG_DATA(h); ec = -e->error; - if (ec) + if (ec && !(ignore_esrch && (ec == ESRCH))) log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec)); return ec; } @@ -192,14 +241,14 @@ nl_get_scan(void) return NULL; if (h->nlmsg_type == NLMSG_ERROR) { - nl_error(h); + nl_error(h, 0); return NULL; } return h; } static int -nl_exchange(struct nlmsghdr *pkt) +nl_exchange(struct nlmsghdr *pkt, int ignore_esrch) { struct nlmsghdr *h; @@ -211,7 +260,7 @@ nl_exchange(struct nlmsghdr *pkt) break; log(L_WARN "nl_exchange: Unexpected reply received"); } - return nl_error(h) ? -1 : 0; + return nl_error(h, ignore_esrch) ? 
-1 : 0; } /* @@ -248,17 +297,19 @@ static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = { }; -#define BIRD_IFA_MAX (IFA_ANYCAST+1) +#define BIRD_IFA_MAX (IFA_FLAGS+1) static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) }, [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) }, + [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) }, + [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; @@ -627,6 +678,7 @@ nl_parse_addr4(struct ifaddrmsg *i, int scan, int new) { struct rtattr *a[BIRD_IFA_MAX]; struct iface *ifi; + u32 ifa_flags; int scope; if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) @@ -650,10 +702,15 @@ nl_parse_addr4(struct ifaddrmsg *i, int scan, int new) return; } + if (a[IFA_FLAGS]) + ifa_flags = rta_get_u32(a[IFA_FLAGS]); + else + ifa_flags = i->ifa_flags; + struct ifa ifa; bzero(&ifa, sizeof(ifa)); ifa.iface = ifi; - if (i->ifa_flags & IFA_F_SECONDARY) + if (ifa_flags & IFA_F_SECONDARY) ifa.flags |= IA_SECONDARY; ifa.ip = rta_get_ipa(a[IFA_LOCAL]); @@ -730,6 +787,7 @@ nl_parse_addr6(struct ifaddrmsg *i, int scan, int new) { struct rtattr *a[BIRD_IFA_MAX]; struct iface *ifi; + u32 ifa_flags; int scope; if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) @@ -748,14 +806,22 @@ nl_parse_addr6(struct ifaddrmsg *i, int scan, int new) return; } + if (a[IFA_FLAGS]) + ifa_flags = rta_get_u32(a[IFA_FLAGS]); + else + ifa_flags = i->ifa_flags; + struct ifa ifa; bzero(&ifa, sizeof(ifa)); ifa.iface = ifi; - if (i->ifa_flags & IFA_F_SECONDARY) + if (ifa_flags & IFA_F_SECONDARY) ifa.flags |= IA_SECONDARY; - /* IFA_LOCAL can be unset for IPv6 interfaces */ + /* Ignore tentative addresses silently */ + if (ifa_flags & IFA_F_TENTATIVE) + return; + /* IFA_LOCAL can be unset for IPv6 interfaces */ ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? 
: a[IFA_ADDRESS]); if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH) @@ -916,12 +982,13 @@ nh_bufsize(struct mpnh *nh) } static int -nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new) +nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface) { eattr *ea; net *net = e->net; rta *a = e->attrs; int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops); + u32 priority = 0; struct { struct nlmsghdr h; @@ -932,13 +999,13 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new) int rsize = sizeof(*r) + bufsize; r = alloca(rsize); - DBG("nl_send_route(%N,new=%d)\n", net->n.addr, new); + DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); bzero(&r->h, sizeof(r->h)); bzero(&r->r, sizeof(r->r)); - r->h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE; + r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - r->h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? 
NLM_F_CREATE|NLM_F_EXCL : 0); + r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; r->r.rtm_family = p->af; r->r.rtm_dst_len = net_pxlen(net->n.addr); @@ -946,18 +1013,37 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new) r->r.rtm_scope = RT_SCOPE_UNIVERSE; nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); + /* + * Strange behavior for RTM_DELROUTE: + * 1) rtm_family is ignored in IPv6, works for IPv4 + * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6) + * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard + */ + if (krt_table_id(p) < 256) r->r.rtm_table = krt_table_id(p); else nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p)); - /* For route delete, we do not specify route attributes */ - if (!new) - return nl_exchange(&r->h); + if (a->source == RTS_DUMMY) + priority = e->u.krt.metric; + else if (KRT_CF->sys.metric) + priority = KRT_CF->sys.metric; + else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC))) + priority = ea->u.data; + if (priority) + nl_add_attr_u32(&r->h, sizeof(r), RTA_PRIORITY, priority); - if (ea = ea_find(eattrs, EA_KRT_METRIC)) - nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, ea->u.data); + /* For route delete, we do not specify remaining route attributes */ + if (op == NL_OP_DELETE) + goto dest; + + /* Default scope is LINK for device routes, UNIVERSE otherwise */ + if (ea = ea_find(eattrs, EA_KRT_SCOPE)) + r->r.rtm_scope = ea->u.data; + else + r->r.rtm_scope = (dest == RTD_DEVICE) ? 
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); @@ -981,18 +1067,18 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new) nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX); +dest: /* a->iface != NULL checked in krt_capable() for router and device routes */ - - switch (a->dest) + switch (dest) { case RTD_ROUTER: r->r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r->h, rsize, RTA_OIF, a->iface->index); - nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, a->gw); + nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index); + nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, gw); break; case RTD_DEVICE: r->r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r->h, rsize, RTA_OIF, a->iface->index); + nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index); break; case RTD_BLACKHOLE: r->r.rtm_type = RTN_BLACKHOLE; @@ -1007,11 +1093,50 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new) r->r.rtm_type = RTN_UNICAST; nl_add_multipath(&r->h, rsize, a->nexthops); break; + case RTD_NONE: + break; default: bug("krt_capable inconsistent with nl_send_route"); } - return nl_exchange(&r->h); + /* Ignore missing for DELETE */ + return nl_exchange(&r->h, (op == NL_OP_DELETE)); +} + +static inline int +nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) +{ + rta *a = e->attrs; + int err = 0; + + if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH)) + { + struct mpnh *nh = a->nexthops; + + err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface); + if (err < 0) + return err; + + for (nh = nh->next; nh; nh = nh->next) + err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface); + + return err; + } + + return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface); +} + +static inline int +nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) +{ + int err = 0; + + /* For IPv6, we just repeatedly 
request DELETE until we get error */ + do + err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL); + while (krt_ecmp6(p) && !err); + + return err; } void @@ -1020,17 +1145,21 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list int err = 0; /* - * NULL for eattr of the old route is a little hack, but we don't - * get proper eattrs for old in rt_notify() anyway. NULL means no - * extended route attributes and therefore matches if the kernel - * route has any of them. + * We could use NL_OP_REPLACE, but route replace on Linux has some problems: + * + * 1) Does not check for matching rtm_protocol + * 2) Has broken semantics for IPv6 ECMP + * 3) Crashes some kernel version when used for IPv6 ECMP + * + * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old + * route value, so we do not try to optimize IPv6 ECMP reconfigurations. */ if (old) - nl_send_route(p, old, NULL, 0); + nl_delete_rte(p, old, eattrs); if (new) - err = nl_send_route(p, new, eattrs, 1); + err = nl_add_rte(p, new, eattrs); if (err < 0) n->n.flags |= KRF_SYNC_ERROR; @@ -1039,10 +1168,80 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list } +static inline struct mpnh * +nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight) +{ + struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh)); + + nh->gw = gw; + nh->iface = iface; + nh->next = NULL; + nh->weight = weight; + + return nh; +} + +static int +nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type) +{ + /* Route merging must be active */ + if (!s->merge) + return 0; + + /* Saved and new route must have same network, proto/table, and priority */ + if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority)) + return 0; + + /* Both must be regular unicast routes */ + if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST)) + return 0; + + return 1; +} + 
+static void +nl_announce_route(struct nl_parse_state *s) +{ + rte *e = rte_get_temp(s->attrs); + e->net = s->net; + e->u.krt.src = s->krt_src; + e->u.krt.proto = s->krt_proto; + e->u.krt.seen = 0; + e->u.krt.best = 0; + e->u.krt.metric = s->krt_metric; + + if (s->scan) + krt_got_route(s->proto, e); + else + krt_got_route_async(s->proto, e, s->new); + + s->net = NULL; + s->attrs = NULL; + s->proto = NULL; + lp_flush(s->pool); +} + +static inline void +nl_parse_begin(struct nl_parse_state *s, int scan, int merge) +{ + memset(s, 0, sizeof (struct nl_parse_state)); + s->pool = nl_linpool; + s->scan = scan; + s->merge = merge; +} + +static inline void +nl_parse_end(struct nl_parse_state *s) +{ + if (s->net) + nl_announce_route(s); +} + + #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0) static void -nl_parse_route(struct nlmsghdr *h, int scan) +nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) { struct krt_proto *p; struct rtmsg *i; @@ -1052,6 +1251,8 @@ nl_parse_route(struct nlmsghdr *h, int scan) net_addr dst; u32 oif = ~0; u32 table_id; + u32 priority = 0; + u32 def_scope = RT_SCOPE_UNIVERSE; int src; if (!(i = nl_checkin(h, sizeof(*i)))) @@ -1069,9 +1270,9 @@ nl_parse_route(struct nlmsghdr *h, int scan) net_fill_ip4(&dst, IP4_NONE, 0); break; - case AF_INET6: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) - return; + case AF_INET6: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) + return; if (a[RTA_DST]) net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len); @@ -1096,24 +1297,22 @@ nl_parse_route(struct nlmsghdr *h, int scan) if (!p) SKIP("unknown table %d\n", table); - if (a[RTA_IIF]) SKIP("IIF set\n"); if (i->rtm_tos != 0) /* We don't support TOS */ SKIP("TOS %02x\n", i->rtm_tos); - if (scan && !new) + if (s->scan && !new) SKIP("RTM_DELROUTE in scan\n"); + if (a[RTA_PRIORITY]) + priority = rta_get_u32(a[RTA_PRIORITY]); + int c = net_classify(&dst); if ((c < 0) || !(c & IADDR_HOST) 
|| ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) SKIP("strange class/scope\n"); - // ignore rtm_scope, it is not a real scope - // if (i->rtm_scope != RT_SCOPE_UNIVERSE) - // SKIP("scope %u\n", i->rtm_scope); - switch (i->rtm_protocol) { case RTPROT_UNSPEC: @@ -1128,7 +1327,7 @@ nl_parse_route(struct nlmsghdr *h, int scan) return; case RTPROT_BIRD: - if (!scan) + if (!s->scan) SKIP("echo\n"); src = KRT_SRC_BIRD; break; @@ -1140,12 +1339,14 @@ nl_parse_route(struct nlmsghdr *h, int scan) net *net = net_get(p->p.main_channel->table, &dst); - rta ra = { - .src= p->p.main_source, - .source = RTS_INHERIT, - .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST - }; + if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type)) + nl_announce_route(s); + + rta *ra = lp_allocz(s->pool, sizeof(rta)); + ra->src = p->p.main_source; + ra->source = RTS_INHERIT; + ra->scope = SCOPE_UNIVERSE; + ra->cast = RTC_UNICAST; switch (i->rtm_type) { @@ -1153,9 +1354,9 @@ nl_parse_route(struct nlmsghdr *h, int scan) if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET)) { - ra.dest = RTD_MULTIPATH; - ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]); - if (!ra.nexthops) + ra->dest = RTD_MULTIPATH; + ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]); + if (!ra->nexthops) { log(L_ERR "KRT: Received strange multipath route %N", net->n.addr); return; @@ -1164,8 +1365,8 @@ nl_parse_route(struct nlmsghdr *h, int scan) break; } - ra.iface = if_find_by_index(oif); - if (!ra.iface) + ra->iface = if_find_by_index(oif); + if (!ra->iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); return; @@ -1173,37 +1374,38 @@ nl_parse_route(struct nlmsghdr *h, int scan) if (a[RTA_GATEWAY]) { - ra.dest = RTD_ROUTER; - ra.gw = rta_get_ipa(a[RTA_GATEWAY]); + ra->dest = RTD_ROUTER; + ra->gw = rta_get_ipa(a[RTA_GATEWAY]); /* Silently skip strange 6to4 routes */ const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra.gw, (net_addr *) 
&sit)) + if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->gw, (net_addr *) &sit)) return; neighbor *nbr; - nbr = neigh_find2(&p->p, &ra.gw, ra.iface, + nbr = neigh_find2(&p->p, &ra->gw, ra->iface, (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, ra.gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, ra->gw); return; } } else { - ra.dest = RTD_DEVICE; + ra->dest = RTD_DEVICE; + def_scope = RT_SCOPE_LINK; } break; case RTN_BLACKHOLE: - ra.dest = RTD_BLACKHOLE; + ra->dest = RTD_BLACKHOLE; break; case RTN_UNREACHABLE: - ra.dest = RTD_UNREACHABLE; + ra->dest = RTD_UNREACHABLE; break; case RTN_PROHIBIT: - ra.dest = RTD_PROHIBIT; + ra->dest = RTD_PROHIBIT; break; /* FIXME: What about RTN_THROW? */ default: @@ -1211,39 +1413,41 @@ nl_parse_route(struct nlmsghdr *h, int scan) return; } - rte *e = rte_get_temp(&ra); - e->net = net; - e->u.krt.src = src; - e->u.krt.proto = i->rtm_protocol; - e->u.krt.seen = 0; - e->u.krt.best = 0; - e->u.krt.metric = 0; - - if (a[RTA_PRIORITY]) - e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]); + if (i->rtm_scope != def_scope) + { + ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); + ea->next = ra->eattrs; + ra->eattrs = ea; + ea->flags = EALF_SORTED; + ea->count = 1; + ea->attrs[0].id = EA_KRT_SCOPE; + ea->attrs[0].flags = 0; + ea->attrs[0].type = EAF_TYPE_INT; + ea->attrs[0].u.data = i->rtm_scope; + } if (a[RTA_PREFSRC]) { ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr)); - ea->next = ra.eattrs; - ra.eattrs = ea; + ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); + ea->next = ra->eattrs; + ra->eattrs = ea; ea->flags = EALF_SORTED; ea->count = 1; ea->attrs[0].id = EA_KRT_PREFSRC; ea->attrs[0].flags = 0; ea->attrs[0].type = EAF_TYPE_IP_ADDRESS; - ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps)); + 
ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps)); ea->attrs[0].u.ptr->length = sizeof(ps); memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps)); } if (a[RTA_FLOW]) { - ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr)); - ea->next = ra.eattrs; - ra.eattrs = ea; + ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); + ea->next = ra->eattrs; + ra->eattrs = ea; ea->flags = EALF_SORTED; ea->count = 1; ea->attrs[0].id = EA_KRT_REALM; @@ -1255,7 +1459,7 @@ nl_parse_route(struct nlmsghdr *h, int scan) if (a[RTA_METRICS]) { u32 metrics[KRT_METRICS_MAX]; - ea_list *ea = alloca(sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); + ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); int t, n = 0; if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) @@ -1276,37 +1480,69 @@ nl_parse_route(struct nlmsghdr *h, int scan) if (n > 0) { - ea->next = ra.eattrs; + ea->next = ra->eattrs; ea->flags = EALF_SORTED; ea->count = n; - ra.eattrs = ea; + ra->eattrs = ea; } } - if (scan) - krt_got_route(p, e); + /* + * Ideally, now we would send the received route to the rest of kernel code. + * But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it + * and merge next hops until the end of the sequence. 
+ */ + + if (!s->net) + { + /* Store the new route */ + s->net = net; + s->attrs = ra; + s->proto = p; + s->new = new; + s->krt_src = src; + s->krt_type = i->rtm_type; + s->krt_proto = i->rtm_protocol; + s->krt_metric = priority; + } else - krt_got_route_async(p, e, new); + { + /* Merge next hops with the stored route */ + rta *a = s->attrs; + + if (a->dest != RTD_MULTIPATH) + { + a->dest = RTD_MULTIPATH; + a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0); + } + + mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0)); + } } void krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */ { struct nlmsghdr *h; + struct nl_parse_state s; + nl_parse_begin(&s, 1, 0); nl_request_dump(AF_INET, RTM_GETROUTE); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) - nl_parse_route(h, 1); + nl_parse_route(&s, h); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); + nl_parse_begin(&s, 1, 1); nl_request_dump(AF_INET6, RTM_GETROUTE); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) - nl_parse_route(h, 1); + nl_parse_route(&s, h); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); } /* @@ -1319,12 +1555,16 @@ static byte *nl_async_rx_buffer; /* Receive buffer */ static void nl_async_msg(struct nlmsghdr *h) { + struct nl_parse_state s; + switch (h->nlmsg_type) { case RTM_NEWROUTE: case RTM_DELROUTE: DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type); - nl_parse_route(h, 0); + nl_parse_begin(&s, 0, 0); + nl_parse_route(&s, h); + nl_parse_end(&s); break; case RTM_NEWLINK: case RTM_DELLINK: @@ -1397,6 +1637,12 @@ nl_async_hook(sock *sk, int size UNUSED) } static void +nl_async_err_hook(sock *sk, int e UNUSED) +{ + nl_async_hook(sk, 0); +} + +static void nl_open_async(void) { sock *sk; @@ -1433,6 +1679,7 @@ nl_open_async(void) sk = 
nl_async_sk = sk_new(krt_pool); sk->type = SK_MAGIC; sk->rx_hook = nl_async_hook; + sk->err_hook = nl_async_err_hook; sk->fd = fd; if (sk_open(sk) < 0) bug("Netlink: sk_open failed"); @@ -1446,6 +1693,7 @@ nl_open_async(void) void krt_sys_io_init(void) { + nl_linpool = lp_new(krt_pool, 4080); HASH_INIT(nl_table_map, krt_pool, 6); } @@ -1478,19 +1726,21 @@ krt_sys_shutdown(struct krt_proto *p) int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) { - return n->sys.table_id == o->sys.table_id; + return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric); } void krt_sys_init_config(struct krt_config *cf) { cf->sys.table_id = RT_TABLE_MAIN; + cf->sys.metric = 0; } void krt_sys_copy_config(struct krt_config *d, struct krt_config *s) { d->sys.table_id = s->sys.table_id; + d->sys.metric = s->sys.metric; } static const char *krt_metrics_names[KRT_METRICS_MAX] = { @@ -1515,6 +1765,10 @@ krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED) bsprintf(buf, "realm"); return GA_NAME; + case EA_KRT_SCOPE: + bsprintf(buf, "scope"); + return GA_NAME; + case EA_KRT_LOCK: buf += bsprintf(buf, "lock:"); ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 5ec728af..e90964c1 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -1893,6 +1893,20 @@ int sk_is_ipv6(sock *s) { return s->af == AF_INET6; } void +sk_err(sock *s, int revents) +{ + int se = 0, sse = sizeof(se); + if ((s->type != SK_MAGIC) && (revents & POLLERR)) + if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0) + { + log(L_ERR "IO: Socket error: SO_ERROR: %m"); + se = 0; + } + + s->err_hook(s, se); +} + +void sk_dump_all(void) { node *n; @@ -2202,7 +2216,7 @@ io_loop(void) int steps; steps = MAX_STEPS; - if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) + if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook) do { steps--; @@ 
-2224,6 +2238,7 @@ io_loop(void) goto next; } while (e && steps); + current_sock = sk_next(s); next: ; } @@ -2247,18 +2262,26 @@ io_loop(void) goto next2; } - if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) + if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook) { count++; io_log_event(s->rx_hook, s->data); sk_read(s, pfd[s->index].revents); if (s != current_sock) - goto next2; + goto next2; } + + if (pfd[s->index].revents & (POLLHUP | POLLERR)) + { + sk_err(s, pfd[s->index].revents); + goto next2; + } + current_sock = sk_next(s); next2: ; } + stored_sock = current_sock; } } diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 91317d97..33dc4a19 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -29,6 +29,8 @@ CF_DECLS CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +%type <i> kern_mp_limit + CF_GRAMMAR /* Kernel syncer protocol */ @@ -43,6 +45,11 @@ kern_proto_start: proto_start KERNEL { CF_ADDTO(kern_proto, kern_proto_start proto_name '{') CF_ADDTO(kern_proto, kern_proto kern_item ';') +kern_mp_limit: + /* empty */ { $$ = KRT_DEFAULT_ECMP_LIMIT; } + | LIMIT expr { $$ = $2; if (($2 <= 0) || ($2 > 255)) cf_error("Merge paths limit must be in range 1-255"); } + ; + kern_item: proto_item | proto_channel { this_proto->net_type = $1->net_type; } @@ -55,13 +62,18 @@ kern_item: THIS_KRT->learn = $2; #ifndef KRT_ALLOW_LEARN if ($2) - cf_error("Learning of kernel routes not supported in this configuration"); + cf_error("Learning of kernel routes not supported on this platform"); #endif } | DEVICE ROUTES bool { THIS_KRT->devroutes = $3; } | GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; } - | MERGE PATHS bool { krt_set_merge_paths(this_channel, $3, KRT_DEFAULT_ECMP_LIMIT); } - | MERGE PATHS bool LIMIT expr { krt_set_merge_paths(this_channel, $3, $5); } + | MERGE PATHS bool kern_mp_limit { + krt_set_merge_paths(this_channel, $3, 
$4); +#ifndef KRT_ALLOW_MERGE_PATHS + if ($3) + cf_error("Path merging not supported on this platform"); +#endif + } ; /* Kernel interface protocol */ diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 6531bb28..d4cb964e 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -604,7 +604,7 @@ krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa) rte *rt; if (c->ra_mode == RA_MERGED) - return rt_export_merged(c, net, rt_free, tmpa, 1); + return rt_export_merged(c, net, rt_free, tmpa, krt_filter_lp, 1); rt = net->routes; *rt_free = NULL; diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 9c56eb24..e5c5e74e 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -20,6 +20,7 @@ #include <stdarg.h> #include <time.h> #include <unistd.h> +#include <errno.h> #include "nest/bird.h" #include "nest/cli.h" @@ -209,6 +210,7 @@ bug(const char *msg, ...) va_start(args, msg); vlog(L_BUG[0], msg, args); + va_end(args); abort(); } @@ -226,6 +228,7 @@ die(const char *msg, ...) 
va_start(args, msg); vlog(L_FATAL[0], msg, args); + va_end(args); exit(1); } @@ -312,7 +315,11 @@ log_init_debug(char *f) else if (!*f) dbgf = stderr; else if (!(dbgf = fopen(f, "a"))) - log(L_ERR "Error opening debug file `%s': %m", f); + { + /* Cannot use die() nor log() here, logging is not yet initialized */ + fprintf(stderr, "bird: Unable to open debug file %s: %s\n", f, strerror(errno)); + exit(1); + } if (dbgf) setvbuf(dbgf, NULL, _IONBF, 0); } diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 1f47680e..9594269d 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -621,7 +621,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "c:dD:ps:P:u:g:flR"; +static char *opt_list = "c:dD:ps:P:u:g:flRh"; static int parse_and_exit; char *bird_name; static char *use_user; @@ -629,10 +629,43 @@ static char *use_group; static int run_in_foreground = 0; static void -usage(void) +display_usage(void) { - fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-P <pid-file>] [-u <user>] [-g <group>] [-f] [-l] [-R]\n", bird_name); - exit(1); + fprintf(stderr, "Usage: %s [--version] [--help] [-c <config-file>] [OPTIONS]\n", bird_name); +} + +static void +display_help(void) +{ + display_usage(); + + fprintf(stderr, + "\n" + "Options: \n" + " -c <config-file> Use given configuration file instead\n" + " of prefix/etc/bird.conf\n" + " -d Enable debug messages and run bird in foreground\n" + " -D <debug-file> Log debug messages to given file instead of stderr\n" + " -f Run bird in foreground\n" + " -g <group> Use given group ID\n" + " -h, --help Display this information\n" + " -l Look for a configuration file and a communication socket\n" + " file in the current working directory\n" + " -p Test configuration file and exit without start\n" + " -P <pid-file> Create a PID file with given filename\n" + " -R Apply graceful restart recovery after start\n" + " -s <control-socket> Use given filename 
for a control socket\n" + " -u <user> Drop privileges and use given user ID\n" + " --version Display version of BIRD\n"); + + exit(0); +} + +static void +display_version(void) +{ + fprintf(stderr, "BIRD version " BIRD_VERSION "\n"); + exit(0); } static inline char * @@ -706,12 +739,9 @@ parse_args(int argc, char **argv) if (argc == 2) { if (!strcmp(argv[1], "--version")) - { - fprintf(stderr, "BIRD version " BIRD_VERSION "\n"); - exit(0); - } + display_version(); if (!strcmp(argv[1], "--help")) - usage(); + display_help(); } while ((c = getopt(argc, argv, opt_list)) >= 0) switch (c) @@ -755,11 +785,19 @@ parse_args(int argc, char **argv) case 'R': graceful_restart_recovery(); break; + case 'h': + display_help(); + break; default: - usage(); + fputc('\n', stderr); + display_usage(); + exit(1); } if (optind < argc) - usage(); + { + display_usage(); + exit(1); + } } /* |