diff options
51 files changed, 1332 insertions, 529 deletions
@@ -1,3 +1,15 @@ +Version 1.0.16 (2009-06-25) + o Parametrized pair and path mask expressions in the filter language. + o Transparent pipe mode allows to implement BGP route server with + independent route policy for each peer. + o Kernel route table synchronization does not allow overwriting + of alien routes. + o Configurable BGP import route limits. + o During BGP error delay, incoming connections are rejected. + o BGP route statistics. + o Better support for multiple network addresses on OSPF interfaces. + o As usual, miscellaneous bugfixes. + Version 1.0.15 (2009-05-25) o FreeBSD and NetBSD port renewed. OpenBSD port introduced. o import/preimport 'show route' modifiers was renamed to export/preexport diff --git a/conf/conf.c b/conf/conf.c index fefcac51..eeffd4a8 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -175,6 +175,12 @@ global_commit(struct config *new, struct config *old) { if (!old) return 0; + + if (!ipa_equal(old->listen_bgp_addr, new->listen_bgp_addr) || + (old->listen_bgp_port != new->listen_bgp_port) || + (old->listen_bgp_flags != new->listen_bgp_flags)) + log(L_WARN "Reconfiguration of BGP listening socket not implemented, please restart BIRD."); + if (!new->router_id) new->router_id = old->router_id; if (new->router_id != old->router_id) @@ -183,7 +189,7 @@ global_commit(struct config *new, struct config *old) } static int -config_do_commit(struct config *c) +config_do_commit(struct config *c, int type) { int force_restart, nobs; @@ -199,7 +205,7 @@ config_do_commit(struct config *c) DBG("rt_commit\n"); rt_commit(c, old_config); DBG("protos_commit\n"); - protos_commit(c, old_config, force_restart); + protos_commit(c, old_config, force_restart, type); new_config = NULL; /* Just to be sure nobody uses that now */ if (old_config) nobs = --old_config->obstacle_count; @@ -230,7 +236,7 @@ config_done(void *unused UNUSED) c = future_config; future_config = NULL; log(L_INFO "Switching to queued configuration..."); - if (!config_do_commit(c)) + if
(!config_do_commit(c, RECONFIG_HARD)) break; } } @@ -238,6 +244,7 @@ config_done(void *unused UNUSED) /** * config_commit - commit a configuration * @c: new configuration + * @type: type of reconfiguration (RECONFIG_SOFT or RECONFIG_HARD) * * When a configuration is parsed and prepared for use, the * config_commit() function starts the process of reconfiguration. @@ -257,11 +264,11 @@ config_done(void *unused UNUSED) * are accepted. */ int -config_commit(struct config *c) +config_commit(struct config *c, int type) { if (!config) /* First-time configuration */ { - config_do_commit(c); + config_do_commit(c, RECONFIG_HARD); return CONF_DONE; } if (old_config) /* Reconfiguration already in progress */ @@ -282,7 +289,7 @@ config_commit(struct config *c) future_config = c; return CONF_QUEUED; } - if (config_do_commit(c)) + if (config_do_commit(c, type)) { config_done(NULL); return CONF_DONE; @@ -315,7 +322,7 @@ order_shutdown(void) init_list(&c->tables); c->shutdown = 1; shutting_down = 1; - config_commit(c); + config_commit(c, RECONFIG_HARD); shutting_down = 2; } diff --git a/conf/conf.h b/conf/conf.h index 17b975b3..951dde3c 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -22,6 +22,9 @@ struct config { list logfiles; /* Configured log fils (sysdep) */ struct rtable_config *master_rtc; /* Configuration of master routing table */ u32 router_id; /* Our Router ID */ + ip_addr listen_bgp_addr; /* Listening BGP socket should use this address */ + unsigned listen_bgp_port; /* Listening BGP socket should use this port (0 is default) */ + u32 listen_bgp_flags; /* Listening BGP socket should use these flags */ unsigned int proto_default_debug; /* Default protocol debug mask */ int cli_debug; /* Tracing of CLI connections and commands */ char *err_msg; /* Parser error message */ @@ -47,7 +50,9 @@ struct config *config_alloc(byte *name); int config_parse(struct config *); int cli_parse(struct config *); void config_free(struct config *); -int config_commit(struct config *); +int 
config_commit(struct config *, int type); +#define RECONFIG_HARD 0 +#define RECONFIG_SOFT 1 void cf_error(char *msg, ...) NORET; void config_add_obstacle(struct config *); void config_del_obstacle(struct config *); diff --git a/conf/confbase.Y b/conf/confbase.Y index 43854623..a2df85dc 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -57,6 +57,7 @@ CF_DECLS %type <time> datetime %type <a> ipa %type <px> prefix prefix_or_ipa +%type <t> text_or_none %nonassoc PREFIX_DUMMY %nonassoc '=' '<' '>' '~' '.' GEQ LEQ NEQ AND OR PO PC @@ -153,6 +154,11 @@ datetime: } ; +text_or_none: + TEXT { $$ = $1; } + | { $$ = NULL; } + ; + CF_CODE CF_END diff --git a/doc/bird.sgml b/doc/bird.sgml index 666d9f62..267c768a 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -249,6 +249,15 @@ protocol rip { <tag>router id <m/IPv4 address/</tag> Set BIRD's router ID. It's a world-wide unique identification of your router, usually one of router's IPv4 addresses. Default: in IPv4 version, the lowest IP address of a non-loopback interface. In IPv6 version, this option is mandatory. + <tag>listen bgp [address <m/address/] [port <m/port/] [v6only]</tag> + This option allows to specify address and port where BGP + protocol should listen. It is global option as listening + socket is common to all BGP instances. Default is to listen on + all addresses (0.0.0.0) and port 179. In IPv6 mode, option + <cf/v6only/ can be used to specify that BGP socket should + listen to IPv6 connections only. This is needed if you want to + run both bird and bird6 on the same port. + <tag>table <m/name/</tag> Create a new routing table. The default routing table is created implicitly, other routing tables have to be added by this command. @@ -464,8 +473,16 @@ This argument can be omitted if there exists only a single instance. <tag>enable|disable|restart <m/name/|"<m/pattern/"|all</tag> Enable, disable or restart a given protocol instance, instances matching the <cf><m/pattern/</cf> or <cf/all/ instances. 
- <tag>configure ["<m/config file/"]</tag> - Reload configuration from a given file. + <tag>configure [soft] ["<m/config file/"]</tag> + Reload configuration from a given file. BIRD will smoothly + switch itself to the new configuration, protocols are + reconfigured if possible, restarted otherwise. Changes in + filters usually lead to restart of affected protocols. If + <cf/soft/ option is used, changes in filters do not cause + BIRD to restart affected protocols, therefore already accepted + routes (according to old filters) would be still propagated, + but new routes would be processed according to the new + filters. <tag/down/ Shut BIRD down. @@ -572,7 +589,8 @@ incompatible with each other (that is to prevent you from shooting in the foot). to +2000000000. Overflows are not checked. You can use <cf/0x1234/ syntax to write hexadecimal values. <tag/pair/ This is a pair of two short integers. Each component can have values from 0 to - 65535. Literals of this type is written as <cf/(1234,5678)/. + 65535. Literals of this type are written as <cf/(1234,5678)/. The same syntax can also be + used to construct a pair from two arbitrary integer expressions (for example <cf/(1+2,a)/). <tag/string/ This is a string of characters. There are no ways to modify strings in filters. You can pass them between functions, assign them to variables of type <cf/string/, print @@ -640,6 +658,8 @@ incompatible with each other (that is to prevent you from shooting in the foot). For example, if <cf>bgp_path</cf> is 4 3 2 1, then: <tt>bgp_path ˜ [= * 4 3 * =]</tt> is true, but <tt>bgp_path ˜ [= * 4 5 * =]</tt> is false. + BGP mask expressions can also contain integer expressions enclosed in parentheses + and integer variables, for example <tt>[= * 4 (1+2) a =]</tt>. There is also old syntax that uses / .. / instead of [= .. =] and ? instead of *.
<tag/clist/ Community list is similar to set of pairs, @@ -888,6 +908,10 @@ for each neighbor using the following configuration parameters: implementations (Cisco and Quagga). This option is relevant to IPv4 mode with enabled capability advertisement only. Default: on. + <tag>route limit <m/number/</tag> The maximal number of routes + that may be imported from the protocol. If the route limit is + exceeded, the connection is closed with error. Default: no limit. + <tag>disable after error <m/switch/</tag> When an error is encountered (either locally or by the other side), disable the instance automatically and wait for an administrator to fix the problem manually. Default: off. @@ -1022,7 +1046,7 @@ interfaces from the kernel. this protocol in the configuration since almost all other protocols require network interfaces to be defined for them to work with. -<p>The only configurable thing is interface scan time: +<sect1>Configuration <p><descrip> <tag>scan time <m/number/</tag> Time in seconds between two scans @@ -1030,14 +1054,34 @@ require network interfaces to be defined for them to work with. interface status changes asynchronously (such as newer versions of Linux), we need to scan the list only in order to avoid confusion by lost notification messages, so the default time is set to a large value. + + <tag>primary [ "<m/mask/" ] <m/prefix/</tag> + If a network interface has more than one network address, + BIRD has to choose one of them as a primary one, because some + routing protocols (for example OSPFv2) suppose there is only + one network address per interface. By default, BIRD chooses + the lexicographically smallest address as the primary one. + + This option allows to specify which network address should be + chosen as a primary one. Network addresses that match + <m/prefix/ are preferred to non-matching addresses. If more + <cf/primary/ options are used, the first one has the highest + preference. 
If "<m/mask/" is specified, then such + <cf/primary/ option is relevant only to matching network + interfaces. + + In all cases, an address marked by operating system as + secondary cannot be chosen as the primary one. </descrip> <p>As the Device protocol doesn't generate any routes, it cannot have -any attributes. Example configuration looks really simple: +any attributes. Example configuration looks like this: <p><code> protocol device { scan time 10; # Scan the interfaces often + primary "eth0" 192.168.1.1; + primary 192.168.0.0/16; } </code> @@ -1188,8 +1232,13 @@ protocol ospf <name> { <prefix>; <prefix> hidden; } - interface <interface pattern> - { + stubnet <prefix>; + stubnet <prefix> { + hidden <switch>; + summary <switch>; + cost <num>; + } + interface <interface pattern> { cost <num>; stub <switch>; hello <num>; @@ -1216,8 +1265,7 @@ protocol ospf <name> { <ip> eligible; }; }; - virtual link <id> - { + virtual link <id> { hello <num>; retransmit <num>; wait <num>; @@ -1258,6 +1306,24 @@ protocol ospf <name> { Definition of area IP ranges. This is used in summary lsa origination. Hidden networks are not propagated into other areas. + <tag>stubnet <m/prefix/ { <m/options/ }</tag> + Stub networks are networks that are not transit networks + between OSPF routers. They are also propagated through an + OSPF area as a part of a link state database. By default, + BIRD generates a stub network record for each primary network + address on each OSPF interface that does not have any OSPF + neighbors, and also for each non-primary network address on + each OSPF interface. This option allows to alter a set of + stub networks propagated by this router. + + Each instance of this option adds a stub network with given + network prefix to the set of propagated stub network, unless + option <cf/hidden/ is used. It also suppresses default stub + networks for given network prefix. 
When option + <cf/summary/ is used, also default stub networks that are + subnetworks of given stub network are suppressed. This might + be used, for example, to aggregate generated stub networks. + <tag>interface <M>pattern</M></tag> Defines that the specified interfaces belong to the area being defined. See <ref id="dsc-iface" name="interface"> common option for detailed description. @@ -1446,10 +1512,23 @@ and vice versa, depending on what's allowed by the filters. Export filters contr of routes from the primary table to the secondary one, import filters control the opposite direction. +<p>The Pipe protocol may work in the opaque mode or in the transparent +mode. In the opaque mode, the Pipe protocol retransmits optimal route +from one table to the other table in a similar way like other +protocols send and receive routes. Retransmitted route will have the +source set to the Pipe protocol, which may limit access to protocol +specific route attributes. The opaque mode is a default mode. + +<p>In transparent mode, the Pipe protocol retransmits all routes from +one table to the other table, retaining their original source and +attributes. If import and export filters are set to accept, then both +tables would have the same content. The mode can be set by +<tt/mode/ option. + <p>The primary use of multiple routing tables and the Pipe protocol is for policy routing, where handling of a single packet doesn't depend only on its destination address, but also on its source address, source interface, protocol type and other similar parameters. -In many systems (Linux 2.2 being a good example), the kernel allows to enforce routing policies +In many systems (Linux being a good example), the kernel allows to enforce routing policies by defining routing rules which choose one of several routing tables to be used for a packet according to its parameters. 
Setting of these rules is outside the scope of BIRD's work (on Linux, you can use the <tt/ip/ command), but you can create several routing tables in BIRD, @@ -1460,8 +1539,10 @@ another one. <sect1>Configuration <p><descrip> - <tag>peer table <m/table/</tag> Define secondary routing table to connect to. The + <tag>peer table <m/table/</tag> Defines secondary routing table to connect to. The primary one is selected by the <cf/table/ keyword. + + <tag>mode opaque|transparent</tag> Specifies the mode for the pipe to work in. Default is opaque. </descrip> <sect1>Attributes diff --git a/filter/config.Y b/filter/config.Y index fe79496f..ee4e638d 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -14,6 +14,17 @@ CF_DEFINES #define P(a,b) ((a<<8) | b) +static int make_pair(int i1, int i2) +{ + unsigned u1 = i1; + unsigned u2 = i2; + + if ((u1 > 0xFFFF) || (u2 > 0xFFFF)) + cf_error( "Can't operate with value out of bounds in pair constructor"); + + return (u1 << 16) | u2; +} + CF_DECLS CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, @@ -32,9 +43,9 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, %nonassoc THEN %nonassoc ELSE -%type <x> term block cmds cmd function_body constant print_one print_list var_list var_listn dynamic_attr static_attr function_call +%type <x> term block cmds cmd function_body constant print_one print_list var_list var_listn dynamic_attr static_attr function_call symbol dpair bgp_path_expr %type <f> filter filter_body where_filter -%type <i> type break_command pair +%type <i> type break_command cpair %type <e> set_item set_items switch_body %type <trie> fprefix_set %type <v> set_atom fprefix fprefix_s fipa @@ -203,8 +214,8 @@ block: /* * Simple types, their bison value is int */ -pair: - '(' NUM ',' NUM ')' { $$ = $2 << 16 | $4; } +cpair: + '(' NUM ',' NUM ')' { $$ = make_pair($2, $4); } ; /* @@ -215,10 +226,10 @@ fipa: ; set_atom: - NUM { $$.type = T_INT; $$.val.i = $1; } - | pair { $$.type = T_PAIR; $$.val.i = $1; } - | fipa { $$ = $1; 
} - | ENUM { $$.type = $1 >> 16; $$.val.i = $1 & 0xffff; } + NUM { $$.type = T_INT; $$.val.i = $1; } + | cpair { $$.type = T_PAIR; $$.val.i = $1; } + | fipa { $$ = $1; } + | ENUM { $$.type = $1 >> 16; $$.val.i = $1 & 0xffff; } ; set_item: @@ -277,16 +288,21 @@ switch_body: /* EMPTY */ { $$ = NULL; } /* CONST '(' expr ')' { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_INT; $$->a2.i = $3; } */ +bgp_path_expr: + symbol { $$ = $1; } + | '(' term ')' { $$ = $2; } + ; + bgp_path: PO bgp_path_tail1 PC { $$ = $2; } | '/' bgp_path_tail2 '/' { $$ = $2; } - | OR { $$ = NULL; } /* special case because of || is a different token */ ; bgp_path_tail1: NUM bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN; $$->val = $1; } | '*' bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASTERISK; $$->val = 0; } | '?' bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_QUESTION; $$->val = 0; } + | bgp_path_expr bgp_path_tail1 { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->kind = PM_ASN_EXPR; $$->val = (uintptr_t) $1; } | { $$ = NULL; } ; @@ -296,12 +312,24 @@ bgp_path_tail2: | { $$ = NULL; } ; +dpair: + '(' term ',' term ')' { + if (($2->code == 'c') && ($4->code == 'c')) + { + if (($2->aux != T_INT) || ($4->aux != T_INT)) + cf_error( "Can't operate with value of non-integer type in pair constructor" ); + $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_PAIR; $$->a2.i = make_pair($2->a2.i, $4->a2.i); + } + else + { $$ = f_new_inst(); $$->code = P('m', 'p'); $$->a1.p = $2; $$->a2.p = $4; } + } + ; + constant: NUM { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_INT; $$->a2.i = $1; } | TRUE { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_BOOL; $$->a2.i = 1; } | FALSE { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_BOOL; $$->a2.i = 0; } | TEXT { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_STRING; $$->a2.p = $1; } - | pair { $$ = f_new_inst(); $$->code 
= 'c'; $$->aux = T_PAIR; $$->a2.i = $1; } | fipa { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; $$->a1.p = val; *val = $1; } | fprefix_s {NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; $$->a1.p = val; *val = $1; } | '[' set_items ']' { DBG( "We've got a set here..." ); $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_SET; $$->a2.p = build_tree($2); DBG( "ook\n" ); } @@ -310,6 +338,7 @@ constant: | bgp_path { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; val->type = T_PATH_MASK; val->val.path_mask = $1; $$->a1.p = val; } ; + /* * Maybe there are no dynamic attributes defined by protocols. * For such cases, we force the dynamic_attr list to contain @@ -343,6 +372,39 @@ function_call: } ; +symbol: + SYM { + $$ = f_new_inst(); + switch ($1->class) { + case SYM_NUMBER: + $$ = f_new_inst(); + $$->code = 'c'; + $$->aux = T_INT; + $$->a2.i = $1->aux; + break; + case SYM_IPA: + { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; $$->a1.p = val; val->type = T_IP; val->val.px.ip = * (ip_addr *) ($1->def); } + break; + case SYM_VARIABLE | T_BOOL: + case SYM_VARIABLE | T_INT: + case SYM_VARIABLE | T_PAIR: + case SYM_VARIABLE | T_STRING: + case SYM_VARIABLE | T_IP: + case SYM_VARIABLE | T_PREFIX: + case SYM_VARIABLE | T_PREFIX_SET: + case SYM_VARIABLE | T_SET: + case SYM_VARIABLE | T_PATH: + case SYM_VARIABLE | T_PATH_MASK: + case SYM_VARIABLE | T_CLIST: + $$->code = 'V'; + $$->a1.p = $1->def; + $$->a2.p = $1->name; + break; + default: + cf_error("%s: variable expected.", $1->name ); + } + } + static_attr: FROM { $$ = f_new_inst(); $$->aux = T_IP; $$->a2.i = OFFSETOF(struct rta, from); $$->a1.i = 1; } @@ -373,38 +435,9 @@ term: | '!' 
term { $$ = f_new_inst(); $$->code = '!'; $$->a1.p = $2; } | DEFINED '(' term ')' { $$ = f_new_inst(); $$->code = P('d','e'); $$->a1.p = $3; } + | symbol { $$ = $1; } | constant { $$ = $1; } - | SYM { - $$ = f_new_inst(); - switch ($1->class) { - case SYM_NUMBER: - $$ = f_new_inst(); - $$->code = 'c'; - $$->aux = T_INT; - $$->a2.i = $1->aux; - break; - case SYM_IPA: - { NEW_F_VAL; $$ = f_new_inst(); $$->code = 'C'; $$->a1.p = val; val->type = T_IP; val->val.px.ip = * (ip_addr *) ($1->def); } - break; - case SYM_VARIABLE | T_BOOL: - case SYM_VARIABLE | T_INT: - case SYM_VARIABLE | T_PAIR: - case SYM_VARIABLE | T_STRING: - case SYM_VARIABLE | T_IP: - case SYM_VARIABLE | T_PREFIX: - case SYM_VARIABLE | T_PREFIX_SET: - case SYM_VARIABLE | T_SET: - case SYM_VARIABLE | T_PATH: - case SYM_VARIABLE | T_PATH_MASK: - case SYM_VARIABLE | T_CLIST: - $$->code = 'V'; - $$->a1.p = $1->def; - $$->a2.p = $1->name; - break; - default: - cf_error("%s: variable expected.", $1->name ); - } - } + | dpair { $$ = $1; } | PREFERENCE { $$ = f_new_inst(); $$->code = 'P'; } diff --git a/filter/filter.c b/filter/filter.c index fe3a2ac7..3df0f0c6 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -71,6 +71,8 @@ pm_path_compare(struct f_path_mask *m1, struct f_path_mask *m2) } } +u32 f_eval_asn(struct f_inst *expr); + static void pm_format(struct f_path_mask *p, byte *buf, unsigned int size) { @@ -84,10 +86,24 @@ pm_format(struct f_path_mask *p, byte *buf, unsigned int size) return; } - if (p->kind == PM_ASN) - buf += bsprintf(buf, " %u", p->val); - else - buf += bsprintf(buf, (p->kind == PM_ASTERISK) ? 
" *" : " ?"); + switch(p->kind) + { + case PM_ASN: + buf += bsprintf(buf, " %u", p->val); + break; + + case PM_QUESTION: + buf += bsprintf(buf, " ?"); + break; + + case PM_ASTERISK: + buf += bsprintf(buf, " *"); + break; + + case PM_ASN_EXPR: + buf += bsprintf(buf, " %u", f_eval_asn((struct f_inst *) p->val)); + break; + } p = p->next; } @@ -181,25 +197,11 @@ val_simple_in_range(struct f_val v1, struct f_val v2) return patmatch(v2.val.s, v1.val.s); if ((v1.type == T_IP) && (v2.type == T_PREFIX)) - return !(ipa_compare(ipa_and(v2.val.px.ip, ipa_mkmask(v2.val.px.len)), ipa_and(v1.val.px.ip, ipa_mkmask(v2.val.px.len)))); - - if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX)) { - - if (v1.val.px.len & (LEN_PLUS | LEN_MINUS | LEN_RANGE)) - return CMP_ERROR; + return ipa_in_net(v1.val.px.ip, v2.val.px.ip, v2.val.px.len); - int p1 = v1.val.px.len & LEN_MASK; - int p2 = v2.val.px.len & LEN_MASK; - ip_addr mask = ipa_mkmask(MIN(p1, p2)); + if ((v1.type == T_PREFIX) && (v2.type == T_PREFIX)) + return ipa_in_net(v1.val.px.ip, v2.val.px.ip, v2.val.px.len) && (v1.val.px.len >= v2.val.px.len); - if (ipa_compare(ipa_and(v2.val.px.ip, mask), ipa_and(v1.val.px.ip, mask))) - return 0; - - int l, h; - f_prefix_get_bounds(&v2.val.px, &l, &h); - - return ((l <= v1.val.px.len) && (v1.val.px.len <= h)); - } return CMP_ERROR; } @@ -350,6 +352,7 @@ interpret(struct f_inst *what) { struct symbol *sym; struct f_val v1, v2, res; + unsigned u1, u2; int i; res.type = T_VOID; @@ -412,6 +415,18 @@ interpret(struct f_inst *what) res.val.i = v1.val.i || v2.val.i; break; + case P('m','p'): + TWOARGS_C; + if ((v1.type != T_INT) || (v2.type != T_INT)) + runtime( "Can't operate with value of non-integer type in pair constructor" ); + u1 = v1.val.i; + u2 = v2.val.i; + if ((u1 > 0xFFFF) || (u2 > 0xFFFF)) + runtime( "Can't operate with value out of bounds in pair constructor" ); + res.val.i = (u1 << 16) | u2; + res.type = T_PAIR; + break; + /* Relational operators */ #define COMPARE(x) \ @@ -828,6 +843,7 
@@ i_same(struct f_inst *f1, struct f_inst *f2) case '/': case '|': case '&': + case P('m','p'): case P('!','='): case P('=','='): case '<': @@ -957,6 +973,16 @@ f_eval_int(struct f_inst *expr) return res.val.i; } +u32 +f_eval_asn(struct f_inst *expr) +{ + struct f_val res = interpret(expr); + if (res.type != T_INT) + cf_error("Can't operate with value of non-integer type in AS path mask constructor"); + + return res.val.i; +} + /** * filter_same - compare two filters * @new: first filter to be compared diff --git a/filter/test.conf b/filter/test.conf index f1cdf04c..f3b79619 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -9,6 +9,17 @@ router id 62.168.0.1; define xyzzy = (120+10); + +function mkpair(int a) +{ + return (1, a); +} + +function mkpath(int a; int b) +{ + return [= a b 3 2 1 =]; +} + function callme(int arg1; int arg2) int local1; int local2; @@ -50,6 +61,7 @@ clist l; print "Should be false: ", p2 ~ pm1, " ", p2 ~ pm2; print "Should be true: ", p2 ~ / ? 4 3 2 1 /, " ", p2, " ", / ? 
4 3 2 1 /; print "Should be true: ", p2 ~ [= * 4 3 * 1 =], " ", p2, " ", [= * 4 3 * 1 =]; + print "Should be true: ", p2 ~ [= (3+2) (2*2) 3 2 1 =], " ", p2 ~ mkpath(5, 4); print "5 = ", p2.len; pm1 = [= 1 2 * 3 4 5 =]; @@ -143,11 +155,15 @@ string s; px = 1.2.0.0/18; print "Testing prefixes: 1.2.0.0/18 = ", px; + print " must be true: ", 192.168.0.0/16 ~ 192.168.0.0/16, " ", 192.168.0.0/17 ~ 192.168.0.0/16, " ", 192.168.254.0/24 ~ 192.168.0.0/16; + print " must be false: ", 192.168.0.0/15 ~ 192.168.0.0/16, " ", 192.160.0.0/17 ~ 192.168.0.0/16; + p = 127.1.2.3; print "Testing mask : 127.0.0.0 = ", p.mask(8); pp = (1, 2); - print "Testing pairs: (1,2) = ", (1,2), " = ", pp; + print "Testing pairs: (1,2) = ", (1,2), " = ", pp, " = ", (1,1+1), " = ", mkpair(2); + print " must be true: ", (1,2) = (1,1+1); print "Testing enums: ", RTS_DUMMY, " ", RTS_STATIC; s = "Hello"; @@ -31,7 +31,11 @@ typedef struct ipv6_addr { #define IPA_NONE _MI(0,0,0,0) -#define ipa_equal(x,y) (!memcmp(&(x),&(y),sizeof(ip_addr))) +#define ipa_equal(x,y) ({ ip_addr _a=(x), _b=(y); \ + _I0(_a) == _I0(_b) && \ + _I1(_a) == _I1(_b) && \ + _I2(_a) == _I2(_b) && \ + _I3(_a) == _I3(_b); }) #define ipa_nonzero(x) ({ ip_addr _a=(x); (_I0(_a) || _I1(_a) || _I2(_a) || _I3(_a)); }) #define ipa_and(x,y) ({ ip_addr _a=(x), _b=(y); \ _MI(_I0(_a) & _I0(_b), \ diff --git a/lib/resource.c b/lib/resource.c index 9e626815..289af933 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -328,6 +328,42 @@ mb_allocz(pool *p, unsigned size) } /** + * mb_realloc - reallocate a memory block + * @p: pool + * @m: memory block + * @size: new size of the block + * + * mb_realloc() changes the size of the memory block @m to a given size. + * The contents will be unchanged to the minimum of the old and new sizes; + * newly allocated memory will be uninitialized. If @m is NULL, the call + * is equivalent to mb_alloc(@p, @size). 
+ * + * Like mb_alloc(), mb_realloc() also returns a pointer to the memory + * chunk, not to the resource, hence you have to free it using + * mb_free(), not rfree(). + */ +void * +mb_realloc(pool *p, void *m, unsigned size) +{ + struct mblock *ob = NULL; + + if (m) + { + ob = SKIP_BACK(struct mblock, data, m); + if (ob->r.n.next) + rem_node(&ob->r.n); + } + + struct mblock *b = xrealloc(ob, sizeof(struct mblock) + size); + + b->r.class = &mb_class; + add_tail(&p->inside, &b->r.n); + b->size = size; + return b->data; +} + + +/** * mb_free - free a memory block * @m: memory block * @@ -339,3 +375,4 @@ mb_free(void *m) struct mblock *b = SKIP_BACK(struct mblock, data, m); rfree(b); } + diff --git a/lib/resource.h b/lib/resource.h index 42ed26ed..8dd441f0 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -47,6 +47,7 @@ extern pool root_pool; void *mb_alloc(pool *, unsigned size); void *mb_allocz(pool *, unsigned size); +void *mb_realloc(pool *p, void *m, unsigned size); void mb_free(void *); /* Memory pools with linear allocation */ @@ -75,12 +76,13 @@ void sl_free(slab *, void *); #ifdef HAVE_LIBDMALLOC /* * The standard dmalloc macros tend to produce lots of namespace - * conflicts and we use only xmalloc and xfree, so we can define - * the stubs ourselves. + * conflicts and we use only xmalloc, xrealloc and xfree, so we + * can define the stubs ourselves. */ #define DMALLOC_DISABLE #include <dmalloc.h> #define xmalloc(size) _xmalloc_leap(__FILE__, __LINE__, size) +#define xrealloc(ptr, size) _xrealloc_leap(__FILE__, __LINE__, ptr, size) #define xfree(ptr) _xfree_leap(__FILE__, __LINE__, ptr) #else /* @@ -89,7 +91,9 @@ void sl_free(slab *, void *); * the renaming.
*/ #define xmalloc bird_xmalloc +#define xrealloc bird_xrealloc void *xmalloc(unsigned); +void *xrealloc(void *, unsigned); #define xfree(x) free(x) #endif diff --git a/lib/socket.h b/lib/socket.h index 5fe91931..f1922607 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -20,6 +20,7 @@ typedef struct birdsock { unsigned sport, dport; /* 0 = unspecified (for IP: protocol type) */ int tos; /* TOS and priority, -1 = default */ int ttl; /* Time To Live, -1 = default */ + u32 flags; struct iface *iface; /* Interface; specify this for broad/multicast sockets */ byte *rbuf, *rpos; /* NULL=allocate automatically */ @@ -57,6 +58,12 @@ sk_send_buffer_empty(sock *sk) return sk->tbuf == sk->tpos; } + +/* Socket flags */ + +#define SKF_V6ONLY 1 /* Use IPV6_V6ONLY socket option */ + + /* * Socket types SA SP DA DP IF TTL SendTo (?=may, -=must not, *=must) */ diff --git a/lib/xmalloc.c b/lib/xmalloc.c index bc386c83..da2f0941 100644 --- a/lib/xmalloc.c +++ b/lib/xmalloc.c @@ -32,4 +32,24 @@ xmalloc(unsigned size) die("Unable to allocate %d bytes of memory", size); } +/** + * xrealloc - realloc with checking + * @ptr: original memory block + * @size: block size + * + * This function is equivalent to realloc() except that in case of + * failure it calls die() to quit the program instead of returning + * a %NULL pointer. + * + * Wherever possible, please use the memory resources instead. 
+ */ +void * +xrealloc(void *ptr, unsigned size) +{ + void *p = realloc(ptr, size); + if (p) + return p; + die("Unable to allocate %d bytes of memory", size); +} + #endif diff --git a/misc/bird.spec b/misc/bird.spec index 5ad0936c..56aed5b9 100644 --- a/misc/bird.spec +++ b/misc/bird.spec @@ -1,6 +1,6 @@ Summary: BIRD Internet Routing Daemon Name: bird -Version: 1.0.15 +Version: 1.0.16 Release: 1 Copyright: GPL Group: Networking/Daemons diff --git a/nest/a-path.c b/nest/a-path.c index f5499877..dba214d2 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -401,6 +401,7 @@ as_path_match(struct adata *path, struct f_path_mask *mask) struct pm_pos pos[2048 + 1]; int plen = parse_path(path, pos); int l, h, i, nh, nl; + u32 val; /* l and h are bound of interval of positions where are marked states */ @@ -424,14 +425,20 @@ as_path_match(struct adata *path, struct f_path_mask *mask) h = plen; break; - case PM_QUESTION: case PM_ASN: + val = mask->val; + goto step; + case PM_ASN_EXPR: + val = f_eval_asn((struct f_inst *) mask->val); + goto step; + case PM_QUESTION: + step: nh = -1; for (i = h; i >= l; i--) if (pos[i].mark) { pos[i].mark = 0; - if ((mask->kind == PM_QUESTION) || pm_match(pos + i, mask->val)) + if ((mask->kind == PM_QUESTION) || pm_match(pos + i, val)) pm_mark(pos, i, plen, &nl, &nh); } diff --git a/nest/attrs.h b/nest/attrs.h index 5542be6f..b838ce96 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -35,11 +35,12 @@ int as_path_is_member(struct adata *path, u32 as); #define PM_ASN 0 #define PM_QUESTION 1 #define PM_ASTERISK 2 +#define PM_ASN_EXPR 3 struct f_path_mask { struct f_path_mask *next; int kind; - u32 val; + uintptr_t val; }; int as_path_match(struct adata *path, struct f_path_mask *mask); diff --git a/nest/config.Y b/nest/config.Y index 3c6eb7b5..dc31224a 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -44,6 +44,7 @@ CF_KEYWORDS(ROUTER, ID, PROTOCOL, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, 
TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE) +CF_KEYWORDS(LISTEN, BGP, V6ONLY, ADDRESS, PORT) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE) @@ -82,6 +83,23 @@ idval: } ; + +CF_ADDTO(conf, listen) + +listen: LISTEN BGP listen_opts ';' ; + +listen_opts: + /* Nothing */ + | listen_opts listen_opt + ; + +listen_opt: + ADDRESS ipa { new_config->listen_bgp_addr = $2; } + | PORT expr { new_config->listen_bgp_port = $2; } + | V6ONLY { new_config->listen_bgp_flags |= SKF_V6ONLY; } + ; + + /* Creation of routing tables */ CF_ADDTO(conf, newtab) diff --git a/nest/iface.c b/nest/iface.c index 01f25810..5e88b21b 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -399,29 +399,43 @@ if_find_by_name(char *name) return NULL; } +struct ifa *kif_choose_primary(struct iface *i); + static int ifa_recalc_primary(struct iface *i) { - struct ifa *a, *b = NULL; - int res; + struct ifa *a = kif_choose_primary(i); - WALK_LIST(a, i->addrs) + if (a == i->addr) + return 0; + + if (i->addr) + i->addr->flags &= ~IA_PRIMARY; + + if (a) { - if (!(a->flags & IA_SECONDARY) && (!b || a->scope > b->scope)) - b = a; - a->flags &= ~IA_PRIMARY; + a->flags |= IA_PRIMARY; + rem_node(&a->n); + add_head(&i->addrs, &a->n); } - res = (b != i->addr); - i->addr = b; - if (b) + + i->addr = a; + return 1; +} + +void +ifa_recalc_all_primary_addresses(void) +{ + struct iface *i; + + WALK_LIST(i, iface_list) { - b->flags |= IA_PRIMARY; - rem_node(&b->n); - add_head(&i->addrs, &b->n); + if (ifa_recalc_primary(i)) + if_change_flags(i, i->flags | IF_TMP_DOWN); } - return res; } + /** * ifa_update - update interface address * @a: new interface address @@ -464,7 +478,7 @@ ifa_update(struct ifa *a) memcpy(b, a, sizeof(struct ifa)); add_tail(&i->addrs, &b->n); b->flags = (i->flags & ~IA_FLAGS) | 
(a->flags & IA_FLAGS); - if ((!i->addr || i->addr->scope < b->scope) && ifa_recalc_primary(i)) + if (ifa_recalc_primary(i)) if_change_flags(i, i->flags | IF_TMP_DOWN); if (b->flags & IF_UP) ifa_notify_change(IF_CHANGE_CREATE | IF_CHANGE_UP, b); diff --git a/nest/iface.h b/nest/iface.h index f884dd90..af98a761 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -80,6 +80,7 @@ void if_end_partial_update(struct iface *); void if_feed_baby(struct proto *); struct iface *if_find_by_index(unsigned); struct iface *if_find_by_name(char *); +void ifa_recalc_all_primary_addresses(void); /* The Neighbor Cache */ diff --git a/nest/proto-hooks.c b/nest/proto-hooks.c index 82df5cb7..c30b1070 100644 --- a/nest/proto-hooks.c +++ b/nest/proto-hooks.c @@ -179,8 +179,8 @@ void ifa_notify(struct proto *p, unsigned flags, struct ifa *a) * rt_notify - notify instance about routing table change * @p: protocol instance * @net: a network entry - * @new: new optimal route for the network - * @old: old optimal route for the network + * @new: new route for the network + * @old: old route for the network * @attrs: extended attributes associated with the @new entry * * The rt_notify() hook is called to inform the protocol instance about @@ -188,6 +188,17 @@ void ifa_notify(struct proto *p, unsigned flags, struct ifa *a) * belonging to network @net being replaced by a new route @new with * extended attributes @attrs. Either @new or @old or both can be %NULL * if the corresponding route doesn't exist. + * + * If the type of route announcement is RA_OPTIMAL, it is an + * announcement of optimal route change, @new stores the new optimal + * route and @old stores the old optimal route. + * + * If the type of route announcement is RA_ANY, it is an announcement + * of any route change, @new stores the new route and @old stores the + * old route from the same protocol. + * + * @p->accept_ra_types specifies which kind of route announcements + * protocol wants to receive. 
*/ void rt_notify(struct proto *p, net *net, rte *new, rte *old, ea_list *attrs) { DUMMY; } diff --git a/nest/proto.c b/nest/proto.c index 0ad7229c..2af077b8 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -269,6 +269,7 @@ proto_init(struct proto_config *c) * @old: old configuration or %NULL if it's boot time config * @force_reconfig: force restart of all protocols (used for example * when the router ID changes) + * @type: type of reconfiguration (RECONFIG_SOFT or RECONFIG_HARD) * * Scan differences between @old and @new configuration and adjust all * protocol instances to conform to the new configuration. @@ -281,15 +282,17 @@ proto_init(struct proto_config *c) * When a protocol exists in the old configuration, but it doesn't in the * new one, it's shut down and deleted after the shutdown completes. * - * When a protocol exists in both configurations, the core decides whether - * it's possible to reconfigure it dynamically (it checks all the core properties - * of the protocol and if they match, it asks the reconfigure() hook of the - * protocol to see if the protocol is able to switch to the new configuration). - * If it isn't possible, the protocol is shut down and a new instance is started - * with the new configuration after the shutdown is completed. + * When a protocol exists in both configurations, the core decides + * whether it's possible to reconfigure it dynamically - it checks all + * the core properties of the protocol (changes in filters are ignored + * if type is RECONFIG_SOFT) and if they match, it asks the + * reconfigure() hook of the protocol to see if the protocol is able + * to switch to the new configuration. If it isn't possible, the + * protocol is shut down and a new instance is started with the new + * configuration after the shutdown is completed. 
*/ void -protos_commit(struct config *new, struct config *old, int force_reconfig) +protos_commit(struct config *new, struct config *old, int force_reconfig, int type) { struct proto_config *oc, *nc; struct proto *p, *n; @@ -310,8 +313,8 @@ protos_commit(struct config *new, struct config *old, int force_reconfig) && nc->preference == oc->preference && nc->disabled == oc->disabled && nc->table->table == oc->table->table - && filter_same(nc->in_filter, oc->in_filter) - && filter_same(nc->out_filter, oc->out_filter) + && ((type == RECONFIG_SOFT) || filter_same(nc->in_filter, oc->in_filter)) + && ((type == RECONFIG_SOFT) || filter_same(nc->out_filter, oc->out_filter)) && p->proto_state != PS_DOWN) { /* Generic attributes match, try converting them and then ask the protocol */ @@ -512,6 +515,9 @@ static void proto_fell_down(struct proto *p) { DBG("Protocol %s down\n", p->name); + ASSERT(p->stats.imp_routes == 0); + + bzero(&p->stats, sizeof(struct proto_stats)); rt_unlock_table(p->table); proto_rethink_goal(p); } @@ -693,9 +699,30 @@ proto_do_show(struct proto *p, int verbose) buf); if (verbose) { - cli_msg(-1006, "\tPreference: %d", p->preference); - cli_msg(-1006, "\tInput filter: %s", filter_name(p->in_filter)); - cli_msg(-1006, "\tOutput filter: %s", filter_name(p->out_filter)); + cli_msg(-1006, " Preference: %d", p->preference); + cli_msg(-1006, " Input filter: %s", filter_name(p->in_filter)); + cli_msg(-1006, " Output filter: %s", filter_name(p->out_filter)); + + if (p->proto_state != PS_DOWN) + { + cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", + p->stats.imp_routes, p->stats.exp_routes, p->stats.pref_routes); + cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", + p->stats.imp_updates_received, p->stats.imp_updates_invalid, + p->stats.imp_updates_filtered, p->stats.imp_updates_ignored, + p->stats.imp_updates_accepted); + cli_msg(-1006, " Import 
withdraws: %10u %10u --- %10u %10u", + p->stats.imp_withdraws_received, p->stats.imp_withdraws_invalid, + p->stats.imp_withdraws_ignored, p->stats.imp_withdraws_accepted); + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", + p->stats.exp_updates_received, p->stats.exp_updates_rejected, + p->stats.exp_updates_filtered, p->stats.exp_updates_accepted); + cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", + p->stats.exp_withdraws_received, p->stats.exp_withdraws_accepted); + } + + cli_msg(-1006, ""); } } diff --git a/nest/protocol.h b/nest/protocol.h index d681ae68..0f9d59d8 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -55,7 +55,7 @@ void protos_build(void); void proto_build(struct protocol *); void protos_preconfig(struct config *); void protos_postconfig(struct config *); -void protos_commit(struct config *new, struct config *old, int force_restart); +void protos_commit(struct config *new, struct config *old, int force_restart, int type); void protos_dump_all(void); #define GA_UNKNOWN 0 /* Attribute not recognized */ @@ -87,6 +87,31 @@ struct proto_config { /* Protocol-specific data follow... 
*/ }; + /* Protocol statistics */ +struct proto_stats { + /* Import - from protocol to core */ + u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ + u32 pref_routes; /* Number of routes that are preferred, sum over all routing table */ + u32 imp_updates_received; /* Number of route updates received */ + u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ + u32 imp_updates_filtered; /* Number of route updates rejected by filters */ + u32 imp_updates_ignored; /* Number of route updates rejected as already in route table */ + u32 imp_updates_accepted; /* Number of route updates accepted and imported */ + u32 imp_withdraws_received; /* Number of route withdraws received */ + u32 imp_withdraws_invalid; /* Number of route withdraws rejected as invalid */ + u32 imp_withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 imp_withdraws_accepted; /* Number of route withdraws accepted and processed */ + + /* Export - from core to protocol */ + u32 exp_routes; /* Number of routes successfully exported to the protocol */ + u32 exp_updates_received; /* Number of route updates received */ + u32 exp_updates_rejected; /* Number of route updates rejected by protocol */ + u32 exp_updates_filtered; /* Number of route updates rejected by filters */ + u32 exp_updates_accepted; /* Number of route updates accepted and exported */ + u32 exp_withdraws_received; /* Number of route withdraws received */ + u32 exp_withdraws_accepted; /* Number of route withdraws accepted and processed */ +}; + struct proto { node n; /* Node in *_proto_list */ node glob_node; /* Node in global proto_list */ @@ -100,6 +125,7 @@ struct proto { unsigned debug; /* Debugging flags */ unsigned preference; /* Default route preference */ int min_scope; /* Minimal route scope accepted */ + unsigned accept_ra_types; /* Which types of route announcements are accepted (RA_OPTIMAL or RA_ANY) */ unsigned disabled; /* 
Manually disabled */ unsigned proto_state; /* Protocol state machine (see below) */ unsigned core_state; /* Core state machine (see below) */ @@ -108,6 +134,7 @@ struct proto { u32 hash_key; /* Random key used for hashing of neighbors */ bird_clock_t last_state_change; /* Time of last state transition */ char *last_state_name_announced; /* Last state name we've announced to the user */ + struct proto_stats stats; /* Current protocol statistics */ /* * General protocol hooks: diff --git a/nest/route.h b/nest/route.h index 43cfa9dd..1bd23a6b 100644 --- a/nest/route.h +++ b/nest/route.h @@ -146,6 +146,7 @@ typedef struct network { typedef struct rte { struct rte *next; net *net; /* Network this RTE belongs to */ + struct proto *sender; /* Protocol instance that sent the route to the routing table */ struct rta *attrs; /* Attributes of this route */ byte flags; /* Flags (REF_...) */ byte pflags; /* Protocol-specific flags */ @@ -178,6 +179,10 @@ typedef struct rte { #define REF_COW 1 /* Copy this rte on write */ +/* Types of route announcement, also used as flags */ +#define RA_OPTIMAL 1 /* Announcement of optimal route change */ +#define RA_ANY 2 /* Announcement of any route change */ + struct config; void rt_init(void); @@ -190,7 +195,7 @@ static inline net *net_find(rtable *tab, ip_addr addr, unsigned len) { return (n static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_get(&tab->fib, &addr, len); } rte *rte_find(net *net, struct proto *p); rte *rte_get_temp(struct rta *); -void rte_update(rtable *tab, net *net, struct proto *p, rte *new); +void rte_update(rtable *tab, net *net, struct proto *p, struct proto *src, rte *new); void rte_discard(rtable *tab, rte *old); void rte_dump(rte *); void rte_free(rte *); @@ -230,7 +235,7 @@ void rt_show(struct rt_show_data *); typedef struct rta { struct rta *next, **pprev; /* Hash chain */ - struct proto *proto; /* Protocol instance */ + struct proto *proto; /* Protocol instance that 
originally created the route */ unsigned uc; /* Use count */ byte source; /* Route source (RTS_...) */ byte scope; /* Route scope (SCOPE_... -- see ip.h) */ diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 348bcc2e..b86015df 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -44,7 +44,7 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) DBG("dev_if_notify: device shutdown: prefix not found\n"); return; } - rte_update(p->table, n, p, NULL); + rte_update(p->table, n, p, p, NULL); } else if (c & IF_CHANGE_UP) { @@ -66,7 +66,7 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad) e = rte_get_temp(a); e->net = n; e->pflags = 0; - rte_update(p->table, n, p, e); + rte_update(p->table, n, p, p, e); } } diff --git a/nest/rt-table.c b/nest/rt-table.c index b0781a33..fb2feaca 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -158,7 +158,7 @@ rte_trace_out(unsigned int flag, struct proto *p, rte *e, char *msg) } static inline void -do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list *tmpa, int class) +do_rte_announce(struct announce_hook *a, int type, net *net, rte *new, rte *old, ea_list *tmpa, int class) { struct proto *p = a->proto; rte *new0 = new; @@ -167,16 +167,27 @@ do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list * if (new) { + p->stats.exp_updates_received++; + char *drop_reason = NULL; if ((class & IADDR_SCOPE_MASK) < p->min_scope) - drop_reason = "out of scope"; + { + p->stats.exp_updates_rejected++; + drop_reason = "out of scope"; + } else if ((ok = p->import_control ? 
p->import_control(p, &new, &tmpa, rte_update_pool) : 0) < 0) - drop_reason = "rejected by protocol"; + { + p->stats.exp_updates_rejected++; + drop_reason = "rejected by protocol"; + } else if (ok) rte_trace_out(D_FILTERS, p, new, "forced accept by protocol"); else if (p->out_filter == FILTER_REJECT || p->out_filter && f_run(p->out_filter, &new, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT) - drop_reason = "filtered out"; + { + p->stats.exp_updates_filtered++; + drop_reason = "filtered out"; + } if (drop_reason) { rte_trace_out(D_FILTERS, p, new, drop_reason); @@ -185,7 +196,10 @@ do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list * new = NULL; } } - if (old && p->out_filter) + else + p->stats.exp_withdraws_received++; + + if (old) { if (p->out_filter == FILTER_REJECT) old = NULL; @@ -193,7 +207,7 @@ do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list * { ea_list *tmpb = p->make_tmp_attrs ? p->make_tmp_attrs(old, rte_update_pool) : NULL; ok = p->import_control ? 
p->import_control(p, &old, &tmpb, rte_update_pool) : 0; - if (ok < 0 || (!ok && f_run(p->out_filter, &old, &tmpb, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)) + if (ok < 0 || (!ok && p->out_filter && f_run(p->out_filter, &old, &tmpb, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)) { if (old != old0) rte_free(old); @@ -201,6 +215,20 @@ do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list * } } } + + if (!new && !old) + return; + + if (new) + p->stats.exp_updates_accepted++; + else + p->stats.exp_withdraws_accepted++; + + if (new) + p->stats.exp_routes++; + if (old) + p->stats.exp_routes--; + if (p->debug & D_ROUTES) { if (new && old) @@ -210,8 +238,6 @@ do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list * else if (old) rte_trace_out(D_ROUTES, p, old, "removed"); } - if (!new && !old) - return; if (!new) p->rt_notify(p, net, NULL, old, NULL); else if (tmpa) @@ -234,31 +260,51 @@ do_rte_announce(struct announce_hook *a, net *net, rte *new, rte *old, ea_list * /** * rte_announce - announce a routing table change * @tab: table the route has been added to + * @type: type of route announcement (RA_OPTIMAL or RA_ANY) * @net: network in question * @new: the new route to be announced - * @old: previous optimal route for the same network + * @old: the previous route for the same network * @tmpa: a list of temporary attributes belonging to the new route * * This function gets a routing table update and announces it - * to all protocols connected to the same table by their announcement hooks. + * to all protocols that accept the given type of route announcement + * and are connected to the same table by their announcement hooks.
* - * For each such protocol, we first call its import_control() hook which - * performs basic checks on the route (each protocol has a right to veto - * or force accept of the route before any filter is asked) and adds default - * values of attributes specific to the new protocol (metrics, tags etc.). - * Then it consults the protocol's export filter and if it accepts the - * route, the rt_notify() hook of the protocol gets called. + * Route announcement of type RA_OPTIMAL is generated when optimal + * route (in routing table @tab) changes. In that case @old stores the + * old optimal route. + * + * Route announcement of type RA_ANY is generated when any route (in + * routing table @tab) changes. In that case @old stores the old route + * from the same protocol. + * + * For each appropriate protocol, we first call its import_control() + * hook which performs basic checks on the route (each protocol has a + * right to veto or force accept of the route before any filter is + * asked) and adds default values of attributes specific to the new + * protocol (metrics, tags etc.). Then it consults the protocol's + * export filter and if it accepts the route, the rt_notify() hook of + * the protocol gets called.
*/ static void -rte_announce(rtable *tab, net *net, rte *new, rte *old, ea_list *tmpa) +rte_announce(rtable *tab, int type, net *net, rte *new, rte *old, ea_list *tmpa) { struct announce_hook *a; int class = ipa_classify(net->n.prefix); + if (type == RA_OPTIMAL) + { + if (new) + new->attrs->proto->stats.pref_routes++; + if (old) + old->attrs->proto->stats.pref_routes--; + } + WALK_LIST(a, tab->hooks) { ASSERT(a->proto->core_state == FS_HAPPY || a->proto->core_state == FS_FEEDING); - do_rte_announce(a, net, new, old, tmpa, class); + if (a->proto->accept_ra_types == type) + do_rte_announce(a, type, net, new, old, tmpa, class); } } @@ -271,7 +317,7 @@ rte_validate(rte *e) if (ipa_nonzero(ipa_and(n->n.prefix, ipa_not(ipa_mkmask(n->n.pxlen))))) { log(L_BUG "Ignoring bogus prefix %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->attrs->proto->name); + n->n.prefix, n->n.pxlen, e->sender->name); return 0; } if (n->n.pxlen) @@ -290,14 +336,14 @@ rte_validate(rte *e) return 1; } log(L_WARN "Ignoring bogus route %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->attrs->proto->name); + n->n.prefix, n->n.pxlen, e->sender->name); return 0; } - if ((c & IADDR_SCOPE_MASK) < e->attrs->proto->min_scope) + if ((c & IADDR_SCOPE_MASK) < e->sender->min_scope) { log(L_WARN "Ignoring %s scope route %I/%d received from %I via %s", ip_scope_text(c & IADDR_SCOPE_MASK), - n->n.prefix, n->n.pxlen, e->attrs->from, e->attrs->proto->name); + n->n.prefix, n->n.pxlen, e->attrs->from, e->sender->name); return 0; } } @@ -337,7 +383,7 @@ rte_same(rte *x, rte *y) } static void -rte_recalculate(rtable *table, net *net, struct proto *p, rte *new, ea_list *tmpa) +rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte *new, ea_list *tmpa) { rte *old_best = net->routes; rte *old = NULL; @@ -346,11 +392,12 @@ rte_recalculate(rtable *table, net *net, struct proto *p, rte *new, ea_list *tmp k = &net->routes; /* Find and remove original route from the same protocol */ while (old 
= *k) { - if (old->attrs->proto == p) + if (old->attrs->proto == src) { if (new && rte_same(old, new)) { /* No changes, ignore the new route */ + p->stats.imp_updates_ignored++; rte_trace_in(D_ROUTES, p, new, "ignored"); rte_free_quick(new); old->lastmod = now; @@ -362,10 +409,28 @@ rte_recalculate(rtable *table, net *net, struct proto *p, rte *new, ea_list *tmp k = &old->next; } + if (!old && !new) + { + p->stats.imp_withdraws_ignored++; + return; + } + + if (new) + p->stats.imp_updates_accepted++; + else + p->stats.imp_withdraws_accepted++; + + if (new) + p->stats.imp_routes++; + if (old) + p->stats.imp_routes--; + + rte_announce(table, RA_ANY, net, new, old, tmpa); + if (new && rte_better(new, old_best)) /* It's a new optimal route => announce and relink it */ { rte_trace_in(D_ROUTES, p, new, "added [best]"); - rte_announce(table, net, new, old_best, tmpa); + rte_announce(table, RA_OPTIMAL, net, new, old_best, tmpa); new->next = net->routes; net->routes = new; } @@ -377,7 +442,7 @@ rte_recalculate(rtable *table, net *net, struct proto *p, rte *new, ea_list *tmp for(s=net->routes; s; s=s->next) if (rte_better(s, r)) r = s; - rte_announce(table, net, r, old_best, tmpa); + rte_announce(table, RA_OPTIMAL, net, r, old_best, tmpa); if (r) /* Re-link the new optimal route */ { k = &net->routes; @@ -447,6 +512,7 @@ rte_update_unlock(void) * @table: table to be updated * @net: network node * @p: protocol submitting the update + * @src: protocol originating the update * @new: a &rte representing the new route or %NULL for route removal. * * This function is called by the routing protocols whenever they discover @@ -457,6 +523,12 @@ rte_update_unlock(void) * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all * the appropriate data and finally submit the new &rte by calling rte_update(). * + * @src specifies the protocol that originally created the route and the meaning + * of protocol-dependent data of @new. 
If @new is not %NULL, @src has to be the + * same value as @new->attrs->proto. @p specifies the protocol that called + * rte_update(). In most cases it is the same protocol as @src. rte_update() + * stores @p in @new->sender. + * * When rte_update() gets any route, it automatically validates it (checks, * whether the network and next hop address are valid IP addresses and also * whether a normal routing protocol doesn't try to smuggle a host or link @@ -466,7 +538,7 @@ rte_update_unlock(void) * stores the temporary attributes back to the &rte. * * Now, having a "public" version of the route, we - * automatically find any old route defined by the protocol @p + * automatically find any old route defined by the protocol @src * for network @n, replace it by the new one (or removing it if @new is %NULL), * recalculate the optimal route for this destination and finally broadcast * the change (if any) to all routing protocols by calling rte_announce(). @@ -475,14 +547,16 @@ rte_update_unlock(void) * from a special linear pool @rte_update_pool and freed when rte_update() * finishes.
*/ + void -rte_update(rtable *table, net *net, struct proto *p, rte *new) +rte_update(rtable *table, net *net, struct proto *p, struct proto *src, rte *new) { ea_list *tmpa = NULL; rte_update_lock(); if (new) { + new->sender = p; struct filter *filter = p->in_filter; /* Do not filter routes going to the secondary side of the pipe, @@ -491,35 +565,42 @@ rte_update(rtable *table, net *net, struct proto *p, rte *new) if (p->table != table) filter = FILTER_ACCEPT; + p->stats.imp_updates_received++; if (!rte_validate(new)) { rte_trace_in(D_FILTERS, p, new, "invalid"); + p->stats.imp_updates_invalid++; goto drop; } if (filter == FILTER_REJECT) { + p->stats.imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); goto drop; } - if (p->make_tmp_attrs) - tmpa = p->make_tmp_attrs(new, rte_update_pool); + if (src->make_tmp_attrs) + tmpa = src->make_tmp_attrs(new, rte_update_pool); if (filter) { ea_list *old_tmpa = tmpa; int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0); if (fr > F_ACCEPT) { + p->stats.imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); goto drop; } - if (tmpa != old_tmpa && p->store_tmp_attrs) - p->store_tmp_attrs(new, tmpa); + if (tmpa != old_tmpa && src->store_tmp_attrs) + src->store_tmp_attrs(new, tmpa); } if (!(new->attrs->aflags & RTAF_CACHED)) /* Need to copy attributes */ new->attrs = rta_lookup(new->attrs); new->flags |= REF_COW; } - rte_recalculate(table, net, p, new, tmpa); + else + p->stats.imp_withdraws_received++; + + rte_recalculate(table, net, p, src, new, tmpa); rte_update_unlock(); return; @@ -531,11 +612,8 @@ drop: void rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during garbage collection */ { - net *n = old->net; - struct proto *p = old->attrs->proto; - rte_update_lock(); - rte_recalculate(t, n, p, NULL, NULL); + rte_recalculate(t, old->net, old->sender, old->attrs->proto, NULL, NULL); rte_update_unlock(); } @@ -673,8 +751,8 @@ again: ncnt++; rescan: for (e=n->routes; e; 
e=e->next, rcnt++) - if (e->attrs->proto->core_state != FS_HAPPY && - e->attrs->proto->core_state != FS_FEEDING) + if (e->sender->core_state != FS_HAPPY && + e->sender->core_state != FS_FEEDING) { rte_discard(tab, e); rdel++; @@ -827,6 +905,18 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } +static inline void +do_feed_baby(struct proto *p, int type, struct announce_hook *h, net *n, rte *e) +{ + struct proto *q = e->attrs->proto; + ea_list *tmpa; + + rte_update_lock(); + tmpa = q->make_tmp_attrs ? q->make_tmp_attrs(e, rte_update_pool) : NULL; + do_rte_announce(h, type, n, e, NULL, tmpa, ipa_classify(n->n.prefix)); + rte_update_unlock(); +} + /** * rt_feed_baby - advertise routes to a new protocol * @p: protocol to be fed @@ -865,19 +955,24 @@ again: FIB_ITERATE_PUT(fit, fn); return 0; } - if (e) - { - struct proto *q = e->attrs->proto; - ea_list *tmpa; - - if (p->core_state != FS_FEEDING) - return 1; /* In the meantime, the protocol fell down. */ - rte_update_lock(); - tmpa = q->make_tmp_attrs ? q->make_tmp_attrs(e, rte_update_pool) : NULL; - do_rte_announce(h, n, e, NULL, tmpa, ipa_classify(n->n.prefix)); - rte_update_unlock(); - max_feed--; - } + + if (p->accept_ra_types == RA_OPTIMAL) + if (e) + { + if (p->core_state != FS_FEEDING) + return 1; /* In the meantime, the protocol fell down. */ + do_feed_baby(p, RA_OPTIMAL, h, n, e); + max_feed--; + } + + if (p->accept_ra_types == RA_ANY) + for(e = n->routes; e != NULL; e = e->next) + { + if (p->core_state != FS_FEEDING) + return 1; /* In the meantime, the protocol fell down. */ + do_feed_baby(p, RA_ANY, h, n, e); + max_feed--; + } } FIB_ITERATE_END(fn); p->feed_ahook = h->next; @@ -988,8 +1083,11 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (p2 && p2 != p0) ok = 0; if (ok && d->export_mode) { - int ic = (p1->import_control ? 
p1->import_control(p1, &e, &tmpa, rte_update_pool) : 0); - if (ic < 0) + int class = ipa_classify(n->n.prefix); + int ic; + if ((class & IADDR_SCOPE_MASK) < p1->min_scope) + ok = 0; + else if ((ic = p1->import_control ? p1->import_control(p1, &e, &tmpa, rte_update_pool) : 0) < 0) ok = 0; else if (!ic && d->export_mode > 1) { diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 8a849e73..68f21b97 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -39,9 +39,9 @@ struct attr_desc { }; static int -bgp_check_origin(struct bgp_proto *p UNUSED, byte *a UNUSED, int len) +bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED) { - if (len > 2) + if (*a > 2) return 6; return 0; } @@ -55,25 +55,97 @@ bgp_format_origin(eattr *a, byte *buf, int buflen) } static int -bgp_check_path(byte *a, int len, int bs, int errcode) +path_segment_contains(byte *p, int bs, u32 asn) { - while (len) + int i; + int len = p[1]; + p += 2; + + for(i=0; i<len; i++) { - DBG("Path segment %02x %02x\n", a[0], a[1]); - if (len < 2 || - (a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE) || - bs * a[1] + 2 > len) - return errcode; - len -= bs * a[1] + 2; - a += bs * a[1] + 2; + u32 asn2 = (bs == 4) ? 
get_u32(p) : get_u16(p); + if (asn2 == asn) + return 1; + p += bs; } + return 0; } +/* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */ static int -bgp_check_as_path(struct bgp_proto *p, byte *a, int len) +validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, unsigned int *ilength) +{ + int res = 0; + u8 *a, *dst; + int len, plen, copy; + + dst = a = idata; + len = *ilength; + + while (len) + { + if (len < 2) + return -1; + + plen = 2 + bs * a[1]; + if (len < plen) + return -1; + + switch (a[0]) + { + case AS_PATH_SET: + copy = 1; + res++; + break; + + case AS_PATH_SEQUENCE: + copy = 1; + res += a[1]; + break; + + case AS_PATH_CONFED_SEQUENCE: + case AS_PATH_CONFED_SET: + if (as_path && path_segment_contains(a, bs, p->remote_as)) + { + log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name); + return -1; + } + + log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment", + p->p.name, as_path ? "AS" : "AS4"); + copy = 0; + break; + + default: + return -1; + } + + if (copy) + { + if (dst != a) + memmove(dst, a, plen); + dst += plen; + } + + len -= plen; + a += plen; + } + + *ilength = dst - idata; + return res; +} + +static inline int +validate_as_path(struct bgp_proto *p, byte *a, int *len) { - return bgp_check_path(a, len, p->as4_session ? 4 : 2, 11); + return validate_path(p, 1, p->as4_session ? 
4 : 2, a, len); +} + +static inline int +validate_as4_path(struct bgp_proto *p, struct adata *path) +{ + return validate_path(p, 0, 4, path->data, &path->length); } static int @@ -160,7 +232,7 @@ static struct attr_desc bgp_attr_table[] = { { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */ bgp_check_origin, bgp_format_origin }, { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */ - bgp_check_as_path, NULL }, + NULL, NULL }, /* is checked by validate_as_path() as a special case */ { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */ bgp_check_next_hop, NULL }, { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */ @@ -1061,73 +1133,6 @@ as4_aggregator_valid(struct adata *aggr) return 0; } -static int -as4_path_sanitize_and_get_length(struct adata *path) -{ - int res = 0; - u8 *p, *dst; - int len, plen, copy; - - dst = p = path->data; - len = path->length; - - while (len) - { - if (len <= 2) /* We do not allow empty segments */ - goto inconsistent_path; - - switch (p[0]) - { - case AS_PATH_SET: - plen = 2 + 4 * p[1]; - copy = 1; - res++; - break; - - case AS_PATH_SEQUENCE: - plen = 2 + 4 * p[1]; - copy = 1; - res += p[1]; - break; - - case AS_PATH_CONFED_SEQUENCE: - case AS_PATH_CONFED_SET: - log(L_WARN "BGP: AS4_PATH attribute contains AS_CONFED_* segment, skipping segment"); - plen = 2 + 4 * p[1]; - copy = 0; - break; - - default: - goto unknown_segment; - } - - if (len < plen) - goto inconsistent_path; - - if (copy) - { - if (dst != p) - memmove(dst, p, plen); - dst += plen; - } - - len -= plen; - p += plen; - } - - path->length = dst - path->data; - return res; - - inconsistent_path: - log(L_WARN "BGP: AS4_PATH attribute is inconsistent, skipping attribute"); - return -1; - - unknown_segment: - log(L_WARN "BGP: AS4_PATH attribute contains unknown segment, skipping attribute"); - return -1; -} - - /* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */ static void @@ -1141,7 +1146,7 
@@ bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) if (a4 && !as4_aggregator_valid(a4->u.ptr)) { - log(L_WARN "BGP: AS4_AGGREGATOR attribute is invalid, skipping attribute"); + log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name); a4 = NULL; a4_removed = 1; } @@ -1177,15 +1182,18 @@ bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); if ((a2_as == AS_TRANS) && !a4_removed) - log(L_WARN "BGP: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing"); + log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name); } } else if (a4) - log(L_WARN "BGP: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing"); + log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name); int p2_len = as_path_getlen(p2->u.ptr); - int p4_len = p4 ? as4_path_sanitize_and_get_length(p4->u.ptr) : -1; + int p4_len = p4 ? 
validate_as4_path(p, p4->u.ptr) : -1; + + if (p4 && (p4_len < 0)) + log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name); if ((p4_len <= 0) || (p2_len < p4_len)) p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); @@ -1200,7 +1208,7 @@ bgp_remove_as4_attrs(struct bgp_proto *p, rta *a) unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR); ea_list **el = &(a->eattrs); - /* We know that ea_lists constructed in bgp_decode_attrs have one attribute per ea_list struct */ + /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */ while (*el != NULL) { unsigned fid = (*el)->attrs[0].id; @@ -1302,6 +1310,12 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin if (errcode < 0) continue; } + else if (code == BA_AS_PATH) + { + /* Special case as it might also trim the attribute */ + if (validate_as_path(bgp, z, &l) < 0) + { errcode = 11; goto err; } + } type = desc->type; } else /* Unknown attribute */ @@ -1310,6 +1324,11 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin { errcode = 2; goto err; } type = EAF_TYPE_OPAQUE; } + + // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag + // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL)) + // { errcode = 4; goto err; } + seen[code/8] |= (1 << (code%8)); ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); ea->next = a->eattrs; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index cbc699bb..a6b9d574 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -73,10 +73,8 @@ static int bgp_counter; /* Number of protocol instances using the listening so static void bgp_close(struct bgp_proto *p, int apply_md5); static void bgp_connect(struct bgp_proto *p); -static void bgp_active(struct bgp_proto *p, int delay); -static void bgp_initiate(struct bgp_proto *p); -static void bgp_stop(struct bgp_proto *p); -static sock 
*bgp_setup_listen_sk(void); +static void bgp_active(struct bgp_proto *p); +static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags); /** @@ -92,10 +90,11 @@ static sock *bgp_setup_listen_sk(void); static int bgp_open(struct bgp_proto *p) { + struct config *cfg = p->cf->c.global; bgp_counter++; if (!bgp_listen_sk) - bgp_listen_sk = bgp_setup_listen_sk(); + bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags); if (!bgp_linpool) bgp_linpool = lp_new(&root_pool, 4080); @@ -113,10 +112,36 @@ bgp_open(struct bgp_proto *p) } } - p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP; return 0; } +static void +bgp_startup(struct bgp_proto *p) +{ + BGP_TRACE(D_EVENTS, "Started"); + p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP; + bgp_active(p); +} + +static void +bgp_startup_timeout(timer *t) +{ + bgp_startup(t->data); +} + + +static void +bgp_initiate(struct bgp_proto *p) +{ + if (p->startup_delay) + { + BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay); + bgp_start_timer(p->startup_timer, p->startup_delay); + } + else + bgp_startup(p); +} + /** * bgp_close - close a BGP instance * @p: BGP instance @@ -196,9 +221,6 @@ bgp_close_conn(struct bgp_conn *conn) /** * bgp_update_startup_delay - update a startup delay * @p: BGP instance - * @conn: related BGP connection - * @code: BGP error code - * @subcode: BGP error subcode * * This function updates a startup delay that is used to postpone next BGP connect. * It also handles disable_after_error and might stop BGP instance when error @@ -207,26 +229,14 @@ bgp_close_conn(struct bgp_conn *conn) * It should be called when BGP protocol error happened. 
*/ void -bgp_update_startup_delay(struct bgp_proto *p, struct bgp_conn *conn, unsigned code, unsigned subcode) +bgp_update_startup_delay(struct bgp_proto *p) { struct bgp_config *cf = p->cf; - /* Don't handle cease messages as errors */ - if (code == 6 && !subcode) - { - p->startup_delay = 0; - return; - } - - /* During start, we only consider errors on outgoing connection, because - otherwise delay timer for outgoing connection is already running and - we could increase delay time two times (or more) per one attempt to - connect. - */ - if ((p->p.proto_state == PS_START) && (conn != &p->outgoing_conn)) - return; + DBG("BGP: Updating startup delay\n"); - DBG("BGP: Updating startup delay %d %d\n", code, subcode); + if (p->last_proto_error && ((now - p->last_proto_error) >= cf->error_amnesia_time)) + p->startup_delay = 0; p->last_proto_error = now; @@ -234,27 +244,17 @@ bgp_update_startup_delay(struct bgp_proto *p, struct bgp_conn *conn, unsigned co { p->startup_delay = 0; p->p.disabled = 1; - if (p->p.proto_state == PS_START) - bgp_stop(p); - return; } - if (p->last_proto_error && ((now - p->last_proto_error) >= cf->error_amnesia_time)) - p->startup_delay = 0; - if (!p->startup_delay) p->startup_delay = cf->error_delay_time_min; else - { - p->startup_delay *= 2; - if (p->startup_delay > cf->error_delay_time_max) - p->startup_delay = cf->error_delay_time_max; - } + p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max); } static void -bgp_graceful_close_conn(struct bgp_conn *conn) +bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode) { switch (conn->state) { @@ -268,7 +268,7 @@ bgp_graceful_close_conn(struct bgp_conn *conn) case BS_OPENSENT: case BS_OPENCONFIRM: case BS_ESTABLISHED: - bgp_error(conn, 6, 0, NULL, 0); + bgp_error(conn, 6, subcode, NULL, 0); return; default: bug("bgp_graceful_close_conn: Unknown state %d", conn->state); @@ -281,7 +281,7 @@ bgp_down(struct bgp_proto *p) if (p->start_state > BSS_PREPARE) bgp_close(p, 1); - 
DBG("BGP: DOWN\n"); + BGP_TRACE(D_EVENTS, "Down"); proto_notify_state(&p->p, PS_DOWN); } @@ -293,7 +293,7 @@ bgp_decision(void *vp) DBG("BGP: Decision start\n"); if ((p->p.proto_state == PS_START) && (p->outgoing_conn.state == BS_IDLE)) - bgp_initiate(p); + bgp_active(p); if ((p->p.proto_state == PS_STOP) && (p->outgoing_conn.state == BS_IDLE) @@ -301,12 +301,12 @@ bgp_decision(void *vp) bgp_down(p); } -static void -bgp_stop(struct bgp_proto *p) +void +bgp_stop(struct bgp_proto *p, unsigned subcode) { proto_notify_state(&p->p, PS_STOP); - bgp_graceful_close_conn(&p->outgoing_conn); - bgp_graceful_close_conn(&p->incoming_conn); + bgp_graceful_close_conn(&p->outgoing_conn, subcode); + bgp_graceful_close_conn(&p->incoming_conn, subcode); ev_schedule(p->event); } @@ -333,7 +333,7 @@ bgp_conn_leave_established_state(struct bgp_proto *p) p->conn = NULL; if (p->p.proto_state == PS_UP) - bgp_stop(p); + bgp_stop(p, 0); } void @@ -473,8 +473,9 @@ bgp_setup_sk(struct bgp_proto *p, struct bgp_conn *conn, sock *s) } static void -bgp_active(struct bgp_proto *p, int delay) +bgp_active(struct bgp_proto *p) { + int delay = MAX(1, p->cf->start_delay_time); struct bgp_conn *conn = &p->outgoing_conn; BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); @@ -483,6 +484,22 @@ bgp_active(struct bgp_proto *p, int delay) bgp_start_timer(conn->connect_retry_timer, delay); } +int +bgp_apply_limits(struct bgp_proto *p) +{ + if (p->cf->route_limit && (p->p.stats.imp_routes > p->cf->route_limit)) + { + log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name); + bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED); + bgp_update_startup_delay(p); + bgp_stop(p, 1); // Errcode 6, 1 - max number of prefixes reached + return -1; + } + + return 0; +} + + /** * bgp_connect - initiate an outgoing connection * @p: BGP instance @@ -522,17 +539,6 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c bgp_start_timer(conn->connect_retry_timer, 
p->cf->connect_retry_time); } -static void -bgp_initiate(struct bgp_proto *p) -{ - unsigned delay = MAX(p->startup_delay, p->cf->start_delay_time); - - if (delay) - bgp_active(p, delay); - else - bgp_connect(p); -} - /** * bgp_incoming_connection - handle an incoming connection * @sk: TCP socket @@ -549,7 +555,6 @@ static int bgp_incoming_connection(sock *sk, int dummy UNUSED) { struct proto_config *pc; - int match = 0; DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport); WALK_LIST(pc, config->protos) @@ -558,36 +563,39 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) struct bgp_proto *p = (struct bgp_proto *) pc->proto; if (ipa_equal(p->cf->remote_ip, sk->daddr)) { - match = 1; - if ((p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && (p->start_state > BSS_PREPARE)) - { - BGP_TRACE(D_EVENTS, "Incoming connection from %I port %d", sk->daddr, sk->dport); - if (p->incoming_conn.sk) - { - DBG("BGP: But one incoming connection already exists, how is that possible?\n"); - break; - } - bgp_setup_conn(p, &p->incoming_conn); - bgp_setup_sk(p, &p->incoming_conn, sk); - sk_set_ttl(sk, p->cf->multihop ? : 1); - bgp_send_open(&p->incoming_conn); - return 0; - } + /* We are in proper state and there is no other incoming connection */ + int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && + (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); + + BGP_TRACE(D_EVENTS, "Incoming connection from %I (port %d) %s", + sk->daddr, sk->dport, acc ? "accepted" : "rejected"); + + if (!acc) + goto err; + + bgp_setup_conn(p, &p->incoming_conn); + bgp_setup_sk(p, &p->incoming_conn, sk); + sk_set_ttl(sk, p->cf->multihop ? 
: 1); + bgp_send_open(&p->incoming_conn); + return 0; } } - if (!match) - log(L_AUTH "BGP: Unauthorized connect from %I port %d", sk->daddr, sk->dport); + + log(L_WARN "BGP: Unexpected connect from unknown address %I (port %d)", sk->daddr, sk->dport); + err: rfree(sk); return 0; } static sock * -bgp_setup_listen_sk(void) +bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags) { sock *s = sk_new(&root_pool); DBG("BGP: Creating incoming socket\n"); s->type = SK_TCP_PASSIVE; - s->sport = BGP_PORT; + s->saddr = addr; + s->sport = port ? port : BGP_PORT; + s->flags = flags; s->tos = IP_PREC_INTERNET_CONTROL; s->rbsize = BGP_RX_BUFFER_SIZE; s->tbsize = BGP_TX_BUFFER_SIZE; @@ -653,7 +661,7 @@ bgp_neigh_notify(neighbor *n) { BGP_TRACE(D_EVENTS, "Neighbor lost"); bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); - bgp_stop(p); + bgp_stop(p, 0); } } } @@ -715,6 +723,10 @@ bgp_start(struct proto *P) p->event->hook = bgp_decision; p->event->data = p; + p->startup_timer = tm_new(p->p.pool); + p->startup_timer->hook = bgp_startup_timeout; + p->startup_timer->data = p; + /* * Before attempting to create the connection, we need to lock the * port, so that are sure we're the only instance attempting to talk @@ -737,11 +749,23 @@ static int bgp_shutdown(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; + unsigned subcode; BGP_TRACE(D_EVENTS, "Shutdown requested"); bgp_store_error(p, NULL, BE_MAN_DOWN, 0); + + if (P->reconfiguring) + { + if (P->cf_new) + subcode = 6; // Errcode 6, 6 - other configuration change + else + subcode = 3; // Errcode 6, 3 - peer de-configured + } + else + subcode = 2; // Errcode 6, 2 - administrative shutdown + p->startup_delay = 0; - bgp_stop(p); + bgp_stop(p, subcode); return p->p.proto_state; } @@ -753,6 +777,7 @@ bgp_init(struct proto_config *C) struct proto *P = proto_new(C, sizeof(struct bgp_proto)); struct bgp_proto *p = (struct bgp_proto *) P; + P->accept_ra_types = RA_OPTIMAL; P->rt_notify = bgp_rt_notify; P->rte_better = 
bgp_rte_better; P->import_control = bgp_import_control; @@ -779,12 +804,13 @@ bgp_init(struct proto_config *C) void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len) { + struct bgp_proto *p = c->bgp; + if (c->state == BS_CLOSE) return; - bgp_log_error(c->bgp, "Error", code, subcode, data, (len > 0) ? len : -len); - bgp_store_error(c->bgp, c, BE_BGP_TX, (code << 16) | subcode); - bgp_update_startup_delay(c->bgp, c, code, subcode); + bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len); + bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode); bgp_conn_enter_close_state(c); c->notify_code = code; @@ -792,6 +818,12 @@ bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int l c->notify_data = data; c->notify_size = (len > 0) ? len : 0; bgp_schedule_packet(c, PKT_NOTIFICATION); + + if (code != 6) + { + bgp_update_startup_delay(p); + bgp_stop(p, 0); + } } /** @@ -847,8 +879,9 @@ bgp_check(struct bgp_config *c) } static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" }; -static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown", ""}; +static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""}; static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed" }; +static char *bgp_auto_errors[] = { "", "Route limit exceeded"}; static void @@ -872,6 +905,9 @@ bgp_get_status(struct proto *P, byte *buf) case BE_BGP_TX: err2 = bgp_error_dsc(errbuf, p->last_error_code >> 16, p->last_error_code & 0xFF); break; + case BE_AUTO_DOWN: + err2 = bgp_auto_errors[p->last_error_code]; + break; } if (P->proto_state == PS_DOWN) diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 8477f9e5..9bbdab8e 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -31,6 +31,7 @@ struct bgp_config { int rr_client; 
/* Whether neighbor is RR client of me */ int rs_client; /* Whether neighbor is RS client of me */ int advertise_ipv4; /* Whether we should add IPv4 capability advertisement to OPEN message */ + u32 route_limit; /* Number of routes that may be imported, 0 means disable limit */ unsigned connect_retry_time; unsigned hold_time, initial_hold_time; unsigned keepalive_time; @@ -81,6 +82,7 @@ struct bgp_proto { ip_addr local_addr; /* Address of the local end of the link to next_hop */ ip_addr source_addr; /* Address used as advertised next hop, usually local_addr */ struct event *event; /* Event for respawning and shutting process */ + struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ unsigned int hash_size, hash_count, hash_limit; struct fib prefix_fib; /* Prefixes to be sent */ @@ -127,11 +129,14 @@ void bgp_start_timer(struct timer *t, int value); void bgp_check(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); void bgp_close_conn(struct bgp_conn *c); -void bgp_update_startup_delay(struct bgp_proto *p, struct bgp_conn *conn, unsigned code, unsigned subcode); +void bgp_update_startup_delay(struct bgp_proto *p); void bgp_conn_enter_established_state(struct bgp_conn *conn); void bgp_conn_enter_close_state(struct bgp_conn *conn); void bgp_conn_enter_idle_state(struct bgp_conn *conn); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); +int bgp_apply_limits(struct bgp_proto *p); +void bgp_stop(struct bgp_proto *p, unsigned subcode); + #ifdef LOCAL_DEBUG @@ -181,7 +186,7 @@ void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, int size); const byte * bgp_error_dsc(byte *buff, unsigned code, unsigned subcode); -void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, 
unsigned len); +void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); /* Packet types */ @@ -239,8 +244,9 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco */ #define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */ -#define BSS_CONNECT 1 /* Ordinary BGP connecting */ -#define BSS_CONNECT_NOCAP 2 /* Legacy BGP connecting (without capabilities) */ +#define BSS_DELAY 1 /* Startup delay due to previous errors */ +#define BSS_CONNECT 2 /* Ordinary BGP connecting */ +#define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */ /* Error classes */ @@ -256,8 +262,11 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco #define BEM_NEIGHBOR_LOST 1 #define BEM_INVALID_NEXT_HOP 2 -#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported */ +#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */ + +/* Automatic shutdown error codes */ +#define BEA_ROUTE_LIMIT_EXCEEDED 1 /* Well-known communities */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index c5ea87de..872fb271 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -22,7 +22,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, IPV4, - CAPABILITIES) + CAPABILITIES, LIMIT) CF_GRAMMAR @@ -77,6 +77,7 @@ bgp_proto: | bgp_proto CAPABILITIES bool ';' { BGP_CFG->capabilities = $3; } | bgp_proto ADVERTISE IPV4 bool ';' { BGP_CFG->advertise_ipv4 = $4; } | bgp_proto PASSWORD TEXT ';' { BGP_CFG->password = $3; } + | bgp_proto ROUTE LIMIT expr ';' { BGP_CFG->route_limit = $4; } ; CF_ADDTO(dynamic_attr, BGP_PATH diff --git a/proto/bgp/packets.c 
b/proto/bgp/packets.c index 27adc166..ae4906ee 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -619,14 +619,14 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) { /* Should close the other connection */ BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection"); - bgp_error(other, 6, 0, NULL, 0); + bgp_error(other, 6, 7, NULL, 0); break; } /* Fall thru */ case BS_ESTABLISHED: /* Should close this connection */ BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection"); - bgp_error(conn, 6, 0, NULL, 0); + bgp_error(conn, 6, 7, NULL, 0); return; default: bug("bgp_rx_open: Unknown state"); @@ -705,7 +705,7 @@ bgp_do_rx_update(struct bgp_conn *conn, DECODE_PREFIX(withdrawn, withdrawn_len); DBG("Withdraw %I/%d\n", prefix, pxlen); if (n = net_find(p->p.table, prefix, pxlen)) - rte_update(p->p.table, n, &p->p, NULL); + rte_update(p->p.table, n, &p->p, &p->p, NULL); } if (!attr_len && !nlri_len) /* shortcut */ @@ -724,14 +724,20 @@ bgp_do_rx_update(struct bgp_conn *conn, n = net_get(p->p.table, prefix, pxlen); e->net = n; e->pflags = 0; - rte_update(p->p.table, n, &p->p, e); + rte_update(p->p.table, n, &p->p, &p->p, e); + if (bgp_apply_limits(p) < 0) + goto bad2; } + rta_free(a); } -bad: + + return; + + bad: + bgp_error(conn, 3, err, NULL, 0); + bad2: if (a) rta_free(a); - if (err) - bgp_error(conn, 3, err, NULL, 0); return; } @@ -783,7 +789,7 @@ bgp_do_rx_update(struct bgp_conn *conn, DECODE_PREFIX(x, len); DBG("Withdraw %I/%d\n", prefix, pxlen); if (n = net_find(p->p.table, prefix, pxlen)) - rte_update(p->p.table, n, &p->p, NULL); + rte_update(p->p.table, n, &p->p, &p->p, NULL); } } @@ -824,7 +830,9 @@ bgp_do_rx_update(struct bgp_conn *conn, n = net_get(p->p.table, prefix, pxlen); e->net = n; e->pflags = 0; - rte_update(p->p.table, n, &p->p, e); + rte_update(p->p.table, n, &p->p, &p->p, e); + if (bgp_apply_limits(p) < 0) + goto bad2; } rta_free(a); } @@ -832,8 +840,9 @@ bgp_do_rx_update(struct bgp_conn *conn, return; 
-bad: + bad: bgp_error(conn, 3, 9, start, len0); + bad2: if (a) rta_free(a); return; @@ -948,14 +957,15 @@ bgp_error_dsc(byte *buff, unsigned code, unsigned subcode) } void -bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) +bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) { const byte *name; byte namebuf[32]; byte *t, argbuf[36]; unsigned i; - if (code == 6 && !subcode) /* Don't report Cease messages */ + /* Don't report Cease messages generated by myself */ + if (code == 6 && class == BE_BGP_TX) return; name = bgp_error_dsc(namebuf, code, subcode); @@ -985,10 +995,10 @@ bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len) unsigned code = pkt[19]; unsigned subcode = pkt[20]; - int delay = 1; + int err = (code != 6); - bgp_log_error(conn->bgp, "Received error notification", code, subcode, pkt+21, len-21); - bgp_store_error(conn->bgp, conn, BE_BGP_RX, (code << 16) | subcode); + bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21); + bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode); #ifndef IPV6 if ((code == 2) && ((subcode == 4) || (subcode == 7)) @@ -1005,14 +1015,19 @@ bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len) { /* We try connect without capabilities */ log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name); - conn->bgp->start_state = BSS_CONNECT_NOCAP; - delay = 0; + p->start_state = BSS_CONNECT_NOCAP; + err = 0; } #endif - if (delay) bgp_update_startup_delay(conn->bgp, conn, code, subcode); bgp_conn_enter_close_state(conn); bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE); + + if (err) + { + bgp_update_startup_delay(p); + bgp_stop(p, 0); + } } static void diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 7f7d6a31..77ca26cf 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -18,6 +18,7 @@ CF_DEFINES static struct 
ospf_area_config *this_area; static struct nbma_node *this_nbma; static struct area_net_config *this_pref; +static struct ospf_stubnet_config *this_stubnet; static void finish_iface_config(struct ospf_iface_patt *ip) @@ -38,7 +39,7 @@ CF_KEYWORDS(NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, RETRANSMIT) CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, NONBROADCAST, POINTOPOINT, TYPE) CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC) CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, LINK) -CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL) +CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY) %type <t> opttext @@ -75,6 +76,7 @@ ospf_area_start: AREA idval '{' { init_list(&this_area->patt_list); init_list(&this_area->vlink_list); init_list(&this_area->net_list); + init_list(&this_area->stubnet_list); } ; @@ -90,10 +92,36 @@ ospf_area_item: STUB COST expr { this_area->stub = $3 ; if($3<=0) cf_error("Stub cost must be greater than zero"); } | STUB bool {if($2) { if(!this_area->stub) this_area->stub=DEFAULT_STUB_COST;}else{ this_area->stub=0;}} | NETWORKS '{' pref_list '}' + | STUBNET ospf_stubnet | INTERFACE ospf_iface | ospf_vlink ; +ospf_stubnet: + ospf_stubnet_start '{' ospf_stubnet_opts '}' + | ospf_stubnet_start + ; + +ospf_stubnet_start: + prefix { + this_stubnet = cfg_allocz(sizeof(struct ospf_stubnet_config)); + add_tail(&this_area->stubnet_list, NODE this_stubnet); + this_stubnet->px = $1; + this_stubnet->cost = COST_D; + } + ; + +ospf_stubnet_opts: + /* empty */ + | ospf_stubnet_opts ospf_stubnet_item ';' + ; + +ospf_stubnet_item: + HIDDEN bool { this_stubnet->hidden = $2; } + | SUMMARY bool { this_stubnet->summary = $2; } + | COST expr { this_stubnet->cost = $2; } + ; + ospf_vlink: ospf_vlink_start '{' ospf_vlink_opts '}' { finish_iface_config(OSPF_PATT); } | ospf_vlink_start diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c index c7d20273..45b6b613 100644 --- a/proto/ospf/hello.c +++ b/proto/ospf/hello.c @@ -24,16 +24,36 @@ 
ospf_hello_receive(struct ospf_hello_packet *ps, mask = ps->netmask; ipa_ntoh(mask); - if (((ifa->type != OSPF_IT_VLINK) && (ifa->type != OSPF_IT_PTP)) && - ((unsigned) ipa_mklen(mask) != ifa->iface->addr->pxlen)) - { - log(L_ERR "%s%I%sbad netmask %I.", beg, faddr, rec, mask); - return; - } + if (ifa->type != OSPF_IT_VLINK) + { + char *msg = L_WARN "Received HELLO packet %s (%I) is inconsistent " + "with the primary address of interface %s."; + + if ((ifa->type != OSPF_IT_PTP) && + !ipa_equal(mask, ipa_mkmask(ifa->iface->addr->pxlen))) + { + if (!n) log(msg, "netmask", mask, ifa->iface->name); + return; + } + + /* This check is not specified in RFC 2328, but it is needed + * to handle the case when there is more IP networks on one + * physical network (which is not handled in RFC 2328). + * We allow OSPF on primary IP address only and ignore HELLO packets + * with secondary addresses (which are sent for example by Quagga. + */ + if ((ifa->iface->addr->flags & IA_UNNUMBERED) ? + !ipa_equal(faddr, ifa->iface->addr->opposite) : + !ipa_equal(ipa_and(faddr,mask), ifa->iface->addr->prefix)) + { + if (!n) log(msg, "address", faddr, ifa->iface->name); + return; + } + } if (ntohs(ps->helloint) != ifa->helloint) { - log(L_WARN "%s%I%shello interval mismatch (%d).", beg, faddr, rec, + log(L_ERR "%s%I%shello interval mismatch (%d).", beg, faddr, rec, ntohs(ps->helloint)); return; } @@ -205,7 +225,8 @@ ospf_hello_send(timer * timer, int poll, struct ospf_neighbor *dirn) pkt->netmask = ipa_mkmask(ifa->iface->addr->pxlen); ipa_hton(pkt->netmask); - if (ifa->type == OSPF_IT_VLINK) pkt->netmask = IPA_NONE; + if ((ifa->type == OSPF_IT_VLINK) || (ifa->type == OSPF_IT_PTP)) + pkt->netmask = IPA_NONE; pkt->helloint = ntohs(ifa->helloint); pkt->options = ifa->oa->opt.byte; pkt->priority = ifa->priority; diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 0cab1d7b..c9b5f430 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -76,6 +76,9 @@ #include <stdlib.h> #include 
"ospf.h" + +static void ospf_rt_notify(struct proto *p, net * n, rte * new, rte * old UNUSED, ea_list * attrs); +static void ospf_ifa_notify(struct proto *p, unsigned flags, struct ifa *a); static int ospf_rte_better(struct rte *new, struct rte *old); static int ospf_rte_same(struct rte *new, struct rte *old); static void ospf_disp(timer *timer); @@ -124,6 +127,9 @@ ospf_start(struct proto *p) po->disp_timer->hook = ospf_disp; po->disp_timer->recurrent = po->tick; tm_start(po->disp_timer, 1); + po->lsab_size = 256; + po->lsab_used = 0; + po->lsab = mb_alloc(p->pool, po->lsab_size); init_list(&(po->iface_list)); init_list(&(po->area_list)); fib_init(&po->rtf, p->pool, sizeof(ort), 16, ospf_rt_initort); @@ -142,6 +148,7 @@ ospf_start(struct proto *p) oa = mb_allocz(p->pool, sizeof(struct ospf_area)); add_tail(&po->area_list, NODE oa); po->areano++; + oa->ac = ac; oa->stub = ac->stub; oa->areaid = ac->areaid; oa->rt = NULL; @@ -224,8 +231,10 @@ ospf_init(struct proto_config *c) p->import_control = ospf_import_control; p->make_tmp_attrs = ospf_make_tmp_attrs; p->store_tmp_attrs = ospf_store_tmp_attrs; + p->accept_ra_types = RA_OPTIMAL; p->rt_notify = ospf_rt_notify; p->if_notify = ospf_iface_notify; + p->ifa_notify = ospf_ifa_notify; p->rte_better = ospf_rte_better; p->rte_same = ospf_rte_same; @@ -428,7 +437,7 @@ ospf_shutdown(struct proto *p) return PS_DOWN; } -void +static void ospf_rt_notify(struct proto *p, net * n, rte * new, rte * old UNUSED, ea_list * attrs) { @@ -473,6 +482,25 @@ ospf_rt_notify(struct proto *p, net * n, rte * new, rte * old UNUSED, } static void +ospf_ifa_notify(struct proto *p, unsigned flags, struct ifa *a) +{ + struct proto_ospf *po = (struct proto_ospf *) p; + struct ospf_iface *ifa; + + if ((a->flags & IA_SECONDARY) || (a->flags & IA_UNNUMBERED)) + return; + + WALK_LIST(ifa, po->iface_list) + { + if (ifa->iface == a->iface) + { + schedule_rt_lsa(ifa->oa); + return; + } + } +} + +static void ospf_get_status(struct proto *p, byte * buf) { 
struct proto_ospf *po = (struct proto_ospf *) p; @@ -602,9 +630,31 @@ ospf_reconfigure(struct proto *p, struct proto_config *c) if (!oa) return 0; + oa->ac = newac; oa->stub = newac->stub; if (newac->stub && (oa->areaid == 0)) oa->stub = 0; + /* Check stubnet_list */ + struct ospf_stubnet_config *oldsn = HEAD(oldac->stubnet_list); + struct ospf_stubnet_config *newsn = HEAD(newac->stubnet_list); + + while (((NODE(oldsn))->next != NULL) && ((NODE(newsn))->next != NULL)) + { + if (!ipa_equal(oldsn->px.addr, newsn->px.addr) || + (oldsn->px.len != newsn->px.len) || + (oldsn->hidden != newsn->hidden) || + (oldsn->summary != newsn->summary) || + (oldsn->cost != newsn->cost)) + break; + + oldsn = (struct ospf_stubnet_config *)(NODE(oldsn))->next; + newsn = (struct ospf_stubnet_config *)(NODE(newsn))->next; + } + + /* If there is no change, both pointers should be NULL */ + if (((NODE(oldsn))->next) != ((NODE(newsn))->next)) + schedule_rt_lsa(oa); + /* Change net_list */ FIB_WALK(&oa->net_fib, nf) /* First check if some networks are deleted */ { diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index fb78af4e..23f21b8e 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -99,6 +99,14 @@ struct area_net u32 metric; }; +struct ospf_stubnet_config +{ + node n; + struct prefix px; + int hidden, summary; + u32 cost; +}; + struct ospf_area_config { node n; @@ -107,6 +115,7 @@ struct ospf_area_config list patt_list; list vlink_list; list net_list; + list stubnet_list; }; struct obits @@ -523,6 +532,7 @@ struct ospf_area { node n; u32 areaid; + struct ospf_area_config *ac; /* Related area config */ int origrt; /* Rt lsa origination scheduled? */ struct top_hash_entry *rt; /* My own router LSA */ list cand; /* List of candidates for RT calc. */ @@ -550,6 +560,8 @@ struct proto_ospf int rfc1583; /* RFC1583 compatibility */ int ebit; /* Did I originate any ext lsa? 
*/ struct ospf_area *backbone; /* If exists */ + void *lsab; /* LSA buffer used when originating router LSAs */ + int lsab_size, lsab_used; }; struct ospf_iface_patt @@ -585,8 +597,6 @@ int ospf_import_control(struct proto *p, rte **new, ea_list **attrs, struct linpool *pool); struct ea_list *ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool); void ospf_store_tmp_attrs(struct rte *rt, struct ea_list *attrs); -void ospf_rt_notify(struct proto *p, net *n, rte *new, rte *old, - ea_list * attrs); void schedule_rt_lsa(struct ospf_area *oa); void schedule_rtcalc(struct proto_ospf *po); void schedule_net_lsa(struct ospf_iface *ifa); diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c index 23785fe8..783d28ed 100644 --- a/proto/ospf/packet.c +++ b/proto/ospf/packet.c @@ -323,6 +323,9 @@ ospf_rx_hook(sock * sk, int size) return 1; } + /* This is deviation from RFC 2328 - neighbours should be identified by + * IP address on broadcast and NBMA networks. + */ n = find_neigh(ifa, ntohl(((struct ospf_packet *) ps)->routerid)); if(!n && (ps->type != HELLO_P)) diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index f906de92..79b21e6a 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -1003,11 +1003,11 @@ again1: e->pref = p->preference; DBG("Mod rte type %d - %I/%d via %I on iface %s, met %d\n", a0.source, nf->fn.prefix, nf->fn.pxlen, a0.gw, a0.iface ? 
a0.iface->name : "(none)", nf->n.metric1); - rte_update(p->table, ne, p, e); + rte_update(p->table, ne, p, p, e); } else { - rte_update(p->table, ne, p, NULL); + rte_update(p->table, ne, p, p, NULL); FIB_ITERATE_PUT(&fit, nftmp); fib_delete(fib, nftmp); goto again1; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index a15d2e35..371856f9 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -23,176 +23,198 @@ int ptp_unnumbered_stub_lsa = 0; static void * +lsab_alloc(struct proto_ospf *po, unsigned size) +{ + unsigned offset = po->lsab_used; + po->lsab_used += size; + if (po->lsab_used > po->lsab_size) + { + po->lsab_size = MAX(po->lsab_used, 2 * po->lsab_size); + po->lsab = mb_realloc(po->proto.pool, po->lsab, po->lsab_size); + } + return ((byte *) po->lsab) + offset; +} + +static inline void * +lsab_allocz(struct proto_ospf *po, unsigned size) +{ + void *r = lsab_alloc(po, size); + bzero(r, size); + return r; +} + +static inline void * +lsab_flush(struct proto_ospf *po) +{ + void *r = mb_alloc(po->proto.pool, po->lsab_size); + memcpy(r, po->lsab, po->lsab_used); + po->lsab_used = 0; + return r; +} + +static int +configured_stubnet(struct ospf_area *oa, struct ifa *a) +{ + struct ospf_stubnet_config *sn; + WALK_LIST(sn, oa->ac->stubnet_list) + { + if (sn->summary) + { + if (ipa_in_net(a->prefix, sn->px.addr, sn->px.len) && (a->pxlen >= sn->px.len)) + return 1; + } + else + { + if (ipa_equal(a->prefix, sn->px.addr) && (a->pxlen == sn->px.len)) + return 1; + } + } + return 0; +} + +static void * originate_rt_lsa_body(struct ospf_area *oa, u16 * length) { struct proto_ospf *po = oa->po; struct ospf_iface *ifa; - int j = 0, k = 0; - u16 i = 0; + int i = 0, j = 0, k = 0, bitv = 0; struct ospf_lsa_rt *rt; - struct ospf_lsa_rt_link *ln, *ln_after; + struct ospf_lsa_rt_link *ln; struct ospf_neighbor *neigh; DBG("%s: Originating RT_lsa body for area \"%I\".\n", po->proto.name, oa->areaid); - - WALK_LIST(ifa, po->iface_list) - { - if ((ifa->oa == oa) 
&& (ifa->state != OSPF_IS_DOWN)) - { - i++; - if ((ifa->type == OSPF_IT_PTP) && (ifa->state == OSPF_IS_PTP) && - (ptp_unnumbered_stub_lsa || !(ifa->iface->addr->flags & IA_UNNUMBERED))) - i++; - } - } - rt = mb_allocz(po->proto.pool, sizeof(struct ospf_lsa_rt) + - i * sizeof(struct ospf_lsa_rt_link)); + + ASSERT(po->lsab_used == 0); + rt = lsab_allocz(po, sizeof(struct ospf_lsa_rt)); if (po->areano > 1) rt->veb.bit.b = 1; if ((po->ebit) && (!oa->stub)) rt->veb.bit.e = 1; - ln = (struct ospf_lsa_rt_link *) (rt + 1); - ln_after = ln + i; + rt = NULL; /* buffer might be reallocated later */ WALK_LIST(ifa, po->iface_list) { - if ((ifa->type == OSPF_IT_VLINK) && (ifa->voa == oa) && (!EMPTY_LIST(ifa->neigh_list))) + int master = 0; + + if ((ifa->type == OSPF_IT_VLINK) && (ifa->voa == oa) && + (!EMPTY_LIST(ifa->neigh_list))) { neigh = (struct ospf_neighbor *) HEAD(ifa->neigh_list); if ((neigh->state == NEIGHBOR_FULL) && (ifa->cost <= 0xffff)) - rt->veb.bit.v = 1; + bitv = 1; } if ((ifa->oa != oa) || (ifa->state == OSPF_IS_DOWN)) continue; - if (ln == ln_after) - die("LSA space overflow"); + /* BIRD does not support interface loops */ + ASSERT(ifa->state != OSPF_IS_LOOP); - if (ifa->state == OSPF_IS_LOOP) - { - ln->type = 3; - ln->id = ipa_to_u32(ifa->iface->addr->ip); - ln->data = 0xffffffff; - ln->metric = 0; - ln->notos = 0; - } - else - { - switch (ifa->type) + switch (ifa->type) { - case OSPF_IT_PTP: /* rfc2328 - pg126 */ + case OSPF_IT_PTP: /* RFC2328 - 12.4.1.1 */ neigh = (struct ospf_neighbor *) HEAD(ifa->neigh_list); if ((!EMPTY_LIST(ifa->neigh_list)) && (neigh->state == NEIGHBOR_FULL)) { + ln = lsab_alloc(po, sizeof(struct ospf_lsa_rt_link)); ln->type = LSART_PTP; ln->id = neigh->rid; + ln->data = (ifa->iface->addr->flags & IA_UNNUMBERED) ? 
+ ifa->iface->index : ipa_to_u32(ifa->iface->addr->ip); ln->metric = ifa->cost; ln->notos = 0; - if (ifa->iface->addr->flags & IA_UNNUMBERED) - { - ln->data = ifa->iface->index; - } - else - { - ln->data = ipa_to_u32(ifa->iface->addr->ip); - } - } - else - { - ln--; - i--; /* No link added */ - } - - if ((ifa->state == OSPF_IS_PTP) && - (ptp_unnumbered_stub_lsa || !(ifa->iface->addr->flags & IA_UNNUMBERED))) - { - ln++; - if (ln == ln_after) - die("LSA space overflow"); - - ln->type = LSART_STUB; - ln->metric = ifa->cost; - ln->notos = 0; - if (ifa->iface->addr->flags & IA_UNNUMBERED) - { - ln->id = ipa_to_u32(ifa->iface->addr->opposite); - ln->data = 0xffffffff; - } - else - { - ln->data = ipa_to_u32(ipa_mkmask(ifa->iface->addr->pxlen)); - ln->id = ipa_to_u32(ifa->iface->addr->prefix) & ln->data; - } + i++; + master = 1; } break; - case OSPF_IT_BCAST: + + case OSPF_IT_BCAST: /* RFC2328 - 12.4.1.2 */ case OSPF_IT_NBMA: if (ifa->state == OSPF_IS_WAITING) - { - ln->type = LSART_STUB; - ln->data = ipa_to_u32(ipa_mkmask(ifa->iface->addr->pxlen)); - ln->id = ipa_to_u32(ifa->iface->addr->prefix) & ln->data; - ln->metric = ifa->cost; - ln->notos = 0; - } - else - { - j = 0, k = 0; - WALK_LIST(neigh, ifa->neigh_list) + break; + + j = 0, k = 0; + WALK_LIST(neigh, ifa->neigh_list) { if ((neigh->rid == ifa->drid) && (neigh->state == NEIGHBOR_FULL)) k = 1; if (neigh->state == NEIGHBOR_FULL) j = 1; } - if (((ifa->state == OSPF_IS_DR) && (j == 1)) || (k == 1)) + + if (((ifa->state == OSPF_IS_DR) && (j == 1)) || (k == 1)) { + ln = lsab_alloc(po, sizeof(struct ospf_lsa_rt_link)); ln->type = LSART_NET; ln->id = ipa_to_u32(ifa->drip); ln->data = ipa_to_u32(ifa->iface->addr->ip); ln->metric = ifa->cost; ln->notos = 0; + i++; + master = 1; } - else - { - ln->type = LSART_STUB; - ln->data = ipa_to_u32(ipa_mkmask(ifa->iface->addr->pxlen)); - ln->id = ipa_to_u32(ifa->iface->addr->prefix) & ln->data; - ln->metric = ifa->cost; - ln->notos = 0; - } - } break; - case OSPF_IT_VLINK: + + case 
OSPF_IT_VLINK: /* RFC2328 - 12.4.1.3 */ neigh = (struct ospf_neighbor *) HEAD(ifa->neigh_list); if ((!EMPTY_LIST(ifa->neigh_list)) && (neigh->state == NEIGHBOR_FULL) && (ifa->cost <= 0xffff)) { + ln = lsab_alloc(po, sizeof(struct ospf_lsa_rt_link)); ln->type = LSART_VLNK; ln->id = neigh->rid; + ln->data = ipa_to_u32(ifa->iface->addr->ip); ln->metric = ifa->cost; ln->notos = 0; - } - else - { - ln--; - i--; /* No link added */ + i++; + master = 1; } break; + default: - ln--; - i--; /* No link added */ log("Unknown interface type %s", ifa->iface->name); break; } - } - ln++; + + /* Now we will originate stub areas for interfaces addresses */ + struct ifa *a; + WALK_LIST(a, ifa->iface->addrs) + { + if (((a == ifa->iface->addr) && master) || + (a->flags & IA_SECONDARY) || + (a->flags & IA_UNNUMBERED) || + configured_stubnet(oa, a)) + continue; + + + ln = lsab_alloc(po, sizeof(struct ospf_lsa_rt_link)); + ln->type = LSART_STUB; + ln->id = ipa_to_u32(a->prefix); + ln->data = ipa_to_u32(ipa_mkmask(a->pxlen)); + ln->metric = ifa->cost; + ln->notos = 0; + i++; + } } + + struct ospf_stubnet_config *sn; + WALK_LIST(sn, oa->ac->stubnet_list) + if (!sn->hidden) + { + ln = lsab_alloc(po, sizeof(struct ospf_lsa_rt_link)); + ln->type = LSART_STUB; + ln->id = ipa_to_u32(sn->px.addr); + ln->data = ipa_to_u32(ipa_mkmask(sn->px.len)); + ln->metric = sn->cost; + ln->notos = 0; + i++; + } + + rt = po->lsab; rt->links = i; - *length = i * sizeof(struct ospf_lsa_rt_link) + sizeof(struct ospf_lsa_rt) + - sizeof(struct ospf_lsa_header); - return rt; + rt->veb.bit.v = bitv; + *length = po->lsab_used + sizeof(struct ospf_lsa_header); + return lsab_flush(po); } /** diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 52f70dce..4e6c80cd 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -10,9 +10,13 @@ CF_HDR #include "proto/pipe/pipe.h" +CF_DEFINES + +#define PIPE_CFG ((struct pipe_config *) this_proto) + CF_DECLS -CF_KEYWORDS(PIPE, PEER, TABLE) +CF_KEYWORDS(PIPE, PEER, 
TABLE, MODE, OPAQUE, TRANSPARENT) CF_GRAMMAR @@ -21,6 +25,7 @@ CF_ADDTO(proto, pipe_proto '}') pipe_proto_start: proto_start PIPE { this_proto = proto_config_new(&proto_pipe, sizeof(struct pipe_config)); this_proto->preference = DEF_PREF_PIPE; + PIPE_CFG->mode = PIPE_OPAQUE; } ; @@ -30,8 +35,10 @@ pipe_proto: | pipe_proto PEER TABLE SYM ';' { if ($4->class != SYM_TABLE) cf_error("Routing table name expected"); - ((struct pipe_config *) this_proto)->peer = $4->def; + PIPE_CFG->peer = $4->def; } + | pipe_proto MODE OPAQUE ';' { PIPE_CFG->mode = PIPE_OPAQUE; } + | pipe_proto MODE TRANSPARENT ';' { PIPE_CFG->mode = PIPE_TRANSPARENT; } ; CF_CODE diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index d1d6bba9..8ff430a9 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -31,12 +31,16 @@ #include "pipe.h" static void -pipe_send(struct pipe_proto *p, rtable *dest, net *n, rte *new, rte *old UNUSED, ea_list *attrs) +pipe_send(struct pipe_proto *p, rtable *dest, net *n, rte *new, rte *old, ea_list *attrs) { + struct proto *src; net *nn; rte *e; rta a; + if (!new && !old) + return; + if (dest->pipe_busy) { log(L_ERR "Pipe loop detected when sending %I/%d to table %s", @@ -47,17 +51,34 @@ pipe_send(struct pipe_proto *p, rtable *dest, net *n, rte *new, rte *old UNUSED, if (new) { memcpy(&a, new->attrs, sizeof(rta)); - a.proto = &p->p; - a.source = RTS_PIPE; + + if (p->mode == PIPE_OPAQUE) + { + a.proto = &p->p; + a.source = RTS_PIPE; + } + a.aflags = 0; a.eattrs = attrs; e = rte_get_temp(&a); e->net = nn; + + if (p->mode == PIPE_TRANSPARENT) + { + /* Copy protocol specific embedded attributes. */ + memcpy(&(e->u), &(new->u), sizeof(e->u)); + } + + src = new->attrs->proto; } else - e = NULL; + { + e = NULL; + src = old->attrs->proto; + } + dest->pipe_busy = 1; - rte_update(dest, nn, &p->p, e); + rte_update(dest, nn, &p->p, (p->mode == PIPE_OPAQUE) ? 
&p->p : src, e); dest->pipe_busy = 0; } @@ -82,7 +103,7 @@ pipe_rt_notify_sec(struct proto *P, net *net, rte *new, rte *old, ea_list *attrs static int pipe_import_control(struct proto *P, rte **ee, ea_list **ea UNUSED, struct linpool *p UNUSED) { - struct proto *pp = (*ee)->attrs->proto; + struct proto *pp = (*ee)->sender; if (pp == P || pp == &((struct pipe_proto *) P)->phantom->p) return -1; /* Avoid local loops automatically */ @@ -106,6 +127,7 @@ pipe_start(struct proto *P) memcpy(ph, p, sizeof(struct pipe_proto)); p->phantom = ph; ph->phantom = p; + ph->p.accept_ra_types = (p->mode == PIPE_OPAQUE) ? RA_OPTIMAL : RA_ANY; ph->p.rt_notify = pipe_rt_notify_sec; ph->p.proto_state = PS_UP; ph->p.core_state = ph->p.core_goal = FS_HAPPY; @@ -141,6 +163,8 @@ pipe_init(struct proto_config *C) struct pipe_proto *p = (struct pipe_proto *) P; p->peer = c->peer->table; + p->mode = c->mode; + P->accept_ra_types = (p->mode == PIPE_OPAQUE) ? RA_OPTIMAL : RA_ANY; P->rt_notify = pipe_rt_notify_pri; P->import_control = pipe_import_control; return P; @@ -162,7 +186,7 @@ pipe_get_status(struct proto *P, byte *buf) { struct pipe_proto *p = (struct pipe_proto *) P; - bsprintf(buf, "-> %s", p->peer->name); + bsprintf(buf, "%c> %s", (p->mode == PIPE_OPAQUE) ? 
'-' : '=', p->peer->name); } static int @@ -171,7 +195,7 @@ pipe_reconfigure(struct proto *p, struct proto_config *new) struct pipe_config *o = (struct pipe_config *) p->cf; struct pipe_config *n = (struct pipe_config *) new; - return o->peer == n->peer; + return (o->peer == n->peer) && (o->mode == n->mode); } struct protocol proto_pipe = { diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 7e9cf8ae..368ba41b 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -9,14 +9,19 @@ #ifndef _BIRD_PIPE_H_ #define _BIRD_PIPE_H_ +#define PIPE_OPAQUE 0 +#define PIPE_TRANSPARENT 1 + struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ + int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */ }; struct pipe_proto { struct proto p; struct rtable *peer; + int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */ struct pipe_proto *phantom; }; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 12cc8783..c655cc36 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -268,7 +268,7 @@ rip_rte_update_if_better(rtable *tab, net *net, struct proto *p, rte *new) if (!old || p->rte_better(new, old) || (ipa_equal(old->attrs->from, new->attrs->from) && (old->u.rip.metric != new->u.rip.metric)) ) - rte_update(tab, net, p, new); + rte_update(tab, net, p, p, new); } /* @@ -946,6 +946,7 @@ rip_rte_remove(net *net UNUSED, rte *rte) void rip_init_instance(struct proto *p) { + p->accept_ra_types = RA_OPTIMAL; p->if_notify = rip_if_notify; p->rt_notify = rip_rt_notify; p->import_control = rip_import_control; diff --git a/proto/static/static.c b/proto/static/static.c index c5324796..c71d1da9 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -60,7 +60,7 @@ static_install(struct proto *p, struct static_route *r, struct iface *ifa) e = rte_get_temp(aa); e->net = n; e->pflags = 0; - rte_update(p->table, n, p, e); + rte_update(p->table, n, p, p, e); r->installed = 1; } @@ -75,7 +75,7 @@ static_remove(struct proto *p, struct static_route *r) 
DBG("Removing static route %I/%d\n", r->net, r->masklen); n = net_find(p->table, r->net, r->masklen); if (n) - rte_update(p->table, n, p, NULL); + rte_update(p->table, n, p, p, NULL); r->installed = 0; } diff --git a/sysdep/config.h b/sysdep/config.h index f0a5ecc8..df6e6df4 100644 --- a/sysdep/config.h +++ b/sysdep/config.h @@ -7,7 +7,7 @@ #define _BIRD_CONFIG_H_ /* BIRD version */ -#define BIRD_VERSION "1.0.15" +#define BIRD_VERSION "1.0.16" /* Include parameters determined by configure script */ #include "sysdep/autoconf.h" diff --git a/sysdep/linux/netlink/netlink.c b/sysdep/linux/netlink/netlink.c index e586847a..2e85f54b 100644 --- a/sysdep/linux/netlink/netlink.c +++ b/sysdep/linux/netlink/netlink.c @@ -500,7 +500,7 @@ nl_send_route(struct krt_proto *p, rte *e, int new) bzero(&r.r, sizeof(r.r)); r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE; r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_REPLACE : 0); + r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0); r.r.rtm_family = BIRD_AF; r.r.rtm_dst_len = net->n.pxlen; @@ -540,22 +540,11 @@ nl_send_route(struct krt_proto *p, rte *e, int new) void krt_set_notify(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) { - if (old && new) - { - /* - * We should check whether priority and TOS is identical as well, - * but we don't use these and default value is always equal to default value. 
:-) - */ - nl_send_route(p, new, 1); - } - else - { - if (old) - nl_send_route(p, old, 0); + if (old) + nl_send_route(p, old, 0); - if (new) - nl_send_route(p, new, 1); - } + if (new) + nl_send_route(p, new, 1); } static struct iface * diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y index 5176be62..1917fe68 100644 --- a/sysdep/unix/config.Y +++ b/sysdep/unix/config.Y @@ -13,7 +13,7 @@ CF_HDR CF_DECLS -CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR) +CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT) %type <i> log_mask log_mask_list log_cat %type <g> log_file @@ -65,8 +65,13 @@ log_cat: /* Unix specific commands */ +CF_CLI_HELP(CONFIGURE, [soft] [\"<file>\"], [[Reload configuration]]) + CF_CLI(CONFIGURE, cfg_name, [\"<file>\"], [[Reload configuration]]) -{ cmd_reconfig($2); } ; +{ cmd_reconfig($2, RECONFIG_HARD); } ; + +CF_CLI(CONFIGURE SOFT, cfg_name, [\"<file>\"], [[Reload configuration and ignore changes in filters]]) +{ cmd_reconfig($3, RECONFIG_SOFT); } ; CF_CLI(DOWN,,, [[Shut the daemon down]]) { cli_msg(7, "Shutdown requested"); order_shutdown(); } ; diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 50992fb4..c86c1200 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -30,6 +30,12 @@ #include "lib/unix.h" #include "lib/sysio.h" +/* Maximum number of calls of rx/tx handler for one socket in one + * select iteration. Should be small enough to not monopolize CPU by + * one protocol instance. 
+ */ +#define MAX_STEPS 4 + /* * Tracked Files */ @@ -593,6 +599,7 @@ sk_new(pool *p) s->saddr = s->daddr = IPA_NONE; s->sport = s->dport = 0; s->tos = s->ttl = -1; + s->flags = 0; s->iface = NULL; s->rbuf = NULL; s->rx_hook = NULL; @@ -703,7 +710,13 @@ sk_setup(sock *s) if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0) WARN("IP_TOS"); #endif - + +#ifdef IPV6 + int v = 1; + if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0) + WARN("IPV6_V6ONLY"); +#endif + if (s->ttl >= 0) err = sk_set_ttl_int(s); else @@ -1335,22 +1348,27 @@ io_loop(void) { sock *s = current_sock; int e; + int steps = MAX_STEPS; if (FD_ISSET(s->fd, &rd) && s->rx_hook) do { + steps--; e = sk_read(s); if (s != current_sock) goto next; } - while (e && s->rx_hook); + while (e && s->rx_hook && steps); + + steps = MAX_STEPS; if (FD_ISSET(s->fd, &wr)) do { + steps--; e = sk_write(s); if (s != current_sock) goto next; } - while (e); + while (e && steps); current_sock = sk_next(s); next: ; } diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 11fd0c01..40f1af9f 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -68,6 +68,7 @@ kif_proto_start: proto_start DEVICE { cf_kif = this_proto = proto_config_new(&proto_unix_iface, sizeof(struct kif_config)); this_proto->preference = DEF_PREF_DIRECT; THIS_KIF->scan_time = 60; + init_list(&THIS_KIF->primary); krt_if_construct(THIS_KIF); } ; @@ -81,6 +82,13 @@ kif_item: /* Scan time of 0 means scan on startup only */ THIS_KIF->scan_time = $3; } + | PRIMARY text_or_none prefix_or_ipa { + struct kif_primary_item *kpi = cfg_alloc(sizeof (struct kif_primary_item)); + kpi->pattern = $2; + kpi->prefix = $3.addr; + kpi->pxlen = $3.len; + add_tail(&THIS_KIF->primary, &kpi->n); + } ; CF_CODE diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 6208f689..488447b7 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -151,6 +151,49 @@ kif_shutdown(struct proto *P) return PS_DOWN; } + 
+static inline int +prefer_scope(struct ifa *a, struct ifa *b) +{ return (a->scope > SCOPE_LINK) && (b->scope <= SCOPE_LINK); } + +static inline int +prefer_addr(struct ifa *a, struct ifa *b) +{ return ipa_compare(a->ip, b->ip) < 0; } + +static inline struct ifa * +find_preferred_ifa(struct iface *i, ip_addr prefix, ip_addr mask) +{ + struct ifa *a, *b = NULL; + + WALK_LIST(a, i->addrs) + { + if (!(a->flags & IA_SECONDARY) && + ipa_equal(ipa_and(a->ip, mask), prefix) && + (!b || prefer_scope(a, b) || prefer_addr(a, b))) + b = a; + } + + return b; +} + +struct ifa * +kif_choose_primary(struct iface *i) +{ + struct kif_config *cf = (struct kif_config *) (kif_proto->p.cf); + struct kif_primary_item *it; + struct ifa *a; + + WALK_LIST(it, cf->primary) + { + if (!it->pattern || patmatch(it->pattern, i->name)) + if (a = find_preferred_ifa(i, it->prefix, ipa_mkmask(it->pxlen))) + return a; + } + + return find_preferred_ifa(i, IPA_NONE, IPA_NONE); +} + + static int kif_reconfigure(struct proto *p, struct proto_config *new) { @@ -159,6 +202,7 @@ kif_reconfigure(struct proto *p, struct proto_config *new) if (!kif_params_same(&o->iface, &n->iface)) return 0; + if (o->scan_time != n->scan_time) { tm_stop(kif_scan_timer); @@ -166,6 +210,18 @@ kif_reconfigure(struct proto *p, struct proto_config *new) kif_scan(kif_scan_timer); tm_start(kif_scan_timer, n->scan_time); } + + if (!EMPTY_LIST(o->primary) || !EMPTY_LIST(n->primary)) + { + /* This is hack, we have to update a configuration + * to the new value just now, because it is used + * for recalculation of primary addresses. 
+ */ + p->cf = new; + + ifa_recalc_all_primary_addresses(); + } + return 1; } @@ -224,7 +280,7 @@ krt_learn_announce_update(struct krt_proto *p, rte *e) ee->pflags = 0; ee->pref = p->p.preference; ee->u.krt = e->u.krt; - rte_update(p->p.table, nn, &p->p, ee); + rte_update(p->p.table, nn, &p->p, &p->p, ee); } static void @@ -232,7 +288,7 @@ krt_learn_announce_delete(struct krt_proto *p, net *n) { n = net_find(p->p.table, n->n.prefix, n->n.pxlen); if (n) - rte_update(p->p.table, n, &p->p, NULL); + rte_update(p->p.table, n, &p->p, &p->p, NULL); } static void @@ -819,6 +875,7 @@ krt_init(struct proto_config *c) { struct krt_proto *p = proto_new(c, sizeof(struct krt_proto)); + p->p.accept_ra_types = RA_OPTIMAL; p->p.rt_notify = krt_notify; p->p.min_scope = SCOPE_HOST; return &p->p; diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 10da1a8f..607e6993 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -86,10 +86,18 @@ void krt_got_route_async(struct krt_proto *p, struct rte *e, int new); extern struct protocol proto_unix_iface; +struct kif_primary_item { + node n; + byte *pattern; + ip_addr prefix; + int pxlen; +}; + struct kif_config { struct proto_config c; struct krt_if_params iface; int scan_time; /* How often we re-scan interfaces */ + list primary; /* Preferences for primary addresses */ }; struct kif_proto { diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 4df4e9fe..5f5b165f 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -115,7 +115,7 @@ read_config(void) else die("Unable to open configuration file %s: %m", config_name); } - config_commit(conf); + config_commit(conf, RECONFIG_HARD); } void @@ -133,11 +133,11 @@ async_config(void) config_free(conf); } else - config_commit(conf); + config_commit(conf, RECONFIG_HARD); } void -cmd_reconfig(char *name) +cmd_reconfig(char *name, int type) { struct config *conf; @@ -154,7 +154,7 @@ cmd_reconfig(char *name) } else { - switch (config_commit(conf)) + switch (config_commit(conf, type)) { 
case CONF_DONE: cli_msg(3, "Reconfigured."); diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 997a4088..83f61af9 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -16,7 +16,7 @@ struct pool; void async_config(void); void async_dump(void); void async_shutdown(void); -void cmd_reconfig(char *name); +void cmd_reconfig(char *name, int type); /* io.c */ |