diff options
author | Maria Matejka <mq@ucw.cz> | 2019-07-03 11:09:52 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2019-07-03 11:12:25 +0200 |
commit | eac9250fd5b10809830361b94438339b3b31b270 (patch) | |
tree | 5c9ec2591f0baa462f5572f83e4c452c3a166c95 | |
parent | 8816b6cdd98d24535eece6b5e35730aac57cd9f7 (diff) | |
parent | 026bfedb332d8c0dde28c693c177fe993b5df26d (diff) |
Merge branch 'master' into mq-filter-stack
-rw-r--r-- | .dir-locals.el | 7 | ||||
-rw-r--r-- | conf/cf-lex.l | 5 | ||||
-rw-r--r-- | conf/conf.c | 27 | ||||
-rw-r--r-- | conf/conf.h | 8 | ||||
-rw-r--r-- | doc/bird.sgml | 96 | ||||
-rw-r--r-- | doc/reply_codes | 1 | ||||
-rw-r--r-- | nest/proto.c | 61 | ||||
-rw-r--r-- | nest/protocol.h | 4 | ||||
-rw-r--r-- | proto/bgp/attrs.c | 4 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 232 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 27 | ||||
-rw-r--r-- | proto/bgp/config.Y | 22 | ||||
-rw-r--r-- | proto/bgp/packets.c | 121 | ||||
-rw-r--r-- | proto/mrt/mrt.c | 4 | ||||
-rw-r--r-- | proto/ospf/config.Y | 6 | ||||
-rw-r--r-- | proto/ospf/dbdes.c | 6 | ||||
-rw-r--r-- | proto/ospf/iface.c | 8 | ||||
-rw-r--r-- | proto/ospf/lsalib.c | 75 | ||||
-rw-r--r-- | proto/ospf/lsalib.h | 15 | ||||
-rw-r--r-- | proto/ospf/lsupd.c | 20 | ||||
-rw-r--r-- | proto/ospf/neighbor.c | 194 | ||||
-rw-r--r-- | proto/ospf/ospf.c | 60 | ||||
-rw-r--r-- | proto/ospf/ospf.h | 32 | ||||
-rw-r--r-- | proto/ospf/rt.c | 180 | ||||
-rw-r--r-- | proto/ospf/rt.h | 1 | ||||
-rw-r--r-- | proto/ospf/topology.c | 100 | ||||
-rw-r--r-- | proto/ospf/topology.h | 3 | ||||
-rw-r--r-- | sysdep/unix/config.Y | 9 | ||||
-rw-r--r-- | sysdep/unix/io.c | 1 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 2 | ||||
-rw-r--r-- | sysdep/unix/main.c | 37 | ||||
-rw-r--r-- | sysdep/unix/unix.h | 2 | ||||
-rw-r--r-- | test/birdtest.c | 2 |
33 files changed, 1228 insertions, 144 deletions
diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 00000000..f1eb3b51 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,7 @@ +; BIRD project coding conventions + +((c-mode + (c-file-style . "bsd") + (c-basic-offset . 2) + (fill-column . 80) + (show-trailing-whitespace . t))) diff --git a/conf/cf-lex.l b/conf/cf-lex.l index 38250d90..1d6cae2c 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -88,7 +88,7 @@ HASH_DEFINE_REHASH_FN(SYM, struct symbol) HASH(struct keyword) kw_hash; -static struct sym_scope *conf_this_scope; +struct sym_scope *conf_this_scope; linpool *cfg_mem; @@ -719,7 +719,8 @@ cf_lex_init(int is_cli, struct config *c) else BEGIN(INITIAL); - conf_this_scope = cfg_allocz(sizeof(struct sym_scope)); + c->root_scope = cfg_allocz(sizeof(struct sym_scope)); + conf_this_scope = c->root_scope; conf_this_scope->active = 1; } diff --git a/conf/conf.c b/conf/conf.c index b0980d7e..b21d5213 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -447,6 +447,24 @@ config_undo(void) return CONF_PROGRESS; } +int +config_status(void) +{ + if (shutting_down) + return CONF_SHUTDOWN; + + if (configuring) + return future_cftype ? CONF_QUEUED : CONF_PROGRESS; + + return CONF_DONE; +} + +btime +config_timer_status(void) +{ + return tm_active(config_timer) ? tm_remains(config_timer) : -1; +} + extern void cmd_reconfig_undo_notify(void); static void @@ -477,19 +495,24 @@ config_init(void) * for switching to an empty configuration. 
*/ void -order_shutdown(void) +order_shutdown(int gr) { struct config *c; if (shutting_down) return; - log(L_INFO "Shutting down"); + if (!gr) + log(L_INFO "Shutting down"); + else + log(L_INFO "Shutting down for graceful restart"); + c = lp_alloc(config->mem, sizeof(struct config)); memcpy(c, config, sizeof(struct config)); init_list(&c->protos); init_list(&c->tables); c->shutdown = 1; + c->gr_down = gr; config_commit(c, RECONFIG_HARD, 0); shutting_down = 1; diff --git a/conf/conf.h b/conf/conf.h index 708a1034..21dc3fa1 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -53,8 +53,10 @@ struct config { int file_fd; /* File descriptor of main configuration file */ HASH(struct symbol) sym_hash; /* Lexer: symbol hash table */ struct config *fallback; /* Link to regular config for CLI parsing */ + struct sym_scope *root_scope; /* Scope for root symbols */ int obstacle_count; /* Number of items blocking freeing of this config */ int shutdown; /* This is a pseudo-config for daemon shutdown */ + int gr_down; /* This is a pseudo-config for graceful restart */ btime load_time; /* When we've got this configuration */ }; @@ -69,11 +71,13 @@ void config_free(struct config *); int config_commit(struct config *, int type, uint timeout); int config_confirm(void); int config_undo(void); +int config_status(void); +btime config_timer_status(void); void config_init(void); void cf_error(const char *msg, ...) 
NORET; void config_add_obstacle(struct config *); void config_del_obstacle(struct config *); -void order_shutdown(void); +void order_shutdown(int gr); #define RECONFIG_NONE 0 #define RECONFIG_HARD 1 @@ -167,6 +171,8 @@ struct include_file_stack { extern struct include_file_stack *ifs; +extern struct sym_scope *conf_this_scope; + int cf_lex(void); void cf_lex_init(int is_cli, struct config *c); void cf_lex_unwind(void); diff --git a/doc/bird.sgml b/doc/bird.sgml index 8594b930..e0b60c81 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -561,7 +561,7 @@ include "tablename.conf";; can be seen (together with other symbols) using 'show symbols' command. <tag><label id="opt-attribute">attribute <m/type/ <m/name/</tag> - Define a custom route attribute. You can set and get it in filters like + Declare a custom route attribute. You can set and get it in filters like any other route atribute. This feature is intended for marking routes in import filters for export filtering purposes instead of locally assigned BGP communities which have to be deleted in export filters. @@ -1159,7 +1159,7 @@ int var; <p>As you can see, a filter has a header, a list of local variables, and a body. The header consists of the <cf/filter/ keyword followed by a (unique) name of filter. The list of local variables consists of <cf><M>type name</M>;</cf> -pairs where each pair defines one local variable. The body consists of <cf> +pairs where each pair declares one local variable. The body consists of <cf> { <M>statements</M> }</cf>. Each <m/statement/ is terminated by a <cf/;/. You can group several statements to a single compound statement by using braces (<cf>{ <M>statements</M> }</cf>) which is useful if you want to make a bigger @@ -1188,7 +1188,7 @@ called like in C: <cf>name(); with_parameters(5);</cf>. Function may return values using the <cf>return <m/[expr]/</cf> command. Returning a value exits from current function (this is similar to C). 
-<p>Filters are declared in a way similar to functions except they can't have +<p>Filters are defined in a way similar to functions except they can't have explicit parameters. They get a route table entry as an implicit parameter, it is also passed automatically to any functions called. The filter must terminate with either <cf/accept/ or <cf/reject/ statement. If there's a runtime error in @@ -1571,11 +1571,20 @@ if 1234 = i then printn "."; else { <label id="route-attributes"> <p>A filter is implicitly passed a route, and it can access its attributes just -like it accesses variables. Attempts to access undefined attribute result in a -runtime error; you can check if an attribute is defined by using the -<cf>defined( <m>attribute</m> )</cf> operator. One notable exception to this -rule are attributes of bgppath and *clist types, where undefined value is -regarded as empty bgppath/*clist for most purposes. +like it accesses variables. There are common route attributes, protocol-specific +route attributes and custom route attributes. Most common attributes are +mandatory (always defined), while remaining are optional. Attempts to access +undefined attribute result in a runtime error; you can check if an attribute is +defined by using the <cf>defined( <m>attribute</m> )</cf> operator. One notable +exception to this rule are attributes of bgppath and *clist types, where +undefined value is regarded as empty bgppath/*clist for most purposes. + +Attributes can be defined by just setting them in filters. Custom attributes +have to be first declared by <ref id="opt-attribute" name="attribute"> global +option. You can also undefine optional attribute back to non-existence by using +the <cf>unset( <m/attribute/ )</cf> operator. + +Common route attributes are: <descrip> <tag><label id="rta-net"><m/prefix/ net</tag> @@ -1642,8 +1651,8 @@ regarded as empty bgppath/*clist for most purposes. compare internal distances to boundary routers (see below). 
</descrip> -<p>There also exist protocol-specific attributes which are described in the -corresponding protocol sections. +<p>Protocol-specific route attributes are described in the corresponding +protocol sections. <sect>Other statements @@ -1653,7 +1662,7 @@ corresponding protocol sections. <descrip> <tag><label id="assignment"><m/variable/ = <m/expr/</tag> - Set variable to a given value. + Set variable (or route attribute) to a given value. <tag><label id="filter-accept-reject">accept|reject [ <m/expr/ ]</tag> Accept or reject the route, possibly printing <cf><m>expr</m></cf>. @@ -2186,12 +2195,23 @@ using the following configuration parameters: <cf/local 10.0.0.1; local as 65000;/ are valid). This parameter is mandatory. - <tag><label id="bgp-neighbor">neighbor [<m/ip/] [port <m/number/] [as <m/number/]</tag> + <tag><label id="bgp-neighbor">neighbor [<m/ip/ | range <m/prefix/] [port <m/number/] [as <m/number/] [internal|external]</tag> Define neighboring router this instance will be talking to and what AS it is located in. In case the neighbor is in the same AS as we are, we - automatically switch to iBGP. Optionally, the remote port may also be - specified. Like <cf/local/ parameter, this parameter may also be used - multiple times with different sub-options. This parameter is mandatory. + automatically switch to IBGP. Alternatively, it is possible to specify + just <cf/internal/ or <cf/external/ instead of AS number; in that case + either local AS number, or any external AS number is accepted. + Optionally, the remote port may also be specified. Like <cf/local/ + parameter, this parameter may also be used multiple times with different + sub-options. This parameter is mandatory. + + It is possible to specify network prefix (with <cf/range/ keyword) + instead of explicit neighbor IP address. 
This enables dynamic BGP + behavior, where the BGP instance listens on BGP port, but new BGP + instances are spawned for incoming BGP connections (if source address + matches the network prefix). It is possible to mix regular BGP instances + with dynamic BGP instances and have multiple dynamic BGP instances with + different ranges. <tag><label id="bgp-iface">interface <m/string/</tag> Define interface we should use for link-local BGP IPv6 sessions. @@ -2224,6 +2244,16 @@ using the following configuration parameters: session. Default: the address of the local end of the interface our neighbor is connected to. + <tag><label id="bgp-dynamic-name">dynamic name "<m/text/"</tag> + Define common prefix of names used for new BGP instances spawned when + dynamic BGP behavior is active. Actual names also contain numeric + index to distinguish individual instances. Default: "dynbgp". + + <tag><label id="bgp-dynamic-name-digits">dynamic name digits <m/number/</tag> + Define minimum number of digits for index in names of spawned dynamic + BGP instances. E.g., if set to 2, then the first name would be + "dynbgp01". Default: 0. + <tag><label id="bgp-strict-bind">strict bind <m/switch/</tag> Specify whether BGP listening socket should be bound to a specific local address (the same as the <cf/source address/) and associated interface, @@ -2565,6 +2595,15 @@ be used in explicit configuration. <p>BGP channels have additional config options (together with the common ones): <descrip> + <tag><label id="bgp-mandatory">mandatory <m/switch/</tag> + When local and neighbor sets of configured AFI/SAFI pairs differ, + capability negotiation ensures that a common subset is used. For + mandatory channels their associated AFI/SAFI must be negotiated + (i.e., also announced by the neighbor), otherwise BGP session + negotiation fails with <it/'Required capability missing'/ error. + Regardless, at least one AFI/SAFI must be negotiated in order for the BGP + session to be successfully established. Default: off. 
+ <tag><label id="bgp-next-hop-keep">next hop keep <m/switch/|ibgp|ebgp</tag> Do not modify the Next Hop attribute and advertise the current one unchanged even in cases where our own local address should be used @@ -3230,6 +3269,8 @@ protocol ospf [v2|v3] <name> { tick <num>; ecmp <switch> [limit <num>]; merge external <switch>; + graceful restart <switch>|aware; + graceful restart time <num>; area <id> { stub; nssa; @@ -3373,6 +3414,31 @@ protocol ospf [v2|v3] <name> { from different LSAs are treated as separate even if they represents the same destination. Default value is no. + <tag><label id="ospf-graceful-restart">graceful restart <m/switch/|aware</tag> + When an OSPF instance is restarted, neighbors break adjacencies and + recalculate their routing tables, which disrupts packet forwarding even + when the forwarding plane of the restarting router remains intact. + <rfc id="3623"> specifies a graceful restart mechanism to alleviate this + issue. For OSPF graceful restart, restarting router originates + Grace-LSAs, announcing intent to do graceful restart. Neighbors + receiving these LSAs enter helper mode, in which they ignore breakdown + of adjacencies, behave as if nothing is happening and keep old routes. + When adjacencies are reestablished, the restarting router flushes + Grace-LSAs and graceful restart is ended. + + This option controls the graceful restart mechanism. It has three + states: Disabled, when no support is provided. Aware, when graceful + restart helper mode is supported, but no local graceful restart is + allowed (i.e. helper-only role). Enabled, when the full graceful restart + support is provided (i.e. both restarting and helper role). Note that + proper support for local graceful restart requires also configuration of + other protocols. Default: aware. 
+ + <tag><label id="ospf-graceful-restart-time">graceful restart time <m/num/</tag> + The restart time is announced in the Grace-LSA and specifies how long + neighbors should wait for proper end of the graceful restart before + exiting helper mode prematurely. Default: 120 seconds. + <tag><label id="ospf-area">area <M>id</M></tag> This defines an OSPF area with given area ID (an integer or an IPv4 address, similarly to a router ID). The most important area is the diff --git a/doc/reply_codes b/doc/reply_codes index 3a7f2c90..02f4e656 100644 --- a/doc/reply_codes +++ b/doc/reply_codes @@ -33,6 +33,7 @@ Reply codes of BIRD command-line interface 0022 Undo scheduled 0023 Evaluation of expression 0024 Graceful restart status report +0025 Graceful restart ordered 1000 BIRD version 1001 Interface list diff --git a/nest/proto.c b/nest/proto.c index 77bf082a..9d0990de 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -874,6 +874,28 @@ proto_copy_config(struct proto_config *dest, struct proto_config *src) dest->protocol->copy_config(dest, src); } +void +proto_clone_config(struct symbol *sym, struct proto_config *parent) +{ + struct proto_config *cf = proto_config_new(parent->protocol, SYM_PROTO); + proto_copy_config(cf, parent); + cf->name = sym->name; + cf->proto = NULL; + cf->parent = parent; + + sym->class = cf->class; + sym->proto = cf; +} + +static void +proto_undef_clone(struct symbol *sym, struct proto_config *cf) +{ + rem_node(&cf->n); + + sym->class = SYM_VOID; + sym->proto = NULL; +} + /** * protos_preconfig - pre-configuration processing * @c: new configuration @@ -973,6 +995,24 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty { p = oc->proto; sym = cf_find_symbol(new, oc->name); + + /* Handle dynamic protocols */ + if (!sym && oc->parent && !new->shutdown) + { + struct symbol *parsym = cf_find_symbol(new, oc->parent->name); + if (parsym && parsym->class == SYM_PROTO) + { + /* This is hack, we would like to share config, but we 
need to copy it now */ + new_config = new; + cfg_mem = new->mem; + conf_this_scope = new->root_scope; + sym = cf_get_symbol(oc->name); + proto_clone_config(sym, parsym->proto); + new_config = NULL; + cfg_mem = NULL; + } + } + if (sym && sym->class == SYM_PROTO && !new->shutdown) { /* Found match, let's check if we can smoothly switch to new configuration */ @@ -984,6 +1024,12 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty if (! force_reconfig && proto_reconfigure(p, oc, nc, type)) continue; + if (nc->parent) + { + proto_undef_clone(sym, nc); + goto remove; + } + /* Unsuccessful, we will restart it */ if (!p->disabled && !nc->disabled) log(L_INFO "Restarting protocol %s", p->name); @@ -997,10 +1043,16 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty } else if (!new->shutdown) { + remove: log(L_INFO "Removing protocol %s", p->name); p->down_code = PDC_CF_REMOVE; p->cf_new = NULL; } + else if (new->gr_down) + { + p->down_code = PDC_CMD_GR_DOWN; + p->cf_new = NULL; + } else /* global shutdown */ { p->down_code = PDC_CMD_SHUTDOWN; @@ -1105,6 +1157,15 @@ proto_rethink_goal(struct proto *p) } } +struct proto * +proto_spawn(struct proto_config *cf, uint disabled) +{ + struct proto *p = proto_init(cf, TAIL(proto_list)); + p->disabled = disabled; + proto_rethink_goal(p); + return p; +} + /** * DOC: Graceful restart recovery diff --git a/nest/protocol.h b/nest/protocol.h index 1e7bfb59..b6f414f6 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -89,6 +89,7 @@ void protos_build(void); void proto_build(struct protocol *); void protos_preconfig(struct config *); void protos_commit(struct config *new, struct config *old, int force_restart, int type); +struct proto * proto_spawn(struct proto_config *cf, uint disabled); void protos_dump_all(void); #define GA_UNKNOWN 0 /* Attribute not recognized */ @@ -113,6 +114,7 @@ struct proto_config { struct config *global; /* Global configuration data */ struct 
protocol *protocol; /* Protocol */ struct proto *proto; /* Instance we've created */ + struct proto_config *parent; /* Parent proto_config for dynamic protocols */ char *name; char *dsc; int class; /* SYM_PROTO or SYM_TEMPLATE */ @@ -255,6 +257,7 @@ struct proto_spec { #define PDC_CMD_DISABLE 0x11 /* Result of disable command */ #define PDC_CMD_RESTART 0x12 /* Result of restart command */ #define PDC_CMD_SHUTDOWN 0x13 /* Result of global shutdown */ +#define PDC_CMD_GR_DOWN 0x14 /* Result of global graceful restart */ #define PDC_RX_LIMIT_HIT 0x21 /* Route receive limit reached */ #define PDC_IN_LIMIT_HIT 0x22 /* Route import limit reached */ #define PDC_OUT_LIMIT_HIT 0x23 /* Route export limit reached */ @@ -263,6 +266,7 @@ struct proto_spec { void *proto_new(struct proto_config *); void *proto_config_new(struct protocol *, int class); void proto_copy_config(struct proto_config *dest, struct proto_config *src); +void proto_clone_config(struct symbol *sym, struct proto_config *parent); void proto_set_message(struct proto *p, char *msg, int len); void graceful_restart_recovery(void); diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 7c6f2ee9..69c4b172 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1302,7 +1302,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 20 +#define PXH_PARAMS /8, *2, 2, 2, 8, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1730,7 +1730,7 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. 
g) Compare peer IP adresses */ - return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); + return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0; } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8dedde9f..b68575a5 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -129,6 +129,9 @@ static list bgp_sockets; /* Global list of listening sockets */ static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); +static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn); +static void bgp_setup_sk(struct bgp_conn *conn, sock *s); +static void bgp_send_open(struct bgp_conn *conn); static void bgp_update_bfd(struct bgp_proto *p, int use_bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); @@ -149,7 +152,7 @@ bgp_open(struct bgp_proto *p) struct bgp_socket *bs = NULL; struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : - (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6); + (p->ipv4 ? IPA_NONE4 : IPA_NONE6); uint port = p->cf->local_port; /* FIXME: Add some global init? 
*/ @@ -272,8 +275,17 @@ bgp_startup(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "Started"); p->start_state = BSS_CONNECT; - if (!p->cf->passive) + if (!p->passive) bgp_active(p); + + if (p->postponed_sk) + { + /* Apply postponed incoming connection */ + bgp_setup_conn(p, &p->incoming_conn); + bgp_setup_sk(&p->incoming_conn, p->postponed_sk); + bgp_send_open(&p->incoming_conn); + p->postponed_sk = NULL; + } } static void @@ -387,7 +399,7 @@ bgp_close_conn(struct bgp_conn *conn) void bgp_update_startup_delay(struct bgp_proto *p) { - struct bgp_config *cf = p->cf; + const struct bgp_config *cf = p->cf; DBG("BGP: Updating startup delay\n"); @@ -410,7 +422,7 @@ bgp_update_startup_delay(struct bgp_proto *p) } static void -bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len) +bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len) { switch (conn->state) { @@ -426,7 +438,13 @@ bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint le case BS_OPENSENT: case BS_OPENCONFIRM: case BS_ESTABLISHED: - bgp_error(conn, 6, subcode, data, len); + if (subcode < 0) + { + bgp_conn_enter_close_state(conn); + bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE); + } + else + bgp_error(conn, 6, subcode, data, len); return; default: @@ -456,7 +474,7 @@ bgp_decision(void *vp) if ((p->p.proto_state == PS_START) && (p->outgoing_conn.state == BS_IDLE) && (p->incoming_conn.state != BS_OPENCONFIRM) && - !p->cf->passive) + !p->passive) bgp_active(p); if ((p->p.proto_state == PS_STOP) && @@ -465,8 +483,31 @@ bgp_decision(void *vp) bgp_down(p); } +static struct bgp_proto * +bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip) +{ + struct symbol *sym; + char fmt[SYM_MAX_LEN]; + + bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits); + + /* This is hack, we would like to share config, but we need to copy it now */ + new_config = config; + cfg_mem = config->mem; + conf_this_scope = config->root_scope; + sym 
= cf_default_name(fmt, &(pp->dynamic_name_counter)); + proto_clone_config(sym, pp->p.cf); + new_config = NULL; + cfg_mem = NULL; + + /* Just pass remote_ip to bgp_init() */ + ((struct bgp_config *) sym->proto)->remote_ip = remote_ip; + + return (void *) proto_spawn(sym->proto, 0); +} + void -bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len) +bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) { proto_notify_state(&p->p, PS_STOP); bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); @@ -491,6 +532,7 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn) } static const struct bgp_af_caps dummy_af_caps = { }; +static const struct bgp_af_caps basic_af_caps = { .ready = 1 }; void bgp_conn_enter_established_state(struct bgp_conn *conn) @@ -503,8 +545,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) BGP_TRACE(D_EVENTS, "BGP session established"); /* For multi-hop BGP sessions */ - if (ipa_zero(p->source_addr)) - p->source_addr = conn->sk->saddr; + if (ipa_zero(p->local_ip)) + p->local_ip = conn->sk->saddr; + + /* For promiscuous sessions */ + if (!p->remote_as) + p->remote_as = conn->received_as; /* In case of LLv6 is not valid during BGP start */ if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6) @@ -541,6 +587,13 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi); + /* Use default if capabilities were not announced */ + if (!local->length && (c->afi == BGP_AF_IPV4)) + loc = &basic_af_caps; + + if (!peer->length && (c->afi == BGP_AF_IPV4)) + rem = &basic_af_caps; + /* Ignore AFIs that were not announced in multiprotocol capability */ if (!loc || !loc->ready) loc = &dummy_af_caps; @@ -880,6 +933,7 @@ bgp_send_open(struct bgp_conn *conn) conn->sk->rx_hook = bgp_rx; conn->sk->tx_hook = bgp_tx; tm_stop(conn->connect_timer); + 
bgp_prepare_capabilities(conn); bgp_schedule_packet(conn, NULL, PKT_OPEN); bgp_conn_set_state(conn, BS_OPENSENT); bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); @@ -1039,8 +1093,8 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c DBG("BGP: Connecting\n"); sock *s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; - s->saddr = p->source_addr; - s->daddr = p->cf->remote_ip; + s->saddr = p->local_ip; + s->daddr = p->remote_ip; s->dport = p->cf->remote_port; s->iface = p->neigh ? p->neigh->iface : NULL; s->vrf = p->p.vrf; @@ -1075,6 +1129,9 @@ err: return; } +static inline int bgp_is_dynamic(struct bgp_proto *p) +{ return ipa_zero(p->remote_ip); } + /** * bgp_find_proto - find existing proto for incoming connection * @sk: TCP socket @@ -1083,6 +1140,7 @@ err: static struct bgp_proto * bgp_find_proto(sock *sk) { + struct bgp_proto *best = NULL; struct bgp_proto *p; /* sk->iface is valid only if src or dst address is link-local */ @@ -1090,13 +1148,20 @@ bgp_find_proto(sock *sk) WALK_LIST(p, proto_list) if ((p->p.proto == &proto_bgp) && - (p->sock == sk->data) && - ipa_equal(p->cf->remote_ip, sk->daddr) && + (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && + (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && + (p->p.vrf == sk->vrf) && + (p->cf->local_port == sk->sport) && (!link || (p->cf->iface == sk->iface)) && (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr))) - return p; + { + best = p; - return NULL; + if (!bgp_is_dynamic(p)) + break; + } + + return best; } /** @@ -1175,6 +1240,16 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) sk_reallocate(sk); } + /* For dynamic BGP, spawn new instance and postpone the socket */ + if (bgp_is_dynamic(p)) + { + p = bgp_spawn(p, sk->daddr); + p->postponed_sk = sk; + rmove(sk, p->p.pool); + return 0; + } + + rmove(sk, p->p.pool); bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); 
bgp_send_open(&p->incoming_conn); @@ -1201,11 +1276,11 @@ bgp_start_neighbor(struct bgp_proto *p) { /* Called only for single-hop BGP sessions */ - if (ipa_zero(p->source_addr)) - p->source_addr = p->neigh->ifa->ip; + if (ipa_zero(p->local_ip)) + p->local_ip = p->neigh->ifa->ip; - if (ipa_is_link_local(p->source_addr)) - p->link_addr = p->source_addr; + if (ipa_is_link_local(p->local_ip)) + p->link_addr = p->local_ip; else if (p->neigh->iface->llv6) p->link_addr = p->neigh->iface->llv6->ip; @@ -1293,8 +1368,8 @@ bgp_bfd_notify(struct bfd_request *req) static void bgp_update_bfd(struct bgp_proto *p, int use_bfd) { - if (use_bfd && !p->bfd_req) - p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr, + if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p)) + p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip, p->cf->multihop ? NULL : p->neigh->iface, bgp_bfd_notify, p); @@ -1375,7 +1450,7 @@ static void bgp_start_locked(struct object_lock *lock) { struct bgp_proto *p = lock->data; - struct bgp_config *cf = p->cf; + const struct bgp_config *cf = p->cf; if (p->p.proto_state != PS_START) { @@ -1385,17 +1460,17 @@ bgp_start_locked(struct object_lock *lock) DBG("BGP: Got lock\n"); - if (cf->multihop) + if (cf->multihop || bgp_is_dynamic(p)) { /* Multi-hop sessions do not use neighbor entries */ bgp_initiate(p); return; } - neighbor *n = neigh_find(&p->p, cf->remote_ip, cf->iface, NEF_STICKY); + neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY); if (!n) { - log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface); + log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface); /* As we do not start yet, we can just disable protocol */ p->p.disabled = 1; bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); @@ -1406,7 +1481,7 @@ bgp_start_locked(struct object_lock *lock) p->neigh = n; if (n->scope <= 0) - BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", 
cf->remote_ip, cf->iface); + BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface); else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP)) BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name); else @@ -1417,14 +1492,29 @@ static int bgp_start(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - struct object_lock *lock; + const struct bgp_config *cf = p->cf; + + p->local_ip = cf->local_ip; + p->local_as = cf->local_as; + p->remote_as = cf->remote_as; + p->public_as = cf->local_as; + + /* For dynamic BGP childs, remote_ip is already set */ + if (ipa_nonzero(cf->remote_ip)) + p->remote_ip = cf->remote_ip; + + /* Confederation ID is used for truly external peers */ + if (p->cf->confederation && !p->is_interior) + p->public_as = cf->confederation; + + p->passive = cf->passive || bgp_is_dynamic(p); - DBG("BGP: Startup.\n"); p->start_state = BSS_PREPARE; p->outgoing_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE; p->neigh = NULL; p->bfd_req = NULL; + p->postponed_sk = NULL; p->gr_ready = 0; p->gr_active_num = 0; @@ -1437,7 +1527,6 @@ bgp_start(struct proto *P) p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; p->remote_id = 0; - p->source_addr = p->cf->local_ip; p->link_addr = IPA_NONE; /* Lock all channels when in GR recovery mode */ @@ -1452,9 +1541,9 @@ bgp_start(struct proto *P) * Before attempting to create the connection, we need to lock the port, * so that we are the only instance attempting to talk with that neighbor. */ - + struct object_lock *lock; lock = p->lock = olock_new(P->pool); - lock->addr = p->cf->remote_ip; + lock->addr = p->remote_ip; lock->port = p->cf->remote_port; lock->iface = p->cf->iface; lock->vrf = p->cf->iface ? 
NULL : p->p.vrf; @@ -1472,7 +1561,7 @@ static int bgp_shutdown(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - uint subcode = 0; + int subcode = 0; char *message = NULL; byte *data = NULL; @@ -1493,6 +1582,7 @@ bgp_shutdown(struct proto *P) case PDC_CMD_DISABLE: case PDC_CMD_SHUTDOWN: + shutdown: subcode = 2; // Errcode 6, 2 - administrative shutdown message = P->message; break; @@ -1502,6 +1592,14 @@ bgp_shutdown(struct proto *P) message = P->message; break; + case PDC_CMD_GR_DOWN: + if ((p->cf->gr_mode != BGP_GR_ABLE) && + (p->cf->llgr_mode != BGP_LLGR_ABLE)) + goto shutdown; + + subcode = -1; // Do not send NOTIFICATION, just close the connection + break; + case PDC_RX_LIMIT_HIT: case PDC_IN_LIMIT_HIT: subcode = 1; // Errcode 6, 1 - max number of prefixes reached @@ -1528,7 +1626,7 @@ bgp_shutdown(struct proto *P) if (message) { uint msg_len = strlen(message); - msg_len = MIN(msg_len, 128); + msg_len = MIN(msg_len, 255); /* Buffer will be freed automatically by protocol shutdown */ data = mb_alloc(p->p.pool, msg_len + 1); @@ -1562,17 +1660,21 @@ bgp_init(struct proto_config *CF) P->rte_modify = bgp_rte_modify_stale; p->cf = cf; - p->local_as = cf->local_as; - p->remote_as = cf->remote_as; - p->public_as = cf->local_as; p->is_internal = (cf->local_as == cf->remote_as); p->is_interior = p->is_internal || cf->confederation_member; p->rs_client = cf->rs_client; p->rr_client = cf->rr_client; - /* Confederation ID is used for truly external peers */ - if (cf->confederation && !p->is_interior) - p->public_as = cf->confederation; + p->ipv4 = ipa_nonzero(cf->remote_ip) ? 
+ ipa_is_ip4(cf->remote_ip) : + (cf->remote_range && (cf->remote_range->type == NET_IP4)); + + p->remote_ip = cf->remote_ip; + p->remote_as = cf->remote_as; + + /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */ + if (cf->c.parent) + cf->remote_ip = IPA_NONE; /* Add all channels */ struct bgp_channel_config *cc; @@ -1604,7 +1706,7 @@ bgp_channel_start(struct channel *C) { struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ip_addr src = p->source_addr; + ip_addr src = p->local_ip; if (c->igp_table_ip4) rt_lock_table(c->igp_table_ip4); @@ -1745,14 +1847,19 @@ void bgp_postconfig(struct proto_config *CF) { struct bgp_config *cf = (void *) CF; - int internal = (cf->local_as == cf->remote_as); - int interior = internal || cf->confederation_member; /* Do not check templates at all */ if (cf->c.class == SYM_TEMPLATE) return; + /* Handle undefined remote_as, zero should mean unspecified external */ + if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL)) + cf->remote_as = cf->local_as; + + int internal = (cf->local_as == cf->remote_as); + int interior = internal || cf->confederation_member; + /* EBGP direct by default, IBGP multihop by default */ if (cf->multihop < 0) cf->multihop = internal ? 
64 : 0; @@ -1769,11 +1876,20 @@ bgp_postconfig(struct proto_config *CF) if (!cf->local_as) cf_error("Local AS number must be set"); - if (ipa_zero(cf->remote_ip)) + if (ipa_zero(cf->remote_ip) && !cf->remote_range) cf_error("Neighbor must be configured"); - if (!cf->remote_as) - cf_error("Remote AS number must be set"); + if (ipa_zero(cf->local_ip) && cf->strict_bind) + cf_error("Local address must be configured for strict bind"); + + if (!cf->remote_as && !cf->peer_type) + cf_error("Remote AS number (or peer type) must be set"); + + if ((cf->peer_type == BGP_PT_INTERNAL) && !internal) + cf_error("IBGP cannot have different ASNs"); + + if ((cf->peer_type == BGP_PT_EXTERNAL) && internal) + cf_error("EBGP cannot have the same ASNs"); if (!cf->iface && (ipa_is_link_local(cf->local_ip) || ipa_is_link_local(cf->remote_ip))) @@ -1885,8 +2001,8 @@ static int bgp_reconfigure(struct proto *P, struct proto_config *CF) { struct bgp_proto *p = (void *) P; - struct bgp_config *new = (void *) CF; - struct bgp_config *old = p->cf; + const struct bgp_config *new = (void *) CF; + const struct bgp_config *old = p->cf; if (proto_get_router_id(CF) != p->local_id) return 0; @@ -1896,7 +2012,12 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) // password item is last and must be checked separately OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) && ((!old->password && !new->password) - || (old->password && new->password && !strcmp(old->password, new->password))); + || (old->password && new->password && !strcmp(old->password, new->password))) + && ((!old->remote_range && !new->remote_range) + || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range))) + && ((!old->dynamic_name && !new->dynamic_name) + || (old->dynamic_name && new->dynamic_name && !strcmp(old->dynamic_name, new->dynamic_name))) + && (old->dynamic_name_digits == new->dynamic_name_digits); /* FIXME: Move channel reconfiguration to generic protocol code ? 
*/ struct channel *C, *C2; @@ -1926,6 +2047,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) if (same) p->cf = new; + /* Reset name counter */ + p->dynamic_name_counter = 0; + return same; } @@ -2056,7 +2180,7 @@ bgp_state_dsc(struct bgp_proto *p) return "Down"; int state = MAX(p->incoming_conn.state, p->outgoing_conn.state); - if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive) + if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive) return "Passive"; return bgp_state_names[state]; @@ -2232,8 +2356,14 @@ bgp_show_proto_info(struct proto *P) struct bgp_proto *p = (struct bgp_proto *) P; cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); - cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface); + + if (bgp_is_dynamic(p) && p->cf->remote_range) + cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range); + else + cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); + cli_msg(-1006, " Local AS: %u", p->cf->local_as); if (p->gr_active_num) cli_msg(-1006, " Neighbor graceful restart active"); @@ -2269,7 +2399,7 @@ bgp_show_proto_info(struct proto *P) p->rr_client ? " route-reflector" : "", p->rs_client ? " route-server" : "", p->as4_session ? 
" AS4" : ""); - cli_msg(-1006, " Source address: %I", p->source_addr); + cli_msg(-1006, " Source address: %I", p->local_ip); cli_msg(-1006, " Hold timer: %t/%u", tm_remains(p->conn->hold_timer), p->conn->hold_time); cli_msg(-1006, " Keepalive timer: %t/%u", diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b604c7aa..075e1bb9 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -83,6 +83,7 @@ struct bgp_config { struct iface *iface; /* Interface for link-local addresses */ u16 local_port; /* Local listening port */ u16 remote_port; /* Neighbor destination port */ + int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */ int multihop; /* Number of hops if multihop */ int strict_bind; /* Bind listening socket to local address */ int ttl_security; /* Enable TTL security [RFC 5082] */ @@ -123,6 +124,9 @@ struct bgp_config { u32 disable_after_cease; /* Disable it when cease is received, bitfield */ char *password; /* Password used for MD5 authentication */ + net_addr *remote_range; /* Allowed neighbor range for dynamic BGP */ + char *dynamic_name; /* Name pattern for dynamic BGP */ + int dynamic_name_digits; /* Minimum number of digits for dynamic names */ int check_link; /* Use iface link state for liveness detection */ int bfd; /* Use BFD for liveness detection */ }; @@ -136,6 +140,7 @@ struct bgp_channel_config { ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */ u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */ + u8 mandatory; /* Channel is mandatory in capability negotiation */ u8 missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. 
RA_ACCEPTED) */ @@ -151,6 +156,9 @@ struct bgp_channel_config { struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ }; +#define BGP_PT_INTERNAL 1 +#define BGP_PT_EXTERNAL 2 + #define NH_NO 0 #define NH_ALL 1 #define NH_IBGP 2 @@ -213,8 +221,11 @@ struct bgp_caps { u16 gr_time; /* Graceful restart time in seconds */ u8 llgr_aware; /* Long-lived GR capability, RFC draft */ + u8 any_ext_next_hop; /* Bitwise OR of per-AF ext_next_hop */ + u8 any_add_path; /* Bitwise OR of per-AF add_path */ u16 af_count; /* Number of af_data items */ + u16 length; /* Length of capabilities in OPEN msg */ struct bgp_af_caps af_data[0]; /* Per-AF capability data */ }; @@ -235,6 +246,7 @@ struct bgp_conn { u8 state; /* State of connection state machine */ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 ext_messages; /* Session uses extended message length */ + u32 received_as; /* ASN received in OPEN message */ struct bgp_caps *local_caps; struct bgp_caps *remote_caps; @@ -254,18 +266,21 @@ struct bgp_conn { struct bgp_proto { struct proto p; - struct bgp_config *cf; /* Shortcut to BGP configuration */ + const struct bgp_config *cf; /* Shortcut to BGP configuration */ + ip_addr local_ip, remote_ip; u32 local_as, remote_as; u32 public_as; /* Externally visible ASN (local_as or confederation id) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ - int start_state; /* Substates that partitions BS_START */ + u8 start_state; /* Substates that partitions BS_START */ u8 is_internal; /* Internal BGP session (local_as == remote_as) */ u8 is_interior; /* Internal or intra-confederation BGP session */ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 rr_client; /* Whether neighbor is RR client of me */ u8 rs_client; /* Whether neighbor is RS client of me */ + u8 ipv4; /* Use IPv4 
connection, i.e. remote_ip is IPv4 */ + u8 passive; /* Do not initiate outgoing connection */ u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */ u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */ u8 gr_ready; /* Neighbor could do graceful restart */ @@ -282,11 +297,12 @@ struct bgp_proto { struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ struct bgp_socket *sock; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - ip_addr source_addr; /* Local address used as an advertised next hop */ - ip_addr link_addr; /* Link-local version of source_addr */ + struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ + ip_addr link_addr; /* Link-local version of local_ip */ event *event; /* Event for respawning and shutting process */ timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ + int dynamic_name_counter; /* Counter for dynamic BGP names */ uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */ btime last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ @@ -472,7 +488,7 @@ void bgp_graceful_restart_done(struct bgp_channel *c); void bgp_refresh_begin(struct bgp_channel *c); void bgp_refresh_end(struct bgp_channel *c); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); -void bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len); +void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len); struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); @@ -549,6 +565,7 @@ void bgp_get_route_info(struct rte *, byte *buf); /* packets.c */ void bgp_dump_state_change(struct bgp_conn 
*conn, uint old, uint new); +void bgp_prepare_capabilities(struct bgp_conn *conn); const struct bgp_af_desc *bgp_get_af_desc(u32 afi); const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi); void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type); diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index ac8d024a..bbc7d9a4 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -29,7 +29,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, - LIVED, STALE, IMPORT, IBGP, EBGP) + LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, + DYNAMIC, RANGE, NAME, DIGITS) %type <i> bgp_nh %type <i32> bgp_afi @@ -68,6 +69,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->llgr_mode = -1; BGP_CFG->llgr_time = 3600; BGP_CFG->setkey = 1; + BGP_CFG->dynamic_name = "dynbgp"; BGP_CFG->check_link = -1; } ; @@ -82,6 +84,8 @@ bgp_nbr_opts: /* empty */ | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } | bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; } + | bgp_nbr_opts INTERNAL { BGP_CFG->peer_type = BGP_PT_INTERNAL; } + | bgp_nbr_opts EXTERNAL { BGP_CFG->peer_type = BGP_PT_EXTERNAL; } ; bgp_cease_mask: @@ -118,11 +122,18 @@ bgp_proto: } | bgp_proto NEIGHBOR bgp_nbr_opts ';' | bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' { - if (ipa_nonzero(BGP_CFG->remote_ip)) + if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range) cf_error("Only one neighbor per BGP instance is allowed"); BGP_CFG->remote_ip = $3; if ($4) BGP_CFG->iface = $4; } + | bgp_proto NEIGHBOR RANGE net_ip bgp_nbr_opts ';' { + if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range) + cf_error("Only one neighbor per BGP instance is allowed"); + net_addr *n 
= cfg_alloc($4.length); + net_copy(n, &($4)); + BGP_CFG->remote_range = n; + } | bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); } | bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; } | bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; } @@ -134,6 +145,12 @@ bgp_proto: | bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } + | bgp_proto DYNAMIC NAME text ';' { + if (strchr($4, '%')) cf_error("Forbidden character '%%' in dynamic name"); + if (strlen($4) > (SYM_MAX_LEN - 16)) cf_error("Dynamic name too long"); + BGP_CFG->dynamic_name = $4; + } + | bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); } | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; } | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } @@ -223,6 +240,7 @@ bgp_channel_item: | NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; } | NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; } | NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; } + | MANDATORY bool { BGP_CC->mandatory = $2; } | MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; } | MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; } | MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 2b5cc440..daa88630 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -100,7 +100,7 @@ init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d) d->peer_as = p->remote_as; d->local_as = p->local_as; d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0; - d->af = ipa_is_ip4(p->cf->remote_ip) ? 
BGP_AFI_IPV4 : BGP_AFI_IPV6; + d->af = ipa_is_ip4(p->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6; d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE; d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE; d->as4 = p_ok ? p->as4_session : 0; @@ -208,19 +208,22 @@ bgp_af_caps_cmp(const void *X, const void *Y) } -static byte * -bgp_write_capabilities(struct bgp_conn *conn, byte *buf) +void +bgp_prepare_capabilities(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; struct bgp_channel *c; struct bgp_caps *caps; struct bgp_af_caps *ac; - uint any_ext_next_hop = 0; - uint any_add_path = 0; - byte *data; - /* Prepare bgp_caps structure */ + if (!p->cf->capabilities) + { + /* Just prepare empty local_caps */ + conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); + return; + } + /* Prepare bgp_caps structure */ int n = list_length(&p->p.channels); caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps)); conn->local_caps = caps; @@ -251,10 +254,10 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) ac->ready = 1; ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop; - any_ext_next_hop |= ac->ext_next_hop; + caps->any_ext_next_hop |= ac->ext_next_hop; ac->add_path = c->cf->add_path; - any_add_path |= ac->add_path; + caps->any_add_path |= ac->add_path; if (c->cf->gr_able) { @@ -276,7 +279,16 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) /* Sort capability fields by AFI/SAFI */ qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp); +} +static byte * +bgp_write_capabilities(struct bgp_conn *conn, byte *buf) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_caps *caps = conn->local_caps; + struct bgp_af_caps *ac; + byte *buf_head = buf; + byte *data; /* Create capability list in buffer */ @@ -301,7 +313,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) *buf++ = 0; /* Capability data length */ } - if (any_ext_next_hop) + if (caps->any_ext_next_hop) { *buf++ = 5; 
/* Capability 5: Support for extended next hop */ *buf++ = 0; /* Capability data length, will be fixed later */ @@ -353,7 +365,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) buf += 4; } - if (any_add_path) + if (caps->any_add_path) { *buf++ = 69; /* Capability 69: Support for ADD-PATH */ *buf++ = 0; /* Capability data length, will be fixed later */ @@ -394,6 +406,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) data[-1] = buf - data; } + caps->length = buf - buf_head; + return buf; } @@ -405,6 +419,8 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i int i, cl; u32 af; + caps->length += len; + while (len > 0) { if (len < 2 || len < (2 + pos[1])) @@ -569,6 +585,42 @@ err: } static int +bgp_check_capabilities(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_caps *local = conn->local_caps; + struct bgp_caps *remote = conn->remote_caps; + struct bgp_channel *c; + int count = 0; + + /* This is partially overlapping with bgp_conn_enter_established_state(), + but we need to run this just after we receive OPEN message */ + + WALK_LIST(c, p->p.channels) + { + const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); + const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi); + + /* Find out whether this channel will be active */ + int active = loc && loc->ready && + ((rem && rem->ready) || (!remote->length && (c->afi == BGP_AF_IPV4))); + + /* Mandatory must be active */ + if (c->cf->mandatory && !active) + return 0; + + if (active) + count++; + } + + /* We need at least one channel active */ + if (!count) + return 0; + + return 1; +} + +static int bgp_read_options(struct bgp_conn *conn, byte *pos, int len) { struct bgp_proto *p = conn->bgp; @@ -635,9 +687,6 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) } else { - /* Prepare empty local_caps */ - conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); - buf[9] = 0; /* No optional parameters */ return buf + 
10; } @@ -678,6 +727,10 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) if (!id || (p->is_internal && id == p->local_id)) { bgp_error(conn, 2, 3, pkt+24, -4); return; } + /* RFC 5492 4 - check for required capabilities */ + if (p->cf->capabilities && !bgp_check_capabilities(conn)) + { bgp_error(conn, 2, 7, NULL, 0); return; } + struct bgp_caps *caps = conn->remote_caps; if (caps->as4_support) @@ -687,13 +740,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) if ((as4 != asn) && (asn != AS_TRANS)) log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name); - if (as4 != p->remote_as) + /* When remote ASN is unspecified, it must be external one */ + if (p->remote_as ? (as4 != p->remote_as) : (as4 == p->local_as)) { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; } + + conn->received_as = as4; } else { - if (asn != p->remote_as) + if (p->remote_as ? (asn != p->remote_as) : (asn == p->local_as)) { bgp_error(conn, 2, 2, pkt+20, 2); return; } + + conn->received_as = asn; } /* Check the other connection */ @@ -962,7 +1020,7 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) WITHDRAW(NO_NEXT_HOP); ip_addr *nh = (void *) a->u.ptr->data; - ip_addr peer = s->proto->cf->remote_ip; + ip_addr peer = s->proto->remote_ip; uint len = a->u.ptr->length; /* Forbid zero next hop */ @@ -2280,7 +2338,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis a->source = RTS_BGP; a->scope = SCOPE_UNIVERSE; - a->from = s->proto->cf->remote_ip; + a->from = s->proto->remote_ip; a->eattrs = ea; c->desc->decode_next_hop(s, nh, nh_len, a); @@ -2634,6 +2692,12 @@ bgp_fire_tx(struct bgp_conn *conn) end = bgp_create_notification(conn, pkt); return bgp_send(conn, PKT_NOTIFICATION, end - buf); } + else if (s & (1 << PKT_OPEN)) + { + conn->packets_to_send &= ~(1 << PKT_OPEN); + end = bgp_create_open(conn, pkt); + return bgp_send(conn, PKT_OPEN, end - buf); + } else if (s & (1 << PKT_KEEPALIVE)) { 
conn->packets_to_send &= ~(1 << PKT_KEEPALIVE); @@ -2641,12 +2705,6 @@ bgp_fire_tx(struct bgp_conn *conn) bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH); } - else if (s & (1 << PKT_OPEN)) - { - conn->packets_to_send &= ~(1 << PKT_OPEN); - end = bgp_create_open(conn, pkt); - return bgp_send(conn, PKT_OPEN, end - buf); - } else while (conn->channels_to_send) { c = bgp_get_channel_to_send(p, conn); @@ -2731,15 +2789,18 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev)) ev_schedule(conn->tx_ev); } - void bgp_kick_tx(void *vconn) { struct bgp_conn *conn = vconn; DBG("BGP: kicking TX\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } void @@ -2748,8 +2809,12 @@ bgp_tx(sock *sk) struct bgp_conn *conn = sk->data; DBG("BGP: TX hook\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } @@ -2835,7 +2900,7 @@ bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp) return 1; /* Handle proper message */ - if ((msg_len > 128) && (msg_len + 1 > len)) + if ((msg_len > 255) && (msg_len + 1 > len)) return 0; /* Some elementary cleanup */ diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index e4f1acea..7a396a84 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -361,7 +361,7 @@ mrt_peer_table_dump(struct mrt_table_dump_state *s) if ((P->proto == &proto_bgp) && (P->proto_state != PS_DOWN)) { struct bgp_proto *p = (void *) P; - mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->cf->remote_ip); + mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->remote_ip); } #endif @@ -429,7 +429,7 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) { struct bgp_proto 
*p = (void *) r->attrs->src->proto; struct mrt_peer_entry *n = - HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->cf->remote_ip); + HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip); peer = n ? n->index : 0; } diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 2e9ed0ac..2ec8c0b6 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -200,6 +200,7 @@ CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY, LENGTH) CF_KEYWORDS(MERGE, LSA, SUPPRESSION, MULTICAST, RFC5838, VPN, PE) +CF_KEYWORDS(GRACEFUL, RESTART, AWARE, TIME) %type <ld> lsadb_args %type <i> ospf_variant ospf_af_mc nbma_eligible @@ -226,6 +227,8 @@ ospf_proto_start: proto_start ospf_variant OSPF_CFG->tick = OSPF_DEFAULT_TICK; OSPF_CFG->ospf2 = $2; OSPF_CFG->af_ext = !$2; + OSPF_CFG->gr_mode = OSPF_GR_AWARE; + OSPF_CFG->gr_time = OSPF_DEFAULT_GR_TIME; }; ospf_proto: @@ -258,6 +261,9 @@ ospf_proto_item: | RFC5838 bool { OSPF_CFG->af_ext = $2; if (!ospf_cfg_is_v3()) cf_error("RFC5838 option requires OSPFv3"); } | VPN PE bool { OSPF_CFG->vpn_pe = $3; } | STUB ROUTER bool { OSPF_CFG->stub_router = $3; } + | GRACEFUL RESTART bool { OSPF_CFG->gr_mode = $3; } + | GRACEFUL RESTART AWARE { OSPF_CFG->gr_mode = OSPF_GR_AWARE; } + | GRACEFUL RESTART TIME expr { OSPF_CFG->gr_time = $4; if (($4 < 1) || ($4 > 1800)) cf_error("Graceful restart time must be in range 1-1800"); } | ECMP bool { OSPF_CFG->ecmp = $2 ? OSPF_DEFAULT_ECMP_LIMIT : 0; } | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? 
$4 : 0; } | MERGE EXTERNAL bool { OSPF_CFG->merge_external = $3; } diff --git a/proto/ospf/dbdes.c b/proto/ospf/dbdes.c index a1559782..b39595d9 100644 --- a/proto/ospf/dbdes.c +++ b/proto/ospf/dbdes.c @@ -215,7 +215,7 @@ ospf_send_dbdes(struct ospf_proto *p, struct ospf_neighbor *n) ASSERT((n->state == NEIGHBOR_EXSTART) || (n->state == NEIGHBOR_EXCHANGE)); - if (n->ifa->oa->rt == NULL) + if (!n->ifa->oa->rt && !p->gr_recovery) return; ospf_prepare_dbdes(p, n); @@ -279,6 +279,10 @@ ospf_process_dbdes(struct ospf_proto *p, struct ospf_packet *pkt, struct ospf_ne if (LSA_SCOPE(lsa_type) == LSA_SCOPE_RES) DROP1("LSA with invalid scope"); + /* RFC 3623 2.2 (2) special case - check for my router-LSA (GR recovery) */ + if ((lsa_type == LSA_T_RT) && (lsa.rt == p->router_id)) + n->got_my_rt_lsa = 1; + en = ospf_hash_find(p->gr, lsa_domain, lsa.id, lsa.rt, lsa_type); if (!en || (lsa_comp(&lsa, &(en->lsa)) == CMP_NEWER)) { diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 388c91c8..f5c69199 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -772,6 +772,14 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) ifa->cf = new; ifa->marked = 0; + /* Cancel GR peers if GR is disabled */ + if (!p->gr_mode && p->gr_count) + { + struct ospf_neighbor *n, *nx; + WALK_LIST_DELSAFE(n, nx, ifa->neigh_list) + if (n->gr_active) + ospf_neigh_cancel_graceful_restart(n); + } /* HELLO TIMER */ if (ifa->helloint != new->helloint) diff --git a/proto/ospf/lsalib.c b/proto/ospf/lsalib.c index 7ddf64e3..7767700f 100644 --- a/proto/ospf/lsalib.c +++ b/proto/ospf/lsalib.c @@ -12,6 +12,9 @@ #include "lib/fletcher16.h" +#define HDRLEN sizeof(struct ospf_lsa_header) + + #ifndef CPU_BIG_ENDIAN void lsa_hton_hdr(struct ospf_lsa_header *h, struct ospf_lsa_header *n) @@ -61,7 +64,6 @@ lsa_ntoh_body(void *n, void *h, u16 len) #endif /* little endian */ - int lsa_flooding_allowed(u32 type, u32 domain, struct ospf_iface *ifa) { @@ -147,11 +149,13 @@ static const u16 
lsa_v2_types[] = { /* Maps OSPFv2 opaque types to OSPFv3 function codes */ static const u16 opaque_lsa_types[] = { + [LSA_OT_GR] = LSA_T_GR, [LSA_OT_RI] = LSA_T_RI_, }; /* Maps (subset of) OSPFv3 function codes to OSPFv2 opaque types */ static const u8 opaque_lsa_types_inv[] = { + [LSA_T_GR] = LSA_OT_GR, [LSA_T_RI_] = LSA_OT_RI, }; @@ -168,7 +172,13 @@ lsa_get_type_domain_(u32 type, u32 id, struct ospf_iface *ifa, u32 *otype, u32 * uint code; if (LSA_FUNCTION(type) == LSA_T_OPAQUE_) if (code = LOOKUP(opaque_lsa_types, id >> 24)) + { type = code | LSA_UBIT | LSA_SCOPE(type); + + /* Hack for Grace-LSA: It does not use U-bit for link-scoped LSAs */ + if (type == (LSA_T_GR | LSA_UBIT)) + type = LSA_T_GR; + } } else { @@ -196,6 +206,13 @@ lsa_get_type_domain_(u32 type, u32 id, struct ospf_iface *ifa, u32 *otype, u32 * } } +int +lsa_is_opaque(u32 type) +{ + u32 fn = LSA_FUNCTION(type); + return LOOKUP(opaque_lsa_types_inv, fn) || (fn == LSA_T_OPAQUE_); +} + u32 lsa_get_opaque_type(u32 type) { @@ -267,6 +284,51 @@ lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2) } +#define LSA_TLV_LENGTH(tlv) \ + (sizeof(struct ospf_tlv) + BIRD_ALIGN((tlv)->length, 4)) + +#define LSA_NEXT_TLV(tlv) \ + ((struct ospf_tlv *) ((byte *) (tlv) + LSA_TLV_LENGTH(tlv))) + +#define LSA_WALK_TLVS(tlv,buf,len) \ + for(struct ospf_tlv *tlv = (void *) (buf); \ + (byte *) tlv < (byte *) (buf) + (len); \ + tlv = LSA_NEXT_TLV(tlv)) + +struct ospf_tlv * +lsa_get_tlv(struct top_hash_entry *en, uint type) +{ + LSA_WALK_TLVS(tlv, en->lsa_body, en->lsa.length - HDRLEN) + if (tlv->type == type) + return tlv; + + return NULL; +} + +int +lsa_validate_tlvs(byte *buf, uint len) +{ + byte *pos = buf; + byte *end = buf + len; + + while (pos < end) + { + if ((pos + sizeof(struct ospf_tlv)) > end) + return 0; + + struct ospf_tlv *tlv = (void *) pos; + uint len = LSA_TLV_LENGTH(tlv); + + if ((pos + len) > end) + return 0; + + pos += len; + } + + return 1; +} + + static inline int lsa_walk_rt2(struct 
ospf_lsa_rt_walk *rt) { @@ -408,7 +470,6 @@ lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_ } } -#define HDRLEN sizeof(struct ospf_lsa_header) static int lsa_validate_rt2(struct ospf_lsa_header *lsa, struct ospf_lsa_rt *body) @@ -604,6 +665,12 @@ lsa_validate_prefix(struct ospf_lsa_header *lsa, struct ospf_lsa_prefix *body) } static int +lsa_validate_gr(struct ospf_lsa_header *lsa, void *body) +{ + return lsa_validate_tlvs(body, lsa->length - HDRLEN); +} + +static int lsa_validate_ri(struct ospf_lsa_header *lsa UNUSED, struct ospf_lsa_net *body UNUSED) { /* @@ -643,6 +710,8 @@ lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body) case LSA_T_EXT: case LSA_T_NSSA: return lsa_validate_ext2(lsa, body); + case LSA_T_GR: + return lsa_validate_gr(lsa, body); case LSA_T_RI_LINK: case LSA_T_RI_AREA: case LSA_T_RI_AS: @@ -674,6 +743,8 @@ lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body) return lsa_validate_link(lsa, body); case LSA_T_PREFIX: return lsa_validate_prefix(lsa, body); + case LSA_T_GR: + return lsa_validate_gr(lsa, body); case LSA_T_RI_LINK: case LSA_T_RI_AREA: case LSA_T_RI_AS: diff --git a/proto/ospf/lsalib.h b/proto/ospf/lsalib.h index af8901ce..eca138d7 100644 --- a/proto/ospf/lsalib.h +++ b/proto/ospf/lsalib.h @@ -44,10 +44,7 @@ static inline void lsa_get_type_domain(struct ospf_lsa_header *lsa, struct ospf_ static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p) { return ospf_is_v2(p) ? 
(h->type_raw & LSA_T_V2_MASK) : h->type_raw; } -/* Assuming OSPFv2 - All U-bit LSAs are mapped to Opaque LSAs */ -static inline int lsa_is_opaque(u32 type) -{ return !!(type & LSA_UBIT); } - +int lsa_is_opaque(u32 type); u32 lsa_get_opaque_type(u32 type); int lsa_flooding_allowed(u32 type, u32 domain, struct ospf_iface *ifa); int lsa_is_acceptable(u32 type, struct ospf_neighbor *n, struct ospf_proto *p); @@ -58,6 +55,16 @@ u16 lsa_verify_checksum(const void *lsa_n, int lsa_len); #define CMP_SAME 0 #define CMP_OLDER -1 int lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2); + +struct ospf_tlv * lsa_get_tlv(struct top_hash_entry *en, uint type); + +static inline u32 +lsa_get_tlv_u32(struct top_hash_entry *en, uint type) +{ + struct ospf_tlv *tlv = lsa_get_tlv(en, type); + return (tlv && (tlv->length == 4)) ? tlv->data[0] : 0; +} + void lsa_walk_rt_init(struct ospf_proto *po, struct top_hash_entry *act, struct ospf_lsa_rt_walk *rt); int lsa_walk_rt(struct ospf_lsa_rt_walk *rt); void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric); diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c index 7318b751..fafe4872 100644 --- a/proto/ospf/lsupd.c +++ b/proto/ospf/lsupd.c @@ -185,6 +185,13 @@ static int ospf_flood_lsupd(struct ospf_proto *p, struct top_hash_entry **lsa_li static void ospf_enqueue_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_iface *ifa) { + /* Exception for local Grace-LSA, they are flooded synchronously */ + if ((en->lsa_type == LSA_T_GR) && (en->lsa.rt == p->router_id)) + { + ospf_flood_lsupd(p, &en, 1, 1, ifa); + return; + } + if (ifa->flood_queue_used == ifa->flood_queue_size) { /* If we already have full queue, we send some packets */ @@ -591,8 +598,9 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa, } /* 13. (5f) - handle self-originated LSAs, see also 13.4. 
*/ - if ((lsa.rt == p->router_id) || - (ospf_is_v2(p) && (lsa_type == LSA_T_NET) && ospf_addr_is_local(p, ifa->oa, ipa_from_u32(lsa.id)))) + if (!p->gr_recovery && + ((lsa.rt == p->router_id) || + (ospf_is_v2(p) && (lsa_type == LSA_T_NET) && ospf_addr_is_local(p, ifa->oa, ipa_from_u32(lsa.id))))) { OSPF_TRACE(D_EVENTS, "Received unexpected self-originated LSA"); ospf_advance_lsa(p, en, &lsa, lsa_type, lsa_domain, body); @@ -629,6 +637,14 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa, if (lsa_type == LSA_T_LINK) ospf_notify_net_lsa(ifa); + /* RFC 3623 3.1 - entering graceful restart helper mode */ + if (lsa_type == LSA_T_GR) + ospf_neigh_notify_grace_lsa(n, en); + + /* Link received pre-restart router LSA */ + if (p->gr_recovery && (lsa_type == LSA_T_RT) && (lsa.rt == p->router_id)) + ifa->oa->rt = en; + /* 13. (5b) - flood new LSA */ int flood_back = ospf_flood_lsa(p, en, n); diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index c143b130..50ef6a49 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -28,6 +28,8 @@ static void dbdes_timer_hook(timer *t); static void lsrq_timer_hook(timer *t); static void lsrt_timer_hook(timer *t); static void ackd_timer_hook(timer *t); +static void ospf_neigh_stop_graceful_restart_(struct ospf_neighbor *n); +static void graceful_restart_timeout(timer *t); static void @@ -163,7 +165,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) if (old_state == NEIGHBOR_FULL) ifa->fadj--; - if (ifa->fadj != old_fadj) + if ((ifa->fadj != old_fadj) && !n->gr_active) { /* RFC 2328 12.4 Event 4 - neighbor enters/leaves Full state */ ospf_notify_rt_lsa(ifa->oa); @@ -182,6 +184,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) n->dds++; n->myimms = DBDES_IMMS; + n->got_my_rt_lsa = 0; tm_start(n->dbdes_timer, 0); tm_start(n->ackd_timer, ifa->rxmtint S / 2); @@ -191,9 +194,9 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) n->myimms &= ~DBDES_I; /* Generate NeighborChange event if 
needed, see RFC 2328 9.2 */ - if ((state == NEIGHBOR_2WAY) && (old_state < NEIGHBOR_2WAY)) + if ((state == NEIGHBOR_2WAY) && (old_state < NEIGHBOR_2WAY) && !n->gr_active) ospf_iface_sm(ifa, ISM_NEICH); - if ((state < NEIGHBOR_2WAY) && (old_state >= NEIGHBOR_2WAY)) + if ((state < NEIGHBOR_2WAY) && (old_state >= NEIGHBOR_2WAY) && !n->gr_active) ospf_iface_sm(ifa, ISM_NEICH); } @@ -291,6 +294,17 @@ ospf_neigh_sm(struct ospf_neighbor *n, int event) case INM_KILLNBR: case INM_LLDOWN: case INM_INACTTIM: + if (n->gr_active && (event == INM_INACTTIM)) + { + /* Just down the neighbor, but do not remove it */ + reset_lists(p, n); + ospf_neigh_chstate(n, NEIGHBOR_DOWN); + break; + } + + if (n->gr_active) + ospf_neigh_stop_graceful_restart_(n); + /* No need for reset_lists() */ ospf_neigh_chstate(n, NEIGHBOR_DOWN); ospf_neigh_down(n); @@ -356,6 +370,180 @@ can_do_adj(struct ospf_neighbor *n) return i; } +static void +ospf_neigh_start_graceful_restart(struct ospf_neighbor *n, uint gr_time) +{ + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Neighbor %R on %s started graceful restart", + n->rid, n->ifa->ifname); + + n->gr_active = 1; + p->gr_count++; + + n->gr_timer = tm_new_init(n->pool, graceful_restart_timeout, n, 0, 0); + tm_start(n->gr_timer, gr_time S); +} + +static void +ospf_neigh_stop_graceful_restart_(struct ospf_neighbor *n) +{ + struct ospf_proto *p = n->ifa->oa->po; + struct ospf_iface *ifa = n->ifa; + + n->gr_active = 0; + p->gr_count--; + + rfree(n->gr_timer); + n->gr_timer = NULL; + + ospf_notify_rt_lsa(ifa->oa); + ospf_notify_net_lsa(ifa); + + if (ifa->type == OSPF_IT_VLINK) + ospf_notify_rt_lsa(ifa->voa); + + ospf_iface_sm(ifa, ISM_NEICH); +} + +static void +ospf_neigh_stop_graceful_restart(struct ospf_neighbor *n) +{ + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Neighbor %R on %s finished graceful restart", + n->rid, n->ifa->ifname); + + ospf_neigh_stop_graceful_restart_(n); +} + +void 
+ospf_neigh_cancel_graceful_restart(struct ospf_neighbor *n) +{ + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Graceful restart canceled for nbr %R on %s", + n->rid, n->ifa->ifname); + + ospf_neigh_stop_graceful_restart_(n); + + if (n->state == NEIGHBOR_DOWN) + ospf_neigh_down(n); +} + +static void +graceful_restart_timeout(timer *t) +{ + struct ospf_neighbor *n = t->data; + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Graceful restart timer expired for nbr %R on %s", + n->rid, n->ifa->ifname); + + ospf_neigh_stop_graceful_restart_(n); + + if (n->state == NEIGHBOR_DOWN) + ospf_neigh_down(n); +} + +static inline int +changes_in_lsrtl(struct ospf_neighbor *n) +{ + /* This could be improved, see RFC 3623 3.1 (2) */ + + struct top_hash_entry *en; + WALK_SLIST(en, n->lsrtl) + if (LSA_FUNCTION(en->lsa_type) <= LSA_FUNCTION(LSA_T_NSSA)) + return 1; + + return 0; +} + +void +ospf_neigh_notify_grace_lsa(struct ospf_neighbor *n, struct top_hash_entry *en) +{ + struct ospf_iface *ifa = n->ifa; + struct ospf_proto *p = ifa->oa->po; + + /* In OSPFv2, neighbors are identified by either IP or Router ID, based on network type */ + uint t = ifa->type; + if (ospf_is_v2(p) && ((t == OSPF_IT_BCAST) || (t == OSPF_IT_NBMA) || (t == OSPF_IT_PTMP))) + { + struct ospf_tlv *tlv = lsa_get_tlv(en, LSA_GR_ADDRESS); + if (!tlv || tlv->length != 4) + return; + + ip_addr addr = ipa_from_u32(tlv->data[0]); + if (!ipa_equal(n->ip, addr)) + n = find_neigh_by_ip(ifa, addr); + } + else + { + if (n->rid != en->lsa.rt) + n = find_neigh(ifa, en->lsa.rt); + } + + if (!n) + return; + + if (en->lsa.age < LSA_MAXAGE) + { + u32 period = lsa_get_tlv_u32(en, LSA_GR_PERIOD); + + /* Exception for updating grace period */ + if (n->gr_active) + { + tm_start(n->gr_timer, (period S) - (en->lsa.age S)); + return; + } + + /* RFC 3623 3.1 (1) - full adjacency */ + if (n->state != NEIGHBOR_FULL) + return; + + /* RFC 3623 3.1 (2) - no changes in LSADB */ + if (changes_in_lsrtl(n)) + 
return; + + /* RFC 3623 3.1 (3) - grace period not expired */ + if (en->lsa.age >= period) + return; + + /* RFC 3623 3.1 (4) - helper mode allowed */ + if (!p->gr_mode) + return; + + /* RFC 3623 3.1 (5) - no local graceful restart */ + if (p->p.gr_recovery) + return; + + ospf_neigh_start_graceful_restart(n, period - en->lsa.age); + } + else /* Grace-LSA is flushed */ + { + if (n->gr_active) + ospf_neigh_stop_graceful_restart(n); + } +} + +void +ospf_neigh_lsadb_changed_(struct ospf_proto *p, struct top_hash_entry *en) +{ + struct ospf_iface *ifa; + struct ospf_neighbor *n, *nx; + + if (LSA_FUNCTION(en->lsa_type) > LSA_FUNCTION(LSA_T_NSSA)) + return; + + /* RFC 3623 3.2 (3) - cancel graceful restart when LSdb changed */ + WALK_LIST(ifa, p->iface_list) + if (lsa_flooding_allowed(en->lsa_type, en->domain, ifa)) + WALK_LIST_DELSAFE(n, nx, ifa->neigh_list) + if (n->gr_active) + ospf_neigh_cancel_graceful_restart(n); +} + + static inline u32 neigh_get_id(struct ospf_proto *p, struct ospf_neighbor *n) { return ospf_is_v2(p) ? 
ipa_to_u32(n->ip) : n->rid; } diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index f26f0160..b6d5570c 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -92,7 +92,9 @@ * - RFC 2328 - main OSPFv2 standard * - RFC 5340 - main OSPFv3 standard * - RFC 3101 - OSPFv2 NSSA areas + * - RFC 3623 - OSPFv2 Graceful Restart * - RFC 4576 - OSPFv2 VPN loop prevention + * - RFC 5187 - OSPFv3 Graceful Restart * - RFC 5250 - OSPFv2 Opaque LSAs * - RFC 5709 - OSPFv2 HMAC-SHA Cryptographic Authentication * - RFC 5838 - OSPFv3 Support of Address Families @@ -207,7 +209,6 @@ ospf_area_remove(struct ospf_area *oa) mb_free(oa); } - struct ospf_area * ospf_find_area(struct ospf_proto *p, u32 aid) { @@ -228,6 +229,37 @@ ospf_find_vlink(struct ospf_proto *p, u32 voa, u32 vid) return NULL; } +static void +ospf_start_gr_recovery(struct ospf_proto *p) +{ + OSPF_TRACE(D_EVENTS, "Graceful restart started"); + + p->gr_recovery = 1; + p->gr_timeout = current_time() + (p->gr_time S); + channel_graceful_restart_lock(p->p.main_channel); + p->p.main_channel->gr_wait = 1; + + /* NOTE: We should get end of grace period from non-volatile storage */ +} + +void +ospf_stop_gr_recovery(struct ospf_proto *p) +{ + p->gr_recovery = 0; + p->gr_timeout = 0; + channel_graceful_restart_unlock(p->p.main_channel); + + /* Reorigination of router/network LSAs is already scheduled */ + ospf_mark_lsadb(p); + + /* + * NOTE: We should move channel_graceful_restart_unlock() to the end of + * ospf_disp() in order to have local LSA reorigination / LSAdb cleanup / + * routing table recomputation before official end of GR. It does not matter + * when we are single-threaded. 
+ */ +} + static int ospf_start(struct proto *P) { @@ -246,6 +278,8 @@ ospf_start(struct proto *P) p->asbr = c->asbr; p->vpn_pe = c->vpn_pe; p->ecmp = c->ecmp; + p->gr_mode = c->gr_mode; + p->gr_time = c->gr_time; p->tick = c->tick; p->disp_timer = tm_new_init(P->pool, ospf_disp, p, p->tick S, 0); tm_start(p->disp_timer, 100 MS); @@ -267,6 +301,10 @@ ospf_start(struct proto *P) p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 }; p->log_lsa_tbf = (struct tbf){ .rate = 4, .burst = 20 }; + /* Lock the channel when in GR recovery mode */ + if (p->p.gr_recovery && (p->gr_mode == OSPF_GR_ABLE)) + ospf_start_gr_recovery(p); + WALK_LIST(ac, c->area_list) ospf_area_add(p, ac); @@ -398,6 +436,9 @@ ospf_disp(timer * timer) { struct ospf_proto *p = timer->data; + if (p->gr_recovery) + ospf_update_gr_recovery(p); + /* Originate or flush local topology LSAs */ ospf_update_topology(p); @@ -475,9 +516,18 @@ ospf_shutdown(struct proto *P) OSPF_TRACE(D_EVENTS, "Shutdown requested"); - /* And send to all my neighbors 1WAY */ - WALK_LIST(ifa, p->iface_list) - ospf_iface_shutdown(ifa); + if ((P->down_code == PDC_CMD_GR_DOWN) && (p->gr_mode == OSPF_GR_ABLE)) + { + /* Originate Grace LSAs */ + WALK_LIST(ifa, p->iface_list) + ospf_originate_gr_lsa(p, ifa); + } + else + { + /* Send to all my neighbors 1WAY */ + WALK_LIST(ifa, p->iface_list) + ospf_iface_shutdown(ifa); + } /* Cleanup locked rta entries */ FIB_WALK(&p->rtf, ort, nf) @@ -664,6 +714,8 @@ ospf_reconfigure(struct proto *P, struct proto_config *CF) p->merge_external = new->merge_external; p->asbr = new->asbr; p->ecmp = new->ecmp; + p->gr_mode = new->gr_mode; + p->gr_time = new->gr_time; p->tick = new->tick; p->disp_timer->recurrent = p->tick S; tm_start(p->disp_timer, 10 MS); diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 82ae4df4..beecd2b6 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -75,6 +75,7 @@ #define OSPF_DEFAULT_TICK 1 #define OSPF_DEFAULT_STUB_COST 1000 #define OSPF_DEFAULT_ECMP_LIMIT 16 
+#define OSPF_DEFAULT_GR_TIME 120 #define OSPF_DEFAULT_TRANSINT 40 #define OSPF_MIN_PKT_SIZE 256 @@ -82,6 +83,9 @@ #define OSPF_VLINK_ID_OFFSET 0x80000000 +#define OSPF_GR_ABLE 1 +#define OSPF_GR_AWARE 2 + struct ospf_config { struct proto_config c; @@ -97,7 +101,9 @@ struct ospf_config u8 abr; u8 asbr; u8 vpn_pe; - int ecmp; + u8 gr_mode; /* Graceful restart mode (OSPF_GR_*) */ + uint gr_time; /* Graceful restart interval */ + uint ecmp; list area_list; /* list of area configs (struct ospf_area_config) */ list vlink_list; /* list of configured vlinks (struct ospf_iface_patt) */ }; @@ -216,6 +222,9 @@ struct ospf_proto list area_list; /* List of OSPF areas (struct ospf_area) */ int areano; /* Number of area I belong to */ int padj; /* Number of neighbors in Exchange or Loading state */ + int gr_count; /* Number of neighbors in graceful restart state */ + int gr_recovery; /* Graceful restart recovery is active */ + btime gr_timeout; /* The end time of grace restart recovery */ struct fib rtf; /* Routing table */ struct idm idm; /* OSPFv3 LSA ID map */ u8 ospf2; /* OSPF v2 or v3 */ @@ -228,6 +237,8 @@ struct ospf_proto u8 asbr; /* May i originate any ext/NSSA lsa? */ u8 vpn_pe; /* Should we do VPN PE specific behavior (RFC 4577)? 
*/ u8 ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ + u8 gr_mode; /* Graceful restart mode (OSPF_GR_*) */ + uint gr_time; /* Graceful restart interval */ u64 csn64; /* Last used cryptographic sequence number */ struct ospf_area *backbone; /* If exists */ event *flood_event; /* Event for flooding LS updates */ @@ -346,6 +357,8 @@ struct ospf_neighbor pool *pool; struct ospf_iface *ifa; u8 state; + u8 gr_active; /* We act as GR helper for the neighbor */ + u8 got_my_rt_lsa; /* Received my Rt-LSA in DBDES exchanged */ timer *inactim; /* Inactivity timer */ u8 imms; /* I, M, Master/slave received */ u8 myimms; /* I, M Master/slave */ @@ -388,6 +401,7 @@ struct ospf_neighbor #define ACKL_DIRECT 0 #define ACKL_DELAY 1 timer *ackd_timer; /* Delayed ack timer */ + timer *gr_timer; /* Graceful restart timer, non-NULL only if gr_active */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ void *ldd_buffer; /* Last database description packet */ u32 ldd_bsize; /* Buffer size for ldd_buffer */ @@ -555,6 +569,7 @@ struct ospf_auth3 #define LSA_T_NSSA 0x2007 #define LSA_T_LINK 0x0008 #define LSA_T_PREFIX 0x2009 +#define LSA_T_GR 0x000B #define LSA_T_RI_ 0x000C #define LSA_T_RI_LINK 0x800C #define LSA_T_RI_AREA 0xA00C @@ -569,6 +584,7 @@ struct ospf_auth3 /* OSPFv2 Opaque LSA Types */ /* https://www.iana.org/assignments/ospf-opaque-types/ospf-opaque-types.xhtml#ospf-opaque-types-2 */ +#define LSA_OT_GR 0x03 #define LSA_OT_RI 0x04 #define LSA_FUNCTION_MASK 0x1FFF @@ -613,6 +629,12 @@ struct ospf_auth3 #define LSA_EXT3_FBIT 0x02000000 #define LSA_EXT3_TBIT 0x01000000 +/* OSPF Grace LSA (GR) TLVs */ +/* https://www.iana.org/assignments/ospfv2-parameters/ospfv2-parameters.xhtml#ospfv2-parameters-13 */ +#define LSA_GR_PERIOD 1 +#define LSA_GR_REASON 2 +#define LSA_GR_ADDRESS 3 + /* OSPF Router Information (RI) TLVs */ /* https://www.iana.org/assignments/ospf-parameters/ospf-parameters.xhtml#ri-tlv */ #define LSA_RI_RIC 1 @@ -959,6 +981,8 @@ static inline int 
oa_is_ext(struct ospf_area *oa) static inline int oa_is_nssa(struct ospf_area *oa) { return oa->options & OPT_N; } +void ospf_stop_gr_recovery(struct ospf_proto *p); + void ospf_sh_neigh(struct proto *P, char *iff); void ospf_sh(struct proto *P); void ospf_sh_iface(struct proto *P, char *iff); @@ -990,12 +1014,18 @@ static inline struct nbma_node * find_nbma_node(struct ospf_iface *ifa, ip_addr /* neighbor.c */ struct ospf_neighbor *ospf_neighbor_new(struct ospf_iface *ifa); void ospf_neigh_sm(struct ospf_neighbor *n, int event); +void ospf_neigh_cancel_graceful_restart(struct ospf_neighbor *n); +void ospf_neigh_notify_grace_lsa(struct ospf_neighbor *n, struct top_hash_entry *en); +void ospf_neigh_lsadb_changed_(struct ospf_proto *p, struct top_hash_entry *en); void ospf_dr_election(struct ospf_iface *ifa); struct ospf_neighbor *find_neigh(struct ospf_iface *ifa, u32 rid); struct ospf_neighbor *find_neigh_by_ip(struct ospf_iface *ifa, ip_addr ip); void ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd); void ospf_sh_neigh_info(struct ospf_neighbor *n); +static inline void ospf_neigh_lsadb_changed(struct ospf_proto *p, struct top_hash_entry *en) +{ if (p->gr_count) ospf_neigh_lsadb_changed_(p, en); } + /* packet.c */ void ospf_pkt_fill_hdr(struct ospf_iface *ifa, void *buf, u8 h_type); int ospf_rx_hook(sock * sk, uint size); diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 6ddd6c9f..126ef201 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -10,7 +10,7 @@ #include "ospf.h" -static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint lif, uint nif); +static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint data, uint lif, uint nif); static void rt_sync(struct ospf_proto *p); @@ -392,6 +392,40 @@ px_pos_to_ifa(struct ospf_area *oa, int pos) return NULL; } +static inline struct ospf_iface * +rt_find_iface2(struct ospf_area *oa, 
uint data) +{ + ip_addr addr = ipa_from_u32(data); + + /* We should handle it differently for unnumbered PTP links */ + struct ospf_iface *ifa; + WALK_LIST(ifa, oa->po->iface_list) + if ((ifa->oa == oa) && ifa->addr && (ipa_equal(ifa->addr->ip, addr))) + return ifa; + + return NULL; +} + +static inline struct ospf_iface * +rt_find_iface3(struct ospf_area *oa, uint lif) +{ + struct ospf_iface *ifa; + WALK_LIST(ifa, oa->po->iface_list) + if ((ifa->oa == oa) && (ifa->iface_id == lif)) + return ifa; + + return NULL; +} + +static struct ospf_iface * +rt_find_iface(struct ospf_area *oa, int pos, uint data, uint lif) +{ + if (0) + return rt_pos_to_ifa(oa, pos); + else + return ospf_is_v2(oa->po) ? rt_find_iface2(oa, data) : rt_find_iface3(oa, lif); +} + static void add_network(struct ospf_area *oa, net_addr *net, int metric, struct top_hash_entry *en, int pos) @@ -503,7 +537,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr break; } - add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.lif, rtl.nif); + add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.data, rtl.lif, rtl.nif); } } @@ -526,7 +560,7 @@ spfa_process_net(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_ent for (i = 0; i < cnt; i++) { tmp = ospf_hash_find_rt(p->gr, oa->areaid, ln->routers[i]); - add_cand(oa, tmp, act, act->dist, -1, 0, 0); + add_cand(oa, tmp, act, act->dist, -1, 0, 0, 0); } } @@ -1708,7 +1742,7 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) static struct nexthop * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, - struct top_hash_entry *par, int pos, uint lif, uint nif) + struct top_hash_entry *par, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; struct nexthop *pn = par->nhs; @@ -1735,7 +1769,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* The first case - local network */ if ((en->lsa_type == LSA_T_NET) && (par == oa->rt)) { - ifa = rt_pos_to_ifa(oa, pos); + 
ifa = rt_find_iface(oa, pos, data, lif); if (!ifa) return NULL; @@ -1748,7 +1782,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* The second case - ptp or ptmp neighbor */ if ((en->lsa_type == LSA_T_RT) && (par == oa->rt)) { - ifa = rt_pos_to_ifa(oa, pos); + ifa = rt_find_iface(oa, pos, data, lif); if (!ifa) return NULL; @@ -1838,7 +1872,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* Add LSA into list of candidates in Dijkstra's algorithm */ static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, - u32 dist, int pos, uint lif, uint nif) + u32 dist, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; node *prev, *n; @@ -1871,7 +1905,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry if (!link_back(oa, en, par, lif, nif)) return; - struct nexthop *nhs = calc_next_hop(oa, en, par, pos, lif, nif); + struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -2086,3 +2120,133 @@ again2: if (en->mode == LSA_M_STALE) ospf_flush_lsa(p, en); } + + +/* RFC 3623 2.2 - checking for graceful restart termination conditions */ +void +ospf_update_gr_recovery(struct ospf_proto *p) +{ + struct top_hash_entry *rt, *net, *nbr; + struct ospf_lsa_rt_walk rtl; + struct ospf_neighbor *n; + struct ospf_iface *ifa; + struct ospf_area *oa; + const char *err_dsc = NULL; + uint i, j, missing = 0, err_val = 0; + + /* + * We check here for three cases: + * RFC 3623 2.2 (1) - success when all adjacencies are established + * RFC 3623 2.2 (2) - failure when inconsistent LSA was received + * RFC 3623 2.2 (3) - grace period timeout + * + * It is handled by processing pre-restart local router-LSA and adjacent + * network-LSAs, checking neighbor association for referenced routers (1) + * and checking back links from their router-LSAs (2). 
+ * + * TODO: Use timer for grace period timeout. We avoided that as function + * ospf_stop_gr_recovery() called from ospf_disp() makes ending of graceful + * restart uninterrupted by other events. + */ + + #define CONTINUE { missing++; continue; } + + if (current_time() > p->gr_timeout) + goto timeout; + + WALK_LIST(oa, p->area_list) + { + /* Get the router-LSA */ + rt = oa->rt; + if (!rt || (rt->lsa.age == LSA_MAXAGE)) + CONTINUE; + + for (lsa_walk_rt_init(p, rt, &rtl), i = 0; lsa_walk_rt(&rtl); i++) + { + if (rtl.type == LSART_STUB) + continue; + + ifa = rt_find_iface(oa, i, rtl.data, rtl.lif); + if (!ifa) + DROP("inconsistent interface", ospf_is_v2(p) ? rtl.data : rtl.lif); + + switch (rtl.type) + { + case LSART_NET: + /* Find the network-LSA */ + net = ospf_hash_find_net(p->gr, oa->areaid, rtl.id, rtl.nif); + if (!net) + CONTINUE; + + if (!link_back(oa, net, rt, rtl.lif, rtl.nif)) + DROP("Inconsistent network-LSA", net->lsa.id); + + if (ifa->state == OSPF_IS_DR) + { + /* Find all neighbors from the network-LSA */ + struct ospf_lsa_net *net_body = net->lsa_body; + uint cnt = lsa_net_count(&net->lsa); + for (j = 0; j < cnt; i++) + { + n = find_neigh(ifa, net_body->routers[j]); + if (!n || (n->state != NEIGHBOR_FULL)) + CONTINUE; + + if (!n->got_my_rt_lsa) + DROP("not received my router-LSA", n->rid); + + nbr = ospf_hash_find_rt(p->gr, oa->areaid, n->rid); + if (!link_back(oa, nbr, net, 0, 0)) + DROP("inconsistent router-LSA", n->rid); + } + } + else + { + /* Find the DR (by IP for OSPFv2) */ + n = ospf_is_v2(p) ? 
+ find_neigh_by_ip(ifa, ipa_from_u32(rtl.id)) : + find_neigh(ifa, rtl.id); + if (!n || (n->state != NEIGHBOR_FULL)) + CONTINUE; + + if (!n->got_my_rt_lsa) + DROP("not received my router-LSA", n->rid); + } + break; + + case LSART_VLNK: + case LSART_PTP: + /* Find the PtP peer */ + n = find_neigh(ifa, rtl.id); + if (!n || (n->state != NEIGHBOR_FULL)) + CONTINUE; + + if (!n->got_my_rt_lsa) + DROP("not received my router-LSA", n->rid); + + nbr = ospf_hash_find_rt(p->gr, oa->areaid, rtl.id); + if (!link_back(oa, nbr, rt, rtl.lif, rtl.nif)) + DROP("inconsistent router-LSA", rtl.id); + } + } + } + + #undef CONTINUE + + if (missing) + return; + + OSPF_TRACE(D_EVENTS, "Graceful restart finished"); + ospf_stop_gr_recovery(p); + return; + +drop: + log(L_INFO "%s: Graceful restart ended - %s (%R)", p->p.name, err_dsc, err_val); + ospf_stop_gr_recovery(p); + return; + +timeout: + log(L_INFO "%s: Graceful restart ended - grace period expired", p->p.name); + ospf_stop_gr_recovery(p); + return; +} diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 589d2bc5..094e125b 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -130,6 +130,7 @@ static inline int rt_is_nssa(ort *nf) void ospf_rt_spf(struct ospf_proto *p); void ospf_rt_initort(struct fib_node *fn); +void ospf_update_gr_recovery(struct ospf_proto *p); #endif /* _BIRD_OSPF_RT_H_ */ diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 7d5deca0..efd03b54 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -83,7 +83,10 @@ ospf_install_lsa(struct ospf_proto *p, struct ospf_lsa_header *lsa, u32 type, u3 en->lsa_type, en->lsa.id, en->lsa.rt, en->lsa.sn, en->lsa.age); if (change) + { + ospf_neigh_lsadb_changed(p, en); ospf_schedule_rtcalc(p); + } return en; } @@ -243,6 +246,7 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa en->lsa.age = 0; en->init_age = 0; en->inst_time = current_time(); + en->dirty = 0; lsa_generate_checksum(&en->lsa, en->lsa_body); 
OSPF_TRACE(D_EVENTS, "Originating LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", @@ -251,7 +255,10 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa ospf_flood_lsa(p, en, NULL); if (en->mode == LSA_M_BASIC) + { + ospf_neigh_lsadb_changed(p, en); ospf_schedule_rtcalc(p); + } return 1; } @@ -321,7 +328,8 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa) if ((en->lsa.age < LSA_MAXAGE) && (lsa_length == en->lsa.length) && !memcmp(lsa_body, en->lsa_body, lsa_blen) && - (!ospf_is_v2(p) || (lsa->opts == lsa_get_options(&en->lsa)))) + (!ospf_is_v2(p) || (lsa->opts == lsa_get_options(&en->lsa))) && + !en->dirty) goto drop; lsa_body = lsab_flush(p); @@ -433,7 +441,10 @@ ospf_flush_lsa(struct ospf_proto *p, struct top_hash_entry *en) ospf_flood_lsa(p, en, NULL); if (en->mode == LSA_M_BASIC) + { + ospf_neigh_lsadb_changed(p, en); ospf_schedule_rtcalc(p); + } en->mode = LSA_M_BASIC; } @@ -509,6 +520,12 @@ ospf_update_lsadb(struct ospf_proto *p) continue; } + if (en->dirty) + { + ospf_flush_lsa(p, en); + continue; + } + if ((en->lsa.rt == p->router_id) && (real_age >= LSREFRESHTIME)) { ospf_refresh_lsa(p, en); @@ -525,6 +542,16 @@ ospf_update_lsadb(struct ospf_proto *p) } } +void +ospf_mark_lsadb(struct ospf_proto *p) +{ + struct top_hash_entry *en; + + /* Mark all local LSAs as dirty */ + WALK_SLIST(en, p->lsal) + if (en->lsa.rt == p->router_id) + en->dirty = 1; +} static u32 ort_to_lsaid(struct ospf_proto *p, ort *nf) @@ -1424,6 +1451,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ospf_config *cf = (struct ospf_config *) (p->p.cf); struct ospf_iface *ifa; struct ospf_lsa_prefix *lp; + uint max = ospf_is_ip4(p) ? 
IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; int host_addr = 0; int net_lsa; int i = 0; @@ -1457,7 +1485,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (a->scope <= SCOPE_LINK)) continue; - if (((a->prefix.pxlen < IP6_MAX_PREFIX_LENGTH) && net_lsa) || + if (((a->prefix.pxlen < max) && net_lsa) || configured_stubnet(oa, a)) continue; @@ -1465,8 +1493,13 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (ifa->state == OSPF_IS_LOOP) || (ifa->type == OSPF_IT_PTMP)) { - net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH); - lsab_put_prefix(p, (net_addr *) &net, 0); + net_addr net; + if (a->prefix.type == NET_IP4) + net_fill_ip4(&net, ipa_to_ip4(a->ip), IP4_MAX_PREFIX_LENGTH); + else + net_fill_ip6(&net, ipa_to_ip6(a->ip), IP6_MAX_PREFIX_LENGTH); + + lsab_put_prefix(p, &net, 0); host_addr = 1; } else @@ -1482,7 +1515,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) if (!sn->hidden) { lsab_put_prefix(p, &sn->prefix, sn->cost); - if (sn->prefix.pxlen == IP6_MAX_PREFIX_LENGTH) + if (sn->prefix.pxlen == max) host_addr = 1; i++; } @@ -1670,6 +1703,59 @@ ospf_originate_prefix_net_lsa(struct ospf_proto *p, struct ospf_iface *ifa) /* + * Grace LSA handling + * Type = LSA_T_GR, opaque type = LSA_OT_GR + */ + +static inline void +ospf_add_gr_period_tlv(struct ospf_proto *p, uint period) +{ + struct ospf_tlv *tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32)); + tlv->type = LSA_GR_PERIOD; + tlv->length = 4; + tlv->data[0] = period; +} + +static inline void +ospf_add_gr_reason_tlv(struct ospf_proto *p, uint reason) +{ + struct ospf_tlv *tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32)); + tlv->type = LSA_GR_REASON; + tlv->length = 1; + tlv->data[0] = reason << 24; +} + +static inline void +ospf_add_gr_address_tlv(struct ospf_proto *p, ip4_addr addr) +{ + struct ospf_tlv *tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32)); + tlv->type = LSA_GR_ADDRESS; + tlv->length 
= 4; + tlv->data[0] = ip4_to_u32(addr); +} + +void +ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa) +{ + struct ospf_new_lsa lsa = { + .type = LSA_T_GR, + .dom = ifa->iface_id, + .id = ospf_is_v2(p) ? 0 : ifa->iface_id, + .ifa = ifa + }; + + ospf_add_gr_period_tlv(p, p->gr_time); + ospf_add_gr_reason_tlv(p, 0); + + uint t = ifa->type; + if (ospf_is_v2(p) && ((t == OSPF_IT_BCAST) || (t == OSPF_IT_NBMA) || (t == OSPF_IT_PTMP))) + ospf_add_gr_address_tlv(p, ipa_to_ip4(ifa->addr->ip)); + + ospf_originate_lsa(p, &lsa); +} + + +/* * Router Information LSA handling * Type = LSA_T_RI_AREA, opaque type = LSA_OT_RI */ @@ -1712,6 +1798,10 @@ ospf_update_topology(struct ospf_proto *p) struct ospf_area *oa; struct ospf_iface *ifa; + /* No LSA reorigination during GR recovery */ + if (p->gr_recovery) + return; + WALK_LIST(oa, p->area_list) { if (oa->update_rt_lsa) diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index fd70239d..ffae436a 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -33,6 +33,7 @@ struct top_hash_entry u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ int ret_count; /* Number of retransmission lists referencing the entry */ + u8 dirty; /* Will be flushed during next LSAdb update unless reoriginated*/ u8 color; #define OUTSPF 0 #define CANDIDATE 1 @@ -180,6 +181,7 @@ struct top_hash_entry * ospf_originate_lsa(struct ospf_proto *p, struct ospf_new void ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_lsa_header *lsa, u32 type, u32 domain, void *body); void ospf_flush_lsa(struct ospf_proto *p, struct top_hash_entry *en); void ospf_update_lsadb(struct ospf_proto *p); +void ospf_mark_lsadb(struct ospf_proto *p); static inline void ospf_flush2_lsa(struct ospf_proto *p, struct top_hash_entry **en) { if (*en) { ospf_flush_lsa(p, *en); *en = NULL; } } @@ -187,6 +189,7 @@ static inline void ospf_flush2_lsa(struct ospf_proto *p, 
struct top_hash_entry * void ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric); void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options); void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit, int dn); +void ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa); void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old); void ospf_update_topology(struct ospf_proto *p); diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y index e7ecd735..b78e0e6c 100644 --- a/sysdep/unix/config.Y +++ b/sysdep/unix/config.Y @@ -18,7 +18,7 @@ static struct log_config *this_log; CF_DECLS CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT) -CF_KEYWORDS(NAME, CONFIRM, UNDO, CHECK, TIMEOUT, DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING) +CF_KEYWORDS(NAME, CONFIRM, UNDO, CHECK, TIMEOUT, DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING, STATUS) %type <i> log_mask log_mask_list log_cat cfg_timeout %type <t> cfg_name @@ -124,12 +124,19 @@ CF_CLI(CONFIGURE CONFIRM,,, [[Confirm last configuration change - deactivate und CF_CLI(CONFIGURE UNDO,,, [[Undo last configuration change]]) { cmd_reconfig_undo(); } ; +CF_CLI(CONFIGURE STATUS,,, [[Show configuration status]]) +{ cmd_reconfig_status(); } ; + CF_CLI(CONFIGURE CHECK, cfg_name, [\"<file>\"], [[Parse configuration and check its validity]]) { cmd_check_config($3); } ; CF_CLI(DOWN,,, [[Shut the daemon down]]) { cmd_shutdown(); } ; +CF_CLI(GRACEFUL DOWN,,, [[Shut the daemon down for graceful restart]]) +{ cmd_graceful_restart(); } ; + + cfg_name: /* empty */ { $$ = NULL; } | TEXT diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 5b0e49c1..c9fee3ab 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -1082,6 +1082,7 @@ sk_passive_connected(sock *s, int type) t->fd = fd; 
t->ttl = s->ttl; t->tos = s->tos; + t->vrf = s->vrf; t->rbsize = s->rbsize; t->tbsize = s->tbsize; diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 2cec2cae..27868fab 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -1129,7 +1129,7 @@ krt_shutdown(struct proto *P) krt_scan_timer_stop(p); /* FIXME we should flush routes even when persist during reconfiguration */ - if (p->initialized && !KRT_CF->persist) + if (p->initialized && !KRT_CF->persist && (P->down_code != PDC_CMD_GR_DOWN)) krt_flush_routes(p); p->ready = 0; diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index b0d764fa..05becbe7 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -338,6 +338,28 @@ cmd_reconfig_undo(void) cmd_reconfig_msg(r); } +void +cmd_reconfig_status(void) +{ + int s = config_status(); + btime t = config_timer_status(); + + switch (s) + { + case CONF_DONE: cli_msg(-3, "Daemon is up and running"); break; + case CONF_PROGRESS: cli_msg(-4, "Reconfiguration in progress"); break; + case CONF_QUEUED: cli_msg(-5, "Reconfiguration in progress, next one enqueued"); break; + case CONF_SHUTDOWN: cli_msg(-6, "Shutdown in progress"); break; + default: break; + } + + if (t >= 0) + cli_msg(-22, "Configuration unconfirmed, undo in %t s", t); + + cli_msg(0, ""); +} + + /* * Command-Line Interface */ @@ -542,14 +564,14 @@ cmd_shutdown(void) return; cli_msg(7, "Shutdown requested"); - order_shutdown(); + order_shutdown(0); } void async_shutdown(void) { DBG("Shutting down...\n"); - order_shutdown(); + order_shutdown(0); } void @@ -561,6 +583,17 @@ sysdep_shutdown_done(void) exit(0); } +void +cmd_graceful_restart(void) +{ + if (cli_access_restricted()) + return; + + cli_msg(25, "Graceful restart requested"); + order_shutdown(1); +} + + /* * Signals */ diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 0e1e98c0..bf0aedeb 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -26,7 +26,9 @@ void cmd_check_config(char *name); void cmd_reconfig(char *name, int type, 
uint timeout); void cmd_reconfig_confirm(void); void cmd_reconfig_undo(void); +void cmd_reconfig_status(void); void cmd_shutdown(void); +void cmd_graceful_restart(void); #define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300 diff --git a/test/birdtest.c b/test/birdtest.c index b5ee48c7..2d1e73de 100644 --- a/test/birdtest.c +++ b/test/birdtest.c @@ -492,6 +492,8 @@ void cmd_check_config(char *name UNUSED) {} void cmd_reconfig(char *name UNUSED, int type UNUSED, int timeout UNUSED) {} void cmd_reconfig_confirm(void) {} void cmd_reconfig_undo(void) {} +void cmd_reconfig_status(void) {} +void cmd_graceful_restart(void) {} void cmd_shutdown(void) {} void cmd_reconfig_undo_notify(void) {} |