diff options
Diffstat (limited to 'proto')
-rw-r--r-- | proto/bgp/attrs.c | 4 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 232 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 27 | ||||
-rw-r--r-- | proto/bgp/config.Y | 22 | ||||
-rw-r--r-- | proto/bgp/packets.c | 121 | ||||
-rw-r--r-- | proto/mrt/mrt.c | 4 | ||||
-rw-r--r-- | proto/ospf/config.Y | 6 | ||||
-rw-r--r-- | proto/ospf/dbdes.c | 6 | ||||
-rw-r--r-- | proto/ospf/iface.c | 8 | ||||
-rw-r--r-- | proto/ospf/lsalib.c | 75 | ||||
-rw-r--r-- | proto/ospf/lsalib.h | 15 | ||||
-rw-r--r-- | proto/ospf/lsupd.c | 20 | ||||
-rw-r--r-- | proto/ospf/neighbor.c | 194 | ||||
-rw-r--r-- | proto/ospf/ospf.c | 60 | ||||
-rw-r--r-- | proto/ospf/ospf.h | 32 | ||||
-rw-r--r-- | proto/ospf/rt.c | 180 | ||||
-rw-r--r-- | proto/ospf/rt.h | 1 | ||||
-rw-r--r-- | proto/ospf/topology.c | 100 | ||||
-rw-r--r-- | proto/ospf/topology.h | 3 |
19 files changed, 990 insertions, 120 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 7c6f2ee9..69c4b172 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1302,7 +1302,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 20 +#define PXH_PARAMS /8, *2, 2, 2, 8, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1730,7 +1730,7 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */ - return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); + return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0; } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8dedde9f..b68575a5 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -129,6 +129,9 @@ static list bgp_sockets; /* Global list of listening sockets */ static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); +static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn); +static void bgp_setup_sk(struct bgp_conn *conn, sock *s); +static void bgp_send_open(struct bgp_conn *conn); static void bgp_update_bfd(struct bgp_proto *p, int use_bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); @@ -149,7 +152,7 @@ bgp_open(struct bgp_proto *p) struct bgp_socket *bs = NULL; struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : - (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6); + (p->ipv4 ? IPA_NONE4 : IPA_NONE6); uint port = p->cf->local_port; /* FIXME: Add some global init? 
*/ @@ -272,8 +275,17 @@ bgp_startup(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "Started"); p->start_state = BSS_CONNECT; - if (!p->cf->passive) + if (!p->passive) bgp_active(p); + + if (p->postponed_sk) + { + /* Apply postponed incoming connection */ + bgp_setup_conn(p, &p->incoming_conn); + bgp_setup_sk(&p->incoming_conn, p->postponed_sk); + bgp_send_open(&p->incoming_conn); + p->postponed_sk = NULL; + } } static void @@ -387,7 +399,7 @@ bgp_close_conn(struct bgp_conn *conn) void bgp_update_startup_delay(struct bgp_proto *p) { - struct bgp_config *cf = p->cf; + const struct bgp_config *cf = p->cf; DBG("BGP: Updating startup delay\n"); @@ -410,7 +422,7 @@ bgp_update_startup_delay(struct bgp_proto *p) } static void -bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len) +bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len) { switch (conn->state) { @@ -426,7 +438,13 @@ bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint le case BS_OPENSENT: case BS_OPENCONFIRM: case BS_ESTABLISHED: - bgp_error(conn, 6, subcode, data, len); + if (subcode < 0) + { + bgp_conn_enter_close_state(conn); + bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE); + } + else + bgp_error(conn, 6, subcode, data, len); return; default: @@ -456,7 +474,7 @@ bgp_decision(void *vp) if ((p->p.proto_state == PS_START) && (p->outgoing_conn.state == BS_IDLE) && (p->incoming_conn.state != BS_OPENCONFIRM) && - !p->cf->passive) + !p->passive) bgp_active(p); if ((p->p.proto_state == PS_STOP) && @@ -465,8 +483,31 @@ bgp_decision(void *vp) bgp_down(p); } +static struct bgp_proto * +bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip) +{ + struct symbol *sym; + char fmt[SYM_MAX_LEN]; + + bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits); + + /* This is hack, we would like to share config, but we need to copy it now */ + new_config = config; + cfg_mem = config->mem; + conf_this_scope = config->root_scope; + sym 
= cf_default_name(fmt, &(pp->dynamic_name_counter)); + proto_clone_config(sym, pp->p.cf); + new_config = NULL; + cfg_mem = NULL; + + /* Just pass remote_ip to bgp_init() */ + ((struct bgp_config *) sym->proto)->remote_ip = remote_ip; + + return (void *) proto_spawn(sym->proto, 0); +} + void -bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len) +bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) { proto_notify_state(&p->p, PS_STOP); bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); @@ -491,6 +532,7 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn) } static const struct bgp_af_caps dummy_af_caps = { }; +static const struct bgp_af_caps basic_af_caps = { .ready = 1 }; void bgp_conn_enter_established_state(struct bgp_conn *conn) @@ -503,8 +545,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) BGP_TRACE(D_EVENTS, "BGP session established"); /* For multi-hop BGP sessions */ - if (ipa_zero(p->source_addr)) - p->source_addr = conn->sk->saddr; + if (ipa_zero(p->local_ip)) + p->local_ip = conn->sk->saddr; + + /* For promiscuous sessions */ + if (!p->remote_as) + p->remote_as = conn->received_as; /* In case of LLv6 is not valid during BGP start */ if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6) @@ -541,6 +587,13 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi); + /* Use default if capabilities were not announced */ + if (!local->length && (c->afi == BGP_AF_IPV4)) + loc = &basic_af_caps; + + if (!peer->length && (c->afi == BGP_AF_IPV4)) + rem = &basic_af_caps; + /* Ignore AFIs that were not announced in multiprotocol capability */ if (!loc || !loc->ready) loc = &dummy_af_caps; @@ -880,6 +933,7 @@ bgp_send_open(struct bgp_conn *conn) conn->sk->rx_hook = bgp_rx; conn->sk->tx_hook = bgp_tx; tm_stop(conn->connect_timer); + 
bgp_prepare_capabilities(conn); bgp_schedule_packet(conn, NULL, PKT_OPEN); bgp_conn_set_state(conn, BS_OPENSENT); bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); @@ -1039,8 +1093,8 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c DBG("BGP: Connecting\n"); sock *s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; - s->saddr = p->source_addr; - s->daddr = p->cf->remote_ip; + s->saddr = p->local_ip; + s->daddr = p->remote_ip; s->dport = p->cf->remote_port; s->iface = p->neigh ? p->neigh->iface : NULL; s->vrf = p->p.vrf; @@ -1075,6 +1129,9 @@ err: return; } +static inline int bgp_is_dynamic(struct bgp_proto *p) +{ return ipa_zero(p->remote_ip); } + /** * bgp_find_proto - find existing proto for incoming connection * @sk: TCP socket @@ -1083,6 +1140,7 @@ err: static struct bgp_proto * bgp_find_proto(sock *sk) { + struct bgp_proto *best = NULL; struct bgp_proto *p; /* sk->iface is valid only if src or dst address is link-local */ @@ -1090,13 +1148,20 @@ bgp_find_proto(sock *sk) WALK_LIST(p, proto_list) if ((p->p.proto == &proto_bgp) && - (p->sock == sk->data) && - ipa_equal(p->cf->remote_ip, sk->daddr) && + (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && + (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && + (p->p.vrf == sk->vrf) && + (p->cf->local_port == sk->sport) && (!link || (p->cf->iface == sk->iface)) && (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr))) - return p; + { + best = p; - return NULL; + if (!bgp_is_dynamic(p)) + break; + } + + return best; } /** @@ -1175,6 +1240,16 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) sk_reallocate(sk); } + /* For dynamic BGP, spawn new instance and postpone the socket */ + if (bgp_is_dynamic(p)) + { + p = bgp_spawn(p, sk->daddr); + p->postponed_sk = sk; + rmove(sk, p->p.pool); + return 0; + } + + rmove(sk, p->p.pool); bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); 
bgp_send_open(&p->incoming_conn); @@ -1201,11 +1276,11 @@ bgp_start_neighbor(struct bgp_proto *p) { /* Called only for single-hop BGP sessions */ - if (ipa_zero(p->source_addr)) - p->source_addr = p->neigh->ifa->ip; + if (ipa_zero(p->local_ip)) + p->local_ip = p->neigh->ifa->ip; - if (ipa_is_link_local(p->source_addr)) - p->link_addr = p->source_addr; + if (ipa_is_link_local(p->local_ip)) + p->link_addr = p->local_ip; else if (p->neigh->iface->llv6) p->link_addr = p->neigh->iface->llv6->ip; @@ -1293,8 +1368,8 @@ bgp_bfd_notify(struct bfd_request *req) static void bgp_update_bfd(struct bgp_proto *p, int use_bfd) { - if (use_bfd && !p->bfd_req) - p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr, + if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p)) + p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip, p->cf->multihop ? NULL : p->neigh->iface, bgp_bfd_notify, p); @@ -1375,7 +1450,7 @@ static void bgp_start_locked(struct object_lock *lock) { struct bgp_proto *p = lock->data; - struct bgp_config *cf = p->cf; + const struct bgp_config *cf = p->cf; if (p->p.proto_state != PS_START) { @@ -1385,17 +1460,17 @@ bgp_start_locked(struct object_lock *lock) DBG("BGP: Got lock\n"); - if (cf->multihop) + if (cf->multihop || bgp_is_dynamic(p)) { /* Multi-hop sessions do not use neighbor entries */ bgp_initiate(p); return; } - neighbor *n = neigh_find(&p->p, cf->remote_ip, cf->iface, NEF_STICKY); + neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY); if (!n) { - log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface); + log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface); /* As we do not start yet, we can just disable protocol */ p->p.disabled = 1; bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); @@ -1406,7 +1481,7 @@ bgp_start_locked(struct object_lock *lock) p->neigh = n; if (n->scope <= 0) - BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", 
cf->remote_ip, cf->iface); + BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface); else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP)) BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name); else @@ -1417,14 +1492,29 @@ static int bgp_start(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - struct object_lock *lock; + const struct bgp_config *cf = p->cf; + + p->local_ip = cf->local_ip; + p->local_as = cf->local_as; + p->remote_as = cf->remote_as; + p->public_as = cf->local_as; + + /* For dynamic BGP childs, remote_ip is already set */ + if (ipa_nonzero(cf->remote_ip)) + p->remote_ip = cf->remote_ip; + + /* Confederation ID is used for truly external peers */ + if (p->cf->confederation && !p->is_interior) + p->public_as = cf->confederation; + + p->passive = cf->passive || bgp_is_dynamic(p); - DBG("BGP: Startup.\n"); p->start_state = BSS_PREPARE; p->outgoing_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE; p->neigh = NULL; p->bfd_req = NULL; + p->postponed_sk = NULL; p->gr_ready = 0; p->gr_active_num = 0; @@ -1437,7 +1527,6 @@ bgp_start(struct proto *P) p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; p->remote_id = 0; - p->source_addr = p->cf->local_ip; p->link_addr = IPA_NONE; /* Lock all channels when in GR recovery mode */ @@ -1452,9 +1541,9 @@ bgp_start(struct proto *P) * Before attempting to create the connection, we need to lock the port, * so that we are the only instance attempting to talk with that neighbor. */ - + struct object_lock *lock; lock = p->lock = olock_new(P->pool); - lock->addr = p->cf->remote_ip; + lock->addr = p->remote_ip; lock->port = p->cf->remote_port; lock->iface = p->cf->iface; lock->vrf = p->cf->iface ? 
NULL : p->p.vrf; @@ -1472,7 +1561,7 @@ static int bgp_shutdown(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - uint subcode = 0; + int subcode = 0; char *message = NULL; byte *data = NULL; @@ -1493,6 +1582,7 @@ bgp_shutdown(struct proto *P) case PDC_CMD_DISABLE: case PDC_CMD_SHUTDOWN: + shutdown: subcode = 2; // Errcode 6, 2 - administrative shutdown message = P->message; break; @@ -1502,6 +1592,14 @@ bgp_shutdown(struct proto *P) message = P->message; break; + case PDC_CMD_GR_DOWN: + if ((p->cf->gr_mode != BGP_GR_ABLE) && + (p->cf->llgr_mode != BGP_LLGR_ABLE)) + goto shutdown; + + subcode = -1; // Do not send NOTIFICATION, just close the connection + break; + case PDC_RX_LIMIT_HIT: case PDC_IN_LIMIT_HIT: subcode = 1; // Errcode 6, 1 - max number of prefixes reached @@ -1528,7 +1626,7 @@ bgp_shutdown(struct proto *P) if (message) { uint msg_len = strlen(message); - msg_len = MIN(msg_len, 128); + msg_len = MIN(msg_len, 255); /* Buffer will be freed automatically by protocol shutdown */ data = mb_alloc(p->p.pool, msg_len + 1); @@ -1562,17 +1660,21 @@ bgp_init(struct proto_config *CF) P->rte_modify = bgp_rte_modify_stale; p->cf = cf; - p->local_as = cf->local_as; - p->remote_as = cf->remote_as; - p->public_as = cf->local_as; p->is_internal = (cf->local_as == cf->remote_as); p->is_interior = p->is_internal || cf->confederation_member; p->rs_client = cf->rs_client; p->rr_client = cf->rr_client; - /* Confederation ID is used for truly external peers */ - if (cf->confederation && !p->is_interior) - p->public_as = cf->confederation; + p->ipv4 = ipa_nonzero(cf->remote_ip) ? 
+ ipa_is_ip4(cf->remote_ip) : + (cf->remote_range && (cf->remote_range->type == NET_IP4)); + + p->remote_ip = cf->remote_ip; + p->remote_as = cf->remote_as; + + /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */ + if (cf->c.parent) + cf->remote_ip = IPA_NONE; /* Add all channels */ struct bgp_channel_config *cc; @@ -1604,7 +1706,7 @@ bgp_channel_start(struct channel *C) { struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ip_addr src = p->source_addr; + ip_addr src = p->local_ip; if (c->igp_table_ip4) rt_lock_table(c->igp_table_ip4); @@ -1745,14 +1847,19 @@ void bgp_postconfig(struct proto_config *CF) { struct bgp_config *cf = (void *) CF; - int internal = (cf->local_as == cf->remote_as); - int interior = internal || cf->confederation_member; /* Do not check templates at all */ if (cf->c.class == SYM_TEMPLATE) return; + /* Handle undefined remote_as, zero should mean unspecified external */ + if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL)) + cf->remote_as = cf->local_as; + + int internal = (cf->local_as == cf->remote_as); + int interior = internal || cf->confederation_member; + /* EBGP direct by default, IBGP multihop by default */ if (cf->multihop < 0) cf->multihop = internal ? 
64 : 0; @@ -1769,11 +1876,20 @@ bgp_postconfig(struct proto_config *CF) if (!cf->local_as) cf_error("Local AS number must be set"); - if (ipa_zero(cf->remote_ip)) + if (ipa_zero(cf->remote_ip) && !cf->remote_range) cf_error("Neighbor must be configured"); - if (!cf->remote_as) - cf_error("Remote AS number must be set"); + if (ipa_zero(cf->local_ip) && cf->strict_bind) + cf_error("Local address must be configured for strict bind"); + + if (!cf->remote_as && !cf->peer_type) + cf_error("Remote AS number (or peer type) must be set"); + + if ((cf->peer_type == BGP_PT_INTERNAL) && !internal) + cf_error("IBGP cannot have different ASNs"); + + if ((cf->peer_type == BGP_PT_EXTERNAL) && internal) + cf_error("EBGP cannot have the same ASNs"); if (!cf->iface && (ipa_is_link_local(cf->local_ip) || ipa_is_link_local(cf->remote_ip))) @@ -1885,8 +2001,8 @@ static int bgp_reconfigure(struct proto *P, struct proto_config *CF) { struct bgp_proto *p = (void *) P; - struct bgp_config *new = (void *) CF; - struct bgp_config *old = p->cf; + const struct bgp_config *new = (void *) CF; + const struct bgp_config *old = p->cf; if (proto_get_router_id(CF) != p->local_id) return 0; @@ -1896,7 +2012,12 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) // password item is last and must be checked separately OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config)) && ((!old->password && !new->password) - || (old->password && new->password && !strcmp(old->password, new->password))); + || (old->password && new->password && !strcmp(old->password, new->password))) + && ((!old->remote_range && !new->remote_range) + || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range))) + && ((!old->dynamic_name && !new->dynamic_name) + || (old->dynamic_name && new->dynamic_name && !strcmp(old->dynamic_name, new->dynamic_name))) + && (old->dynamic_name_digits == new->dynamic_name_digits); /* FIXME: Move channel reconfiguration to generic protocol code ? 
*/ struct channel *C, *C2; @@ -1926,6 +2047,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) if (same) p->cf = new; + /* Reset name counter */ + p->dynamic_name_counter = 0; + return same; } @@ -2056,7 +2180,7 @@ bgp_state_dsc(struct bgp_proto *p) return "Down"; int state = MAX(p->incoming_conn.state, p->outgoing_conn.state); - if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive) + if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive) return "Passive"; return bgp_state_names[state]; @@ -2232,8 +2356,14 @@ bgp_show_proto_info(struct proto *P) struct bgp_proto *p = (struct bgp_proto *) P; cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); - cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface); + + if (bgp_is_dynamic(p) && p->cf->remote_range) + cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range); + else + cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); + cli_msg(-1006, " Local AS: %u", p->cf->local_as); if (p->gr_active_num) cli_msg(-1006, " Neighbor graceful restart active"); @@ -2269,7 +2399,7 @@ bgp_show_proto_info(struct proto *P) p->rr_client ? " route-reflector" : "", p->rs_client ? " route-server" : "", p->as4_session ? 
" AS4" : ""); - cli_msg(-1006, " Source address: %I", p->source_addr); + cli_msg(-1006, " Source address: %I", p->local_ip); cli_msg(-1006, " Hold timer: %t/%u", tm_remains(p->conn->hold_timer), p->conn->hold_time); cli_msg(-1006, " Keepalive timer: %t/%u", diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b604c7aa..075e1bb9 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -83,6 +83,7 @@ struct bgp_config { struct iface *iface; /* Interface for link-local addresses */ u16 local_port; /* Local listening port */ u16 remote_port; /* Neighbor destination port */ + int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */ int multihop; /* Number of hops if multihop */ int strict_bind; /* Bind listening socket to local address */ int ttl_security; /* Enable TTL security [RFC 5082] */ @@ -123,6 +124,9 @@ struct bgp_config { u32 disable_after_cease; /* Disable it when cease is received, bitfield */ char *password; /* Password used for MD5 authentication */ + net_addr *remote_range; /* Allowed neighbor range for dynamic BGP */ + char *dynamic_name; /* Name pattern for dynamic BGP */ + int dynamic_name_digits; /* Minimum number of digits for dynamic names */ int check_link; /* Use iface link state for liveness detection */ int bfd; /* Use BFD for liveness detection */ }; @@ -136,6 +140,7 @@ struct bgp_channel_config { ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */ u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */ + u8 mandatory; /* Channel is mandatory in capability negotiation */ u8 missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. 
RA_ACCEPTED) */ @@ -151,6 +156,9 @@ struct bgp_channel_config { struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ }; +#define BGP_PT_INTERNAL 1 +#define BGP_PT_EXTERNAL 2 + #define NH_NO 0 #define NH_ALL 1 #define NH_IBGP 2 @@ -213,8 +221,11 @@ struct bgp_caps { u16 gr_time; /* Graceful restart time in seconds */ u8 llgr_aware; /* Long-lived GR capability, RFC draft */ + u8 any_ext_next_hop; /* Bitwise OR of per-AF ext_next_hop */ + u8 any_add_path; /* Bitwise OR of per-AF add_path */ u16 af_count; /* Number of af_data items */ + u16 length; /* Length of capabilities in OPEN msg */ struct bgp_af_caps af_data[0]; /* Per-AF capability data */ }; @@ -235,6 +246,7 @@ struct bgp_conn { u8 state; /* State of connection state machine */ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 ext_messages; /* Session uses extended message length */ + u32 received_as; /* ASN received in OPEN message */ struct bgp_caps *local_caps; struct bgp_caps *remote_caps; @@ -254,18 +266,21 @@ struct bgp_conn { struct bgp_proto { struct proto p; - struct bgp_config *cf; /* Shortcut to BGP configuration */ + const struct bgp_config *cf; /* Shortcut to BGP configuration */ + ip_addr local_ip, remote_ip; u32 local_as, remote_as; u32 public_as; /* Externally visible ASN (local_as or confederation id) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ - int start_state; /* Substates that partitions BS_START */ + u8 start_state; /* Substates that partitions BS_START */ u8 is_internal; /* Internal BGP session (local_as == remote_as) */ u8 is_interior; /* Internal or intra-confederation BGP session */ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u8 rr_client; /* Whether neighbor is RR client of me */ u8 rs_client; /* Whether neighbor is RS client of me */ + u8 ipv4; /* Use IPv4 
connection, i.e. remote_ip is IPv4 */ + u8 passive; /* Do not initiate outgoing connection */ u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */ u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */ u8 gr_ready; /* Neighbor could do graceful restart */ @@ -282,11 +297,12 @@ struct bgp_proto { struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ struct bgp_socket *sock; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - ip_addr source_addr; /* Local address used as an advertised next hop */ - ip_addr link_addr; /* Link-local version of source_addr */ + struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ + ip_addr link_addr; /* Link-local version of local_ip */ event *event; /* Event for respawning and shutting process */ timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ + int dynamic_name_counter; /* Counter for dynamic BGP names */ uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */ btime last_proto_error; /* Time of last error that leads to protocol stop */ u8 last_error_class; /* Error class of last error */ @@ -472,7 +488,7 @@ void bgp_graceful_restart_done(struct bgp_channel *c); void bgp_refresh_begin(struct bgp_channel *c); void bgp_refresh_end(struct bgp_channel *c); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); -void bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len); +void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len); struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); @@ -549,6 +565,7 @@ void bgp_get_route_info(struct rte *, byte *buf); /* packets.c */ void bgp_dump_state_change(struct bgp_conn 
*conn, uint old, uint new); +void bgp_prepare_capabilities(struct bgp_conn *conn); const struct bgp_af_desc *bgp_get_af_desc(u32 afi); const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi); void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type); diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index ac8d024a..bbc7d9a4 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -29,7 +29,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, - LIVED, STALE, IMPORT, IBGP, EBGP) + LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, + DYNAMIC, RANGE, NAME, DIGITS) %type <i> bgp_nh %type <i32> bgp_afi @@ -68,6 +69,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->llgr_mode = -1; BGP_CFG->llgr_time = 3600; BGP_CFG->setkey = 1; + BGP_CFG->dynamic_name = "dynbgp"; BGP_CFG->check_link = -1; } ; @@ -82,6 +84,8 @@ bgp_nbr_opts: /* empty */ | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); } | bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; } + | bgp_nbr_opts INTERNAL { BGP_CFG->peer_type = BGP_PT_INTERNAL; } + | bgp_nbr_opts EXTERNAL { BGP_CFG->peer_type = BGP_PT_EXTERNAL; } ; bgp_cease_mask: @@ -118,11 +122,18 @@ bgp_proto: } | bgp_proto NEIGHBOR bgp_nbr_opts ';' | bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' { - if (ipa_nonzero(BGP_CFG->remote_ip)) + if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range) cf_error("Only one neighbor per BGP instance is allowed"); BGP_CFG->remote_ip = $3; if ($4) BGP_CFG->iface = $4; } + | bgp_proto NEIGHBOR RANGE net_ip bgp_nbr_opts ';' { + if (ipa_nonzero(BGP_CFG->remote_ip) || BGP_CFG->remote_range) + cf_error("Only one neighbor per BGP instance is allowed"); + net_addr *n 
= cfg_alloc($4.length); + net_copy(n, &($4)); + BGP_CFG->remote_range = n; + } | bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); } | bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; } | bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; } @@ -134,6 +145,12 @@ bgp_proto: | bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } + | bgp_proto DYNAMIC NAME text ';' { + if (strchr($4, '%')) cf_error("Forbidden character '%%' in dynamic name"); + if (strlen($4) > (SYM_MAX_LEN - 16)) cf_error("Dynamic name too long"); + BGP_CFG->dynamic_name = $4; + } + | bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); } | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; } | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } @@ -223,6 +240,7 @@ bgp_channel_item: | NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; } | NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; } | NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; } + | MANDATORY bool { BGP_CC->mandatory = $2; } | MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; } | MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; } | MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 2b5cc440..daa88630 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -100,7 +100,7 @@ init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d) d->peer_as = p->remote_as; d->local_as = p->local_as; d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0; - d->af = ipa_is_ip4(p->cf->remote_ip) ? 
BGP_AFI_IPV4 : BGP_AFI_IPV6; + d->af = ipa_is_ip4(p->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6; d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE; d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE; d->as4 = p_ok ? p->as4_session : 0; @@ -208,19 +208,22 @@ bgp_af_caps_cmp(const void *X, const void *Y) } -static byte * -bgp_write_capabilities(struct bgp_conn *conn, byte *buf) +void +bgp_prepare_capabilities(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; struct bgp_channel *c; struct bgp_caps *caps; struct bgp_af_caps *ac; - uint any_ext_next_hop = 0; - uint any_add_path = 0; - byte *data; - /* Prepare bgp_caps structure */ + if (!p->cf->capabilities) + { + /* Just prepare empty local_caps */ + conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); + return; + } + /* Prepare bgp_caps structure */ int n = list_length(&p->p.channels); caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps)); conn->local_caps = caps; @@ -251,10 +254,10 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) ac->ready = 1; ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop; - any_ext_next_hop |= ac->ext_next_hop; + caps->any_ext_next_hop |= ac->ext_next_hop; ac->add_path = c->cf->add_path; - any_add_path |= ac->add_path; + caps->any_add_path |= ac->add_path; if (c->cf->gr_able) { @@ -276,7 +279,16 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) /* Sort capability fields by AFI/SAFI */ qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp); +} +static byte * +bgp_write_capabilities(struct bgp_conn *conn, byte *buf) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_caps *caps = conn->local_caps; + struct bgp_af_caps *ac; + byte *buf_head = buf; + byte *data; /* Create capability list in buffer */ @@ -301,7 +313,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) *buf++ = 0; /* Capability data length */ } - if (any_ext_next_hop) + if (caps->any_ext_next_hop) { *buf++ = 5; 
/* Capability 5: Support for extended next hop */ *buf++ = 0; /* Capability data length, will be fixed later */ @@ -353,7 +365,7 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) buf += 4; } - if (any_add_path) + if (caps->any_add_path) { *buf++ = 69; /* Capability 69: Support for ADD-PATH */ *buf++ = 0; /* Capability data length, will be fixed later */ @@ -394,6 +406,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) data[-1] = buf - data; } + caps->length = buf - buf_head; + return buf; } @@ -405,6 +419,8 @@ bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, i int i, cl; u32 af; + caps->length += len; + while (len > 0) { if (len < 2 || len < (2 + pos[1])) @@ -569,6 +585,42 @@ err: } static int +bgp_check_capabilities(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + struct bgp_caps *local = conn->local_caps; + struct bgp_caps *remote = conn->remote_caps; + struct bgp_channel *c; + int count = 0; + + /* This is partially overlapping with bgp_conn_enter_established_state(), + but we need to run this just after we receive OPEN message */ + + WALK_LIST(c, p->p.channels) + { + const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); + const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi); + + /* Find out whether this channel will be active */ + int active = loc && loc->ready && + ((rem && rem->ready) || (!remote->length && (c->afi == BGP_AF_IPV4))); + + /* Mandatory must be active */ + if (c->cf->mandatory && !active) + return 0; + + if (active) + count++; + } + + /* We need at least one channel active */ + if (!count) + return 0; + + return 1; +} + +static int bgp_read_options(struct bgp_conn *conn, byte *pos, int len) { struct bgp_proto *p = conn->bgp; @@ -635,9 +687,6 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) } else { - /* Prepare empty local_caps */ - conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps)); - buf[9] = 0; /* No optional parameters */ return buf + 
10; } @@ -678,6 +727,10 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) if (!id || (p->is_internal && id == p->local_id)) { bgp_error(conn, 2, 3, pkt+24, -4); return; } + /* RFC 5492 4 - check for required capabilities */ + if (p->cf->capabilities && !bgp_check_capabilities(conn)) + { bgp_error(conn, 2, 7, NULL, 0); return; } + struct bgp_caps *caps = conn->remote_caps; if (caps->as4_support) @@ -687,13 +740,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) if ((as4 != asn) && (asn != AS_TRANS)) log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name); - if (as4 != p->remote_as) + /* When remote ASN is unspecified, it must be external one */ + if (p->remote_as ? (as4 != p->remote_as) : (as4 == p->local_as)) { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; } + + conn->received_as = as4; } else { - if (asn != p->remote_as) + if (p->remote_as ? (asn != p->remote_as) : (asn == p->local_as)) { bgp_error(conn, 2, 2, pkt+20, 2); return; } + + conn->received_as = asn; } /* Check the other connection */ @@ -962,7 +1020,7 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) WITHDRAW(NO_NEXT_HOP); ip_addr *nh = (void *) a->u.ptr->data; - ip_addr peer = s->proto->cf->remote_ip; + ip_addr peer = s->proto->remote_ip; uint len = a->u.ptr->length; /* Forbid zero next hop */ @@ -2280,7 +2338,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis a->source = RTS_BGP; a->scope = SCOPE_UNIVERSE; - a->from = s->proto->cf->remote_ip; + a->from = s->proto->remote_ip; a->eattrs = ea; c->desc->decode_next_hop(s, nh, nh_len, a); @@ -2634,6 +2692,12 @@ bgp_fire_tx(struct bgp_conn *conn) end = bgp_create_notification(conn, pkt); return bgp_send(conn, PKT_NOTIFICATION, end - buf); } + else if (s & (1 << PKT_OPEN)) + { + conn->packets_to_send &= ~(1 << PKT_OPEN); + end = bgp_create_open(conn, pkt); + return bgp_send(conn, PKT_OPEN, end - buf); + } else if (s & (1 << PKT_KEEPALIVE)) { 
conn->packets_to_send &= ~(1 << PKT_KEEPALIVE); @@ -2641,12 +2705,6 @@ bgp_fire_tx(struct bgp_conn *conn) bgp_start_timer(conn->keepalive_timer, conn->keepalive_time); return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH); } - else if (s & (1 << PKT_OPEN)) - { - conn->packets_to_send &= ~(1 << PKT_OPEN); - end = bgp_create_open(conn, pkt); - return bgp_send(conn, PKT_OPEN, end - buf); - } else while (conn->channels_to_send) { c = bgp_get_channel_to_send(p, conn); @@ -2731,15 +2789,18 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev)) ev_schedule(conn->tx_ev); } - void bgp_kick_tx(void *vconn) { struct bgp_conn *conn = vconn; DBG("BGP: kicking TX\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } void @@ -2748,8 +2809,12 @@ bgp_tx(sock *sk) struct bgp_conn *conn = sk->data; DBG("BGP: TX hook\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } @@ -2835,7 +2900,7 @@ bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp) return 1; /* Handle proper message */ - if ((msg_len > 128) && (msg_len + 1 > len)) + if ((msg_len > 255) && (msg_len + 1 > len)) return 0; /* Some elementary cleanup */ diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index e4f1acea..7a396a84 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -361,7 +361,7 @@ mrt_peer_table_dump(struct mrt_table_dump_state *s) if ((P->proto == &proto_bgp) && (P->proto_state != PS_DOWN)) { struct bgp_proto *p = (void *) P; - mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->cf->remote_ip); + mrt_peer_table_entry(s, p->remote_id, p->remote_as, p->remote_ip); } #endif @@ -429,7 +429,7 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) { struct bgp_proto 
*p = (void *) r->attrs->src->proto; struct mrt_peer_entry *n = - HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->cf->remote_ip); + HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip); peer = n ? n->index : 0; } diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 2e9ed0ac..2ec8c0b6 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -200,6 +200,7 @@ CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY, LENGTH) CF_KEYWORDS(MERGE, LSA, SUPPRESSION, MULTICAST, RFC5838, VPN, PE) +CF_KEYWORDS(GRACEFUL, RESTART, AWARE, TIME) %type <ld> lsadb_args %type <i> ospf_variant ospf_af_mc nbma_eligible @@ -226,6 +227,8 @@ ospf_proto_start: proto_start ospf_variant OSPF_CFG->tick = OSPF_DEFAULT_TICK; OSPF_CFG->ospf2 = $2; OSPF_CFG->af_ext = !$2; + OSPF_CFG->gr_mode = OSPF_GR_AWARE; + OSPF_CFG->gr_time = OSPF_DEFAULT_GR_TIME; }; ospf_proto: @@ -258,6 +261,9 @@ ospf_proto_item: | RFC5838 bool { OSPF_CFG->af_ext = $2; if (!ospf_cfg_is_v3()) cf_error("RFC5838 option requires OSPFv3"); } | VPN PE bool { OSPF_CFG->vpn_pe = $3; } | STUB ROUTER bool { OSPF_CFG->stub_router = $3; } + | GRACEFUL RESTART bool { OSPF_CFG->gr_mode = $3; } + | GRACEFUL RESTART AWARE { OSPF_CFG->gr_mode = OSPF_GR_AWARE; } + | GRACEFUL RESTART TIME expr { OSPF_CFG->gr_time = $4; if (($4 < 1) || ($4 > 1800)) cf_error("Graceful restart time must be in range 1-1800"); } | ECMP bool { OSPF_CFG->ecmp = $2 ? OSPF_DEFAULT_ECMP_LIMIT : 0; } | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? 
$4 : 0; } | MERGE EXTERNAL bool { OSPF_CFG->merge_external = $3; } diff --git a/proto/ospf/dbdes.c b/proto/ospf/dbdes.c index a1559782..b39595d9 100644 --- a/proto/ospf/dbdes.c +++ b/proto/ospf/dbdes.c @@ -215,7 +215,7 @@ ospf_send_dbdes(struct ospf_proto *p, struct ospf_neighbor *n) ASSERT((n->state == NEIGHBOR_EXSTART) || (n->state == NEIGHBOR_EXCHANGE)); - if (n->ifa->oa->rt == NULL) + if (!n->ifa->oa->rt && !p->gr_recovery) return; ospf_prepare_dbdes(p, n); @@ -279,6 +279,10 @@ ospf_process_dbdes(struct ospf_proto *p, struct ospf_packet *pkt, struct ospf_ne if (LSA_SCOPE(lsa_type) == LSA_SCOPE_RES) DROP1("LSA with invalid scope"); + /* RFC 3623 2.2 (2) special case - check for my router-LSA (GR recovery) */ + if ((lsa_type == LSA_T_RT) && (lsa.rt == p->router_id)) + n->got_my_rt_lsa = 1; + en = ospf_hash_find(p->gr, lsa_domain, lsa.id, lsa.rt, lsa_type); if (!en || (lsa_comp(&lsa, &(en->lsa)) == CMP_NEWER)) { diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 388c91c8..f5c69199 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -772,6 +772,14 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) ifa->cf = new; ifa->marked = 0; + /* Cancel GR peers if GR is disabled */ + if (!p->gr_mode && p->gr_count) + { + struct ospf_neighbor *n, *nx; + WALK_LIST_DELSAFE(n, nx, ifa->neigh_list) + if (n->gr_active) + ospf_neigh_cancel_graceful_restart(n); + } /* HELLO TIMER */ if (ifa->helloint != new->helloint) diff --git a/proto/ospf/lsalib.c b/proto/ospf/lsalib.c index 7ddf64e3..7767700f 100644 --- a/proto/ospf/lsalib.c +++ b/proto/ospf/lsalib.c @@ -12,6 +12,9 @@ #include "lib/fletcher16.h" +#define HDRLEN sizeof(struct ospf_lsa_header) + + #ifndef CPU_BIG_ENDIAN void lsa_hton_hdr(struct ospf_lsa_header *h, struct ospf_lsa_header *n) @@ -61,7 +64,6 @@ lsa_ntoh_body(void *n, void *h, u16 len) #endif /* little endian */ - int lsa_flooding_allowed(u32 type, u32 domain, struct ospf_iface *ifa) { @@ -147,11 +149,13 @@ static const u16 
lsa_v2_types[] = { /* Maps OSPFv2 opaque types to OSPFv3 function codes */ static const u16 opaque_lsa_types[] = { + [LSA_OT_GR] = LSA_T_GR, [LSA_OT_RI] = LSA_T_RI_, }; /* Maps (subset of) OSPFv3 function codes to OSPFv2 opaque types */ static const u8 opaque_lsa_types_inv[] = { + [LSA_T_GR] = LSA_OT_GR, [LSA_T_RI_] = LSA_OT_RI, }; @@ -168,7 +172,13 @@ lsa_get_type_domain_(u32 type, u32 id, struct ospf_iface *ifa, u32 *otype, u32 * uint code; if (LSA_FUNCTION(type) == LSA_T_OPAQUE_) if (code = LOOKUP(opaque_lsa_types, id >> 24)) + { type = code | LSA_UBIT | LSA_SCOPE(type); + + /* Hack for Grace-LSA: It does not use U-bit for link-scoped LSAs */ + if (type == (LSA_T_GR | LSA_UBIT)) + type = LSA_T_GR; + } } else { @@ -196,6 +206,13 @@ lsa_get_type_domain_(u32 type, u32 id, struct ospf_iface *ifa, u32 *otype, u32 * } } +int +lsa_is_opaque(u32 type) +{ + u32 fn = LSA_FUNCTION(type); + return LOOKUP(opaque_lsa_types_inv, fn) || (fn == LSA_T_OPAQUE_); +} + u32 lsa_get_opaque_type(u32 type) { @@ -267,6 +284,51 @@ lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2) } +#define LSA_TLV_LENGTH(tlv) \ + (sizeof(struct ospf_tlv) + BIRD_ALIGN((tlv)->length, 4)) + +#define LSA_NEXT_TLV(tlv) \ + ((struct ospf_tlv *) ((byte *) (tlv) + LSA_TLV_LENGTH(tlv))) + +#define LSA_WALK_TLVS(tlv,buf,len) \ + for(struct ospf_tlv *tlv = (void *) (buf); \ + (byte *) tlv < (byte *) (buf) + (len); \ + tlv = LSA_NEXT_TLV(tlv)) + +struct ospf_tlv * +lsa_get_tlv(struct top_hash_entry *en, uint type) +{ + LSA_WALK_TLVS(tlv, en->lsa_body, en->lsa.length - HDRLEN) + if (tlv->type == type) + return tlv; + + return NULL; +} + +int +lsa_validate_tlvs(byte *buf, uint len) +{ + byte *pos = buf; + byte *end = buf + len; + + while (pos < end) + { + if ((pos + sizeof(struct ospf_tlv)) > end) + return 0; + + struct ospf_tlv *tlv = (void *) pos; + uint len = LSA_TLV_LENGTH(tlv); + + if ((pos + len) > end) + return 0; + + pos += len; + } + + return 1; +} + + static inline int lsa_walk_rt2(struct 
ospf_lsa_rt_walk *rt) { @@ -408,7 +470,6 @@ lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_ } } -#define HDRLEN sizeof(struct ospf_lsa_header) static int lsa_validate_rt2(struct ospf_lsa_header *lsa, struct ospf_lsa_rt *body) @@ -604,6 +665,12 @@ lsa_validate_prefix(struct ospf_lsa_header *lsa, struct ospf_lsa_prefix *body) } static int +lsa_validate_gr(struct ospf_lsa_header *lsa, void *body) +{ + return lsa_validate_tlvs(body, lsa->length - HDRLEN); +} + +static int lsa_validate_ri(struct ospf_lsa_header *lsa UNUSED, struct ospf_lsa_net *body UNUSED) { /* @@ -643,6 +710,8 @@ lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body) case LSA_T_EXT: case LSA_T_NSSA: return lsa_validate_ext2(lsa, body); + case LSA_T_GR: + return lsa_validate_gr(lsa, body); case LSA_T_RI_LINK: case LSA_T_RI_AREA: case LSA_T_RI_AS: @@ -674,6 +743,8 @@ lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body) return lsa_validate_link(lsa, body); case LSA_T_PREFIX: return lsa_validate_prefix(lsa, body); + case LSA_T_GR: + return lsa_validate_gr(lsa, body); case LSA_T_RI_LINK: case LSA_T_RI_AREA: case LSA_T_RI_AS: diff --git a/proto/ospf/lsalib.h b/proto/ospf/lsalib.h index af8901ce..eca138d7 100644 --- a/proto/ospf/lsalib.h +++ b/proto/ospf/lsalib.h @@ -44,10 +44,7 @@ static inline void lsa_get_type_domain(struct ospf_lsa_header *lsa, struct ospf_ static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p) { return ospf_is_v2(p) ? 
(h->type_raw & LSA_T_V2_MASK) : h->type_raw; } -/* Assuming OSPFv2 - All U-bit LSAs are mapped to Opaque LSAs */ -static inline int lsa_is_opaque(u32 type) -{ return !!(type & LSA_UBIT); } - +int lsa_is_opaque(u32 type); u32 lsa_get_opaque_type(u32 type); int lsa_flooding_allowed(u32 type, u32 domain, struct ospf_iface *ifa); int lsa_is_acceptable(u32 type, struct ospf_neighbor *n, struct ospf_proto *p); @@ -58,6 +55,16 @@ u16 lsa_verify_checksum(const void *lsa_n, int lsa_len); #define CMP_SAME 0 #define CMP_OLDER -1 int lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2); + +struct ospf_tlv * lsa_get_tlv(struct top_hash_entry *en, uint type); + +static inline u32 +lsa_get_tlv_u32(struct top_hash_entry *en, uint type) +{ + struct ospf_tlv *tlv = lsa_get_tlv(en, type); + return (tlv && (tlv->length == 4)) ? tlv->data[0] : 0; +} + void lsa_walk_rt_init(struct ospf_proto *po, struct top_hash_entry *act, struct ospf_lsa_rt_walk *rt); int lsa_walk_rt(struct ospf_lsa_rt_walk *rt); void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric); diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c index 7318b751..fafe4872 100644 --- a/proto/ospf/lsupd.c +++ b/proto/ospf/lsupd.c @@ -185,6 +185,13 @@ static int ospf_flood_lsupd(struct ospf_proto *p, struct top_hash_entry **lsa_li static void ospf_enqueue_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_iface *ifa) { + /* Exception for local Grace-LSA, they are flooded synchronously */ + if ((en->lsa_type == LSA_T_GR) && (en->lsa.rt == p->router_id)) + { + ospf_flood_lsupd(p, &en, 1, 1, ifa); + return; + } + if (ifa->flood_queue_used == ifa->flood_queue_size) { /* If we already have full queue, we send some packets */ @@ -591,8 +598,9 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa, } /* 13. (5f) - handle self-originated LSAs, see also 13.4. 
*/ - if ((lsa.rt == p->router_id) || - (ospf_is_v2(p) && (lsa_type == LSA_T_NET) && ospf_addr_is_local(p, ifa->oa, ipa_from_u32(lsa.id)))) + if (!p->gr_recovery && + ((lsa.rt == p->router_id) || + (ospf_is_v2(p) && (lsa_type == LSA_T_NET) && ospf_addr_is_local(p, ifa->oa, ipa_from_u32(lsa.id))))) { OSPF_TRACE(D_EVENTS, "Received unexpected self-originated LSA"); ospf_advance_lsa(p, en, &lsa, lsa_type, lsa_domain, body); @@ -629,6 +637,14 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa, if (lsa_type == LSA_T_LINK) ospf_notify_net_lsa(ifa); + /* RFC 3623 3.1 - entering graceful restart helper mode */ + if (lsa_type == LSA_T_GR) + ospf_neigh_notify_grace_lsa(n, en); + + /* Link received pre-restart router LSA */ + if (p->gr_recovery && (lsa_type == LSA_T_RT) && (lsa.rt == p->router_id)) + ifa->oa->rt = en; + /* 13. (5b) - flood new LSA */ int flood_back = ospf_flood_lsa(p, en, n); diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index c143b130..50ef6a49 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -28,6 +28,8 @@ static void dbdes_timer_hook(timer *t); static void lsrq_timer_hook(timer *t); static void lsrt_timer_hook(timer *t); static void ackd_timer_hook(timer *t); +static void ospf_neigh_stop_graceful_restart_(struct ospf_neighbor *n); +static void graceful_restart_timeout(timer *t); static void @@ -163,7 +165,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) if (old_state == NEIGHBOR_FULL) ifa->fadj--; - if (ifa->fadj != old_fadj) + if ((ifa->fadj != old_fadj) && !n->gr_active) { /* RFC 2328 12.4 Event 4 - neighbor enters/leaves Full state */ ospf_notify_rt_lsa(ifa->oa); @@ -182,6 +184,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) n->dds++; n->myimms = DBDES_IMMS; + n->got_my_rt_lsa = 0; tm_start(n->dbdes_timer, 0); tm_start(n->ackd_timer, ifa->rxmtint S / 2); @@ -191,9 +194,9 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state) n->myimms &= ~DBDES_I; /* Generate NeighborChange event if 
needed, see RFC 2328 9.2 */ - if ((state == NEIGHBOR_2WAY) && (old_state < NEIGHBOR_2WAY)) + if ((state == NEIGHBOR_2WAY) && (old_state < NEIGHBOR_2WAY) && !n->gr_active) ospf_iface_sm(ifa, ISM_NEICH); - if ((state < NEIGHBOR_2WAY) && (old_state >= NEIGHBOR_2WAY)) + if ((state < NEIGHBOR_2WAY) && (old_state >= NEIGHBOR_2WAY) && !n->gr_active) ospf_iface_sm(ifa, ISM_NEICH); } @@ -291,6 +294,17 @@ ospf_neigh_sm(struct ospf_neighbor *n, int event) case INM_KILLNBR: case INM_LLDOWN: case INM_INACTTIM: + if (n->gr_active && (event == INM_INACTTIM)) + { + /* Just down the neighbor, but do not remove it */ + reset_lists(p, n); + ospf_neigh_chstate(n, NEIGHBOR_DOWN); + break; + } + + if (n->gr_active) + ospf_neigh_stop_graceful_restart_(n); + /* No need for reset_lists() */ ospf_neigh_chstate(n, NEIGHBOR_DOWN); ospf_neigh_down(n); @@ -356,6 +370,180 @@ can_do_adj(struct ospf_neighbor *n) return i; } +static void +ospf_neigh_start_graceful_restart(struct ospf_neighbor *n, uint gr_time) +{ + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Neighbor %R on %s started graceful restart", + n->rid, n->ifa->ifname); + + n->gr_active = 1; + p->gr_count++; + + n->gr_timer = tm_new_init(n->pool, graceful_restart_timeout, n, 0, 0); + tm_start(n->gr_timer, gr_time S); +} + +static void +ospf_neigh_stop_graceful_restart_(struct ospf_neighbor *n) +{ + struct ospf_proto *p = n->ifa->oa->po; + struct ospf_iface *ifa = n->ifa; + + n->gr_active = 0; + p->gr_count--; + + rfree(n->gr_timer); + n->gr_timer = NULL; + + ospf_notify_rt_lsa(ifa->oa); + ospf_notify_net_lsa(ifa); + + if (ifa->type == OSPF_IT_VLINK) + ospf_notify_rt_lsa(ifa->voa); + + ospf_iface_sm(ifa, ISM_NEICH); +} + +static void +ospf_neigh_stop_graceful_restart(struct ospf_neighbor *n) +{ + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Neighbor %R on %s finished graceful restart", + n->rid, n->ifa->ifname); + + ospf_neigh_stop_graceful_restart_(n); +} + +void 
+ospf_neigh_cancel_graceful_restart(struct ospf_neighbor *n) +{ + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Graceful restart canceled for nbr %R on %s", + n->rid, n->ifa->ifname); + + ospf_neigh_stop_graceful_restart_(n); + + if (n->state == NEIGHBOR_DOWN) + ospf_neigh_down(n); +} + +static void +graceful_restart_timeout(timer *t) +{ + struct ospf_neighbor *n = t->data; + struct ospf_proto *p = n->ifa->oa->po; + + OSPF_TRACE(D_EVENTS, "Graceful restart timer expired for nbr %R on %s", + n->rid, n->ifa->ifname); + + ospf_neigh_stop_graceful_restart_(n); + + if (n->state == NEIGHBOR_DOWN) + ospf_neigh_down(n); +} + +static inline int +changes_in_lsrtl(struct ospf_neighbor *n) +{ + /* This could be improved, see RFC 3623 3.1 (2) */ + + struct top_hash_entry *en; + WALK_SLIST(en, n->lsrtl) + if (LSA_FUNCTION(en->lsa_type) <= LSA_FUNCTION(LSA_T_NSSA)) + return 1; + + return 0; +} + +void +ospf_neigh_notify_grace_lsa(struct ospf_neighbor *n, struct top_hash_entry *en) +{ + struct ospf_iface *ifa = n->ifa; + struct ospf_proto *p = ifa->oa->po; + + /* In OSPFv2, neighbors are identified by either IP or Router ID, based on network type */ + uint t = ifa->type; + if (ospf_is_v2(p) && ((t == OSPF_IT_BCAST) || (t == OSPF_IT_NBMA) || (t == OSPF_IT_PTMP))) + { + struct ospf_tlv *tlv = lsa_get_tlv(en, LSA_GR_ADDRESS); + if (!tlv || tlv->length != 4) + return; + + ip_addr addr = ipa_from_u32(tlv->data[0]); + if (!ipa_equal(n->ip, addr)) + n = find_neigh_by_ip(ifa, addr); + } + else + { + if (n->rid != en->lsa.rt) + n = find_neigh(ifa, en->lsa.rt); + } + + if (!n) + return; + + if (en->lsa.age < LSA_MAXAGE) + { + u32 period = lsa_get_tlv_u32(en, LSA_GR_PERIOD); + + /* Exception for updating grace period */ + if (n->gr_active) + { + tm_start(n->gr_timer, (period S) - (en->lsa.age S)); + return; + } + + /* RFC 3623 3.1 (1) - full adjacency */ + if (n->state != NEIGHBOR_FULL) + return; + + /* RFC 3623 3.1 (2) - no changes in LSADB */ + if (changes_in_lsrtl(n)) + 
return; + + /* RFC 3623 3.1 (3) - grace period not expired */ + if (en->lsa.age >= period) + return; + + /* RFC 3623 3.1 (4) - helper mode allowed */ + if (!p->gr_mode) + return; + + /* RFC 3623 3.1 (5) - no local graceful restart */ + if (p->p.gr_recovery) + return; + + ospf_neigh_start_graceful_restart(n, period - en->lsa.age); + } + else /* Grace-LSA is flushed */ + { + if (n->gr_active) + ospf_neigh_stop_graceful_restart(n); + } +} + +void +ospf_neigh_lsadb_changed_(struct ospf_proto *p, struct top_hash_entry *en) +{ + struct ospf_iface *ifa; + struct ospf_neighbor *n, *nx; + + if (LSA_FUNCTION(en->lsa_type) > LSA_FUNCTION(LSA_T_NSSA)) + return; + + /* RFC 3623 3.2 (3) - cancel graceful restart when LSdb changed */ + WALK_LIST(ifa, p->iface_list) + if (lsa_flooding_allowed(en->lsa_type, en->domain, ifa)) + WALK_LIST_DELSAFE(n, nx, ifa->neigh_list) + if (n->gr_active) + ospf_neigh_cancel_graceful_restart(n); +} + + static inline u32 neigh_get_id(struct ospf_proto *p, struct ospf_neighbor *n) { return ospf_is_v2(p) ? 
ipa_to_u32(n->ip) : n->rid; } diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index f26f0160..b6d5570c 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -92,7 +92,9 @@ * - RFC 2328 - main OSPFv2 standard * - RFC 5340 - main OSPFv3 standard * - RFC 3101 - OSPFv2 NSSA areas + * - RFC 3623 - OSPFv2 Graceful Restart * - RFC 4576 - OSPFv2 VPN loop prevention + * - RFC 5187 - OSPFv3 Graceful Restart * - RFC 5250 - OSPFv2 Opaque LSAs * - RFC 5709 - OSPFv2 HMAC-SHA Cryptographic Authentication * - RFC 5838 - OSPFv3 Support of Address Families @@ -207,7 +209,6 @@ ospf_area_remove(struct ospf_area *oa) mb_free(oa); } - struct ospf_area * ospf_find_area(struct ospf_proto *p, u32 aid) { @@ -228,6 +229,37 @@ ospf_find_vlink(struct ospf_proto *p, u32 voa, u32 vid) return NULL; } +static void +ospf_start_gr_recovery(struct ospf_proto *p) +{ + OSPF_TRACE(D_EVENTS, "Graceful restart started"); + + p->gr_recovery = 1; + p->gr_timeout = current_time() + (p->gr_time S); + channel_graceful_restart_lock(p->p.main_channel); + p->p.main_channel->gr_wait = 1; + + /* NOTE: We should get end of grace period from non-volatile storage */ +} + +void +ospf_stop_gr_recovery(struct ospf_proto *p) +{ + p->gr_recovery = 0; + p->gr_timeout = 0; + channel_graceful_restart_unlock(p->p.main_channel); + + /* Reorigination of router/network LSAs is already scheduled */ + ospf_mark_lsadb(p); + + /* + * NOTE: We should move channel_graceful_restart_unlock() to the end of + * ospf_disp() in order to have local LSA reorigination / LSAdb cleanup / + * routing table recomputation before official end of GR. It does not matter + * when we are single-threaded. 
+ */ +} + static int ospf_start(struct proto *P) { @@ -246,6 +278,8 @@ ospf_start(struct proto *P) p->asbr = c->asbr; p->vpn_pe = c->vpn_pe; p->ecmp = c->ecmp; + p->gr_mode = c->gr_mode; + p->gr_time = c->gr_time; p->tick = c->tick; p->disp_timer = tm_new_init(P->pool, ospf_disp, p, p->tick S, 0); tm_start(p->disp_timer, 100 MS); @@ -267,6 +301,10 @@ ospf_start(struct proto *P) p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 }; p->log_lsa_tbf = (struct tbf){ .rate = 4, .burst = 20 }; + /* Lock the channel when in GR recovery mode */ + if (p->p.gr_recovery && (p->gr_mode == OSPF_GR_ABLE)) + ospf_start_gr_recovery(p); + WALK_LIST(ac, c->area_list) ospf_area_add(p, ac); @@ -398,6 +436,9 @@ ospf_disp(timer * timer) { struct ospf_proto *p = timer->data; + if (p->gr_recovery) + ospf_update_gr_recovery(p); + /* Originate or flush local topology LSAs */ ospf_update_topology(p); @@ -475,9 +516,18 @@ ospf_shutdown(struct proto *P) OSPF_TRACE(D_EVENTS, "Shutdown requested"); - /* And send to all my neighbors 1WAY */ - WALK_LIST(ifa, p->iface_list) - ospf_iface_shutdown(ifa); + if ((P->down_code == PDC_CMD_GR_DOWN) && (p->gr_mode == OSPF_GR_ABLE)) + { + /* Originate Grace LSAs */ + WALK_LIST(ifa, p->iface_list) + ospf_originate_gr_lsa(p, ifa); + } + else + { + /* Send to all my neighbors 1WAY */ + WALK_LIST(ifa, p->iface_list) + ospf_iface_shutdown(ifa); + } /* Cleanup locked rta entries */ FIB_WALK(&p->rtf, ort, nf) @@ -664,6 +714,8 @@ ospf_reconfigure(struct proto *P, struct proto_config *CF) p->merge_external = new->merge_external; p->asbr = new->asbr; p->ecmp = new->ecmp; + p->gr_mode = new->gr_mode; + p->gr_time = new->gr_time; p->tick = new->tick; p->disp_timer->recurrent = p->tick S; tm_start(p->disp_timer, 10 MS); diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 82ae4df4..beecd2b6 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -75,6 +75,7 @@ #define OSPF_DEFAULT_TICK 1 #define OSPF_DEFAULT_STUB_COST 1000 #define OSPF_DEFAULT_ECMP_LIMIT 16 
+#define OSPF_DEFAULT_GR_TIME 120 #define OSPF_DEFAULT_TRANSINT 40 #define OSPF_MIN_PKT_SIZE 256 @@ -82,6 +83,9 @@ #define OSPF_VLINK_ID_OFFSET 0x80000000 +#define OSPF_GR_ABLE 1 +#define OSPF_GR_AWARE 2 + struct ospf_config { struct proto_config c; @@ -97,7 +101,9 @@ struct ospf_config u8 abr; u8 asbr; u8 vpn_pe; - int ecmp; + u8 gr_mode; /* Graceful restart mode (OSPF_GR_*) */ + uint gr_time; /* Graceful restart interval */ + uint ecmp; list area_list; /* list of area configs (struct ospf_area_config) */ list vlink_list; /* list of configured vlinks (struct ospf_iface_patt) */ }; @@ -216,6 +222,9 @@ struct ospf_proto list area_list; /* List of OSPF areas (struct ospf_area) */ int areano; /* Number of area I belong to */ int padj; /* Number of neighbors in Exchange or Loading state */ + int gr_count; /* Number of neighbors in graceful restart state */ + int gr_recovery; /* Graceful restart recovery is active */ + btime gr_timeout; /* The end time of grace restart recovery */ struct fib rtf; /* Routing table */ struct idm idm; /* OSPFv3 LSA ID map */ u8 ospf2; /* OSPF v2 or v3 */ @@ -228,6 +237,8 @@ struct ospf_proto u8 asbr; /* May i originate any ext/NSSA lsa? */ u8 vpn_pe; /* Should we do VPN PE specific behavior (RFC 4577)? 
*/ u8 ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ + u8 gr_mode; /* Graceful restart mode (OSPF_GR_*) */ + uint gr_time; /* Graceful restart interval */ u64 csn64; /* Last used cryptographic sequence number */ struct ospf_area *backbone; /* If exists */ event *flood_event; /* Event for flooding LS updates */ @@ -346,6 +357,8 @@ struct ospf_neighbor pool *pool; struct ospf_iface *ifa; u8 state; + u8 gr_active; /* We act as GR helper for the neighbor */ + u8 got_my_rt_lsa; /* Received my Rt-LSA in DBDES exchanged */ timer *inactim; /* Inactivity timer */ u8 imms; /* I, M, Master/slave received */ u8 myimms; /* I, M Master/slave */ @@ -388,6 +401,7 @@ struct ospf_neighbor #define ACKL_DIRECT 0 #define ACKL_DELAY 1 timer *ackd_timer; /* Delayed ack timer */ + timer *gr_timer; /* Graceful restart timer, non-NULL only if gr_active */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ void *ldd_buffer; /* Last database description packet */ u32 ldd_bsize; /* Buffer size for ldd_buffer */ @@ -555,6 +569,7 @@ struct ospf_auth3 #define LSA_T_NSSA 0x2007 #define LSA_T_LINK 0x0008 #define LSA_T_PREFIX 0x2009 +#define LSA_T_GR 0x000B #define LSA_T_RI_ 0x000C #define LSA_T_RI_LINK 0x800C #define LSA_T_RI_AREA 0xA00C @@ -569,6 +584,7 @@ struct ospf_auth3 /* OSPFv2 Opaque LSA Types */ /* https://www.iana.org/assignments/ospf-opaque-types/ospf-opaque-types.xhtml#ospf-opaque-types-2 */ +#define LSA_OT_GR 0x03 #define LSA_OT_RI 0x04 #define LSA_FUNCTION_MASK 0x1FFF @@ -613,6 +629,12 @@ struct ospf_auth3 #define LSA_EXT3_FBIT 0x02000000 #define LSA_EXT3_TBIT 0x01000000 +/* OSPF Grace LSA (GR) TLVs */ +/* https://www.iana.org/assignments/ospfv2-parameters/ospfv2-parameters.xhtml#ospfv2-parameters-13 */ +#define LSA_GR_PERIOD 1 +#define LSA_GR_REASON 2 +#define LSA_GR_ADDRESS 3 + /* OSPF Router Information (RI) TLVs */ /* https://www.iana.org/assignments/ospf-parameters/ospf-parameters.xhtml#ri-tlv */ #define LSA_RI_RIC 1 @@ -959,6 +981,8 @@ static inline int 
oa_is_ext(struct ospf_area *oa) static inline int oa_is_nssa(struct ospf_area *oa) { return oa->options & OPT_N; } +void ospf_stop_gr_recovery(struct ospf_proto *p); + void ospf_sh_neigh(struct proto *P, char *iff); void ospf_sh(struct proto *P); void ospf_sh_iface(struct proto *P, char *iff); @@ -990,12 +1014,18 @@ static inline struct nbma_node * find_nbma_node(struct ospf_iface *ifa, ip_addr /* neighbor.c */ struct ospf_neighbor *ospf_neighbor_new(struct ospf_iface *ifa); void ospf_neigh_sm(struct ospf_neighbor *n, int event); +void ospf_neigh_cancel_graceful_restart(struct ospf_neighbor *n); +void ospf_neigh_notify_grace_lsa(struct ospf_neighbor *n, struct top_hash_entry *en); +void ospf_neigh_lsadb_changed_(struct ospf_proto *p, struct top_hash_entry *en); void ospf_dr_election(struct ospf_iface *ifa); struct ospf_neighbor *find_neigh(struct ospf_iface *ifa, u32 rid); struct ospf_neighbor *find_neigh_by_ip(struct ospf_iface *ifa, ip_addr ip); void ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd); void ospf_sh_neigh_info(struct ospf_neighbor *n); +static inline void ospf_neigh_lsadb_changed(struct ospf_proto *p, struct top_hash_entry *en) +{ if (p->gr_count) ospf_neigh_lsadb_changed_(p, en); } + /* packet.c */ void ospf_pkt_fill_hdr(struct ospf_iface *ifa, void *buf, u8 h_type); int ospf_rx_hook(sock * sk, uint size); diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 6ddd6c9f..126ef201 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -10,7 +10,7 @@ #include "ospf.h" -static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint lif, uint nif); +static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint data, uint lif, uint nif); static void rt_sync(struct ospf_proto *p); @@ -392,6 +392,40 @@ px_pos_to_ifa(struct ospf_area *oa, int pos) return NULL; } +static inline struct ospf_iface * +rt_find_iface2(struct ospf_area *oa, 
uint data) +{ + ip_addr addr = ipa_from_u32(data); + + /* We should handle it differently for unnumbered PTP links */ + struct ospf_iface *ifa; + WALK_LIST(ifa, oa->po->iface_list) + if ((ifa->oa == oa) && ifa->addr && (ipa_equal(ifa->addr->ip, addr))) + return ifa; + + return NULL; +} + +static inline struct ospf_iface * +rt_find_iface3(struct ospf_area *oa, uint lif) +{ + struct ospf_iface *ifa; + WALK_LIST(ifa, oa->po->iface_list) + if ((ifa->oa == oa) && (ifa->iface_id == lif)) + return ifa; + + return NULL; +} + +static struct ospf_iface * +rt_find_iface(struct ospf_area *oa, int pos, uint data, uint lif) +{ + if (0) + return rt_pos_to_ifa(oa, pos); + else + return ospf_is_v2(oa->po) ? rt_find_iface2(oa, data) : rt_find_iface3(oa, lif); +} + static void add_network(struct ospf_area *oa, net_addr *net, int metric, struct top_hash_entry *en, int pos) @@ -503,7 +537,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr break; } - add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.lif, rtl.nif); + add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.data, rtl.lif, rtl.nif); } } @@ -526,7 +560,7 @@ spfa_process_net(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_ent for (i = 0; i < cnt; i++) { tmp = ospf_hash_find_rt(p->gr, oa->areaid, ln->routers[i]); - add_cand(oa, tmp, act, act->dist, -1, 0, 0); + add_cand(oa, tmp, act, act->dist, -1, 0, 0, 0); } } @@ -1708,7 +1742,7 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) static struct nexthop * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, - struct top_hash_entry *par, int pos, uint lif, uint nif) + struct top_hash_entry *par, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; struct nexthop *pn = par->nhs; @@ -1735,7 +1769,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* The first case - local network */ if ((en->lsa_type == LSA_T_NET) && (par == oa->rt)) { - ifa = rt_pos_to_ifa(oa, pos); + 
ifa = rt_find_iface(oa, pos, data, lif); if (!ifa) return NULL; @@ -1748,7 +1782,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* The second case - ptp or ptmp neighbor */ if ((en->lsa_type == LSA_T_RT) && (par == oa->rt)) { - ifa = rt_pos_to_ifa(oa, pos); + ifa = rt_find_iface(oa, pos, data, lif); if (!ifa) return NULL; @@ -1838,7 +1872,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, /* Add LSA into list of candidates in Dijkstra's algorithm */ static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, - u32 dist, int pos, uint lif, uint nif) + u32 dist, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; node *prev, *n; @@ -1871,7 +1905,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry if (!link_back(oa, en, par, lif, nif)) return; - struct nexthop *nhs = calc_next_hop(oa, en, par, pos, lif, nif); + struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -2086,3 +2120,133 @@ again2: if (en->mode == LSA_M_STALE) ospf_flush_lsa(p, en); } + + +/* RFC 3623 2.2 - checking for graceful restart termination conditions */ +void +ospf_update_gr_recovery(struct ospf_proto *p) +{ + struct top_hash_entry *rt, *net, *nbr; + struct ospf_lsa_rt_walk rtl; + struct ospf_neighbor *n; + struct ospf_iface *ifa; + struct ospf_area *oa; + const char *err_dsc = NULL; + uint i, j, missing = 0, err_val = 0; + + /* + * We check here for three cases: + * RFC 3623 2.2 (1) - success when all adjacencies are established + * RFC 3623 2.2 (2) - failure when inconsistent LSA was received + * RFC 3623 2.2 (3) - grace period timeout + * + * It is handled by processing pre-restart local router-LSA and adjacent + * network-LSAs, checking neighbor association for referenced routers (1) + * and checking back links from their router-LSAs (2). 
+ * + * TODO: Use timer for grace period timeout. We avoided that as function + * ospf_stop_gr_recovery() called from ospf_disp() makes ending of graceful + * restart uninterrupted by other events. + */ + + #define CONTINUE { missing++; continue; } + + if (current_time() > p->gr_timeout) + goto timeout; + + WALK_LIST(oa, p->area_list) + { + /* Get the router-LSA */ + rt = oa->rt; + if (!rt || (rt->lsa.age == LSA_MAXAGE)) + CONTINUE; + + for (lsa_walk_rt_init(p, rt, &rtl), i = 0; lsa_walk_rt(&rtl); i++) + { + if (rtl.type == LSART_STUB) + continue; + + ifa = rt_find_iface(oa, i, rtl.data, rtl.lif); + if (!ifa) + DROP("inconsistent interface", ospf_is_v2(p) ? rtl.data : rtl.lif); + + switch (rtl.type) + { + case LSART_NET: + /* Find the network-LSA */ + net = ospf_hash_find_net(p->gr, oa->areaid, rtl.id, rtl.nif); + if (!net) + CONTINUE; + + if (!link_back(oa, net, rt, rtl.lif, rtl.nif)) + DROP("Inconsistent network-LSA", net->lsa.id); + + if (ifa->state == OSPF_IS_DR) + { + /* Find all neighbors from the network-LSA */ + struct ospf_lsa_net *net_body = net->lsa_body; + uint cnt = lsa_net_count(&net->lsa); + for (j = 0; j < cnt; i++) + { + n = find_neigh(ifa, net_body->routers[j]); + if (!n || (n->state != NEIGHBOR_FULL)) + CONTINUE; + + if (!n->got_my_rt_lsa) + DROP("not received my router-LSA", n->rid); + + nbr = ospf_hash_find_rt(p->gr, oa->areaid, n->rid); + if (!link_back(oa, nbr, net, 0, 0)) + DROP("inconsistent router-LSA", n->rid); + } + } + else + { + /* Find the DR (by IP for OSPFv2) */ + n = ospf_is_v2(p) ? 
+ find_neigh_by_ip(ifa, ipa_from_u32(rtl.id)) : + find_neigh(ifa, rtl.id); + if (!n || (n->state != NEIGHBOR_FULL)) + CONTINUE; + + if (!n->got_my_rt_lsa) + DROP("not received my router-LSA", n->rid); + } + break; + + case LSART_VLNK: + case LSART_PTP: + /* Find the PtP peer */ + n = find_neigh(ifa, rtl.id); + if (!n || (n->state != NEIGHBOR_FULL)) + CONTINUE; + + if (!n->got_my_rt_lsa) + DROP("not received my router-LSA", n->rid); + + nbr = ospf_hash_find_rt(p->gr, oa->areaid, rtl.id); + if (!link_back(oa, nbr, rt, rtl.lif, rtl.nif)) + DROP("inconsistent router-LSA", rtl.id); + } + } + } + + #undef CONTINUE + + if (missing) + return; + + OSPF_TRACE(D_EVENTS, "Graceful restart finished"); + ospf_stop_gr_recovery(p); + return; + +drop: + log(L_INFO "%s: Graceful restart ended - %s (%R)", p->p.name, err_dsc, err_val); + ospf_stop_gr_recovery(p); + return; + +timeout: + log(L_INFO "%s: Graceful restart ended - grace period expired", p->p.name); + ospf_stop_gr_recovery(p); + return; +} diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 589d2bc5..094e125b 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -130,6 +130,7 @@ static inline int rt_is_nssa(ort *nf) void ospf_rt_spf(struct ospf_proto *p); void ospf_rt_initort(struct fib_node *fn); +void ospf_update_gr_recovery(struct ospf_proto *p); #endif /* _BIRD_OSPF_RT_H_ */ diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 7d5deca0..efd03b54 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -83,7 +83,10 @@ ospf_install_lsa(struct ospf_proto *p, struct ospf_lsa_header *lsa, u32 type, u3 en->lsa_type, en->lsa.id, en->lsa.rt, en->lsa.sn, en->lsa.age); if (change) + { + ospf_neigh_lsadb_changed(p, en); ospf_schedule_rtcalc(p); + } return en; } @@ -243,6 +246,7 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa en->lsa.age = 0; en->init_age = 0; en->inst_time = current_time(); + en->dirty = 0; lsa_generate_checksum(&en->lsa, en->lsa_body); 
OSPF_TRACE(D_EVENTS, "Originating LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x", @@ -251,7 +255,10 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa ospf_flood_lsa(p, en, NULL); if (en->mode == LSA_M_BASIC) + { + ospf_neigh_lsadb_changed(p, en); ospf_schedule_rtcalc(p); + } return 1; } @@ -321,7 +328,8 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa) if ((en->lsa.age < LSA_MAXAGE) && (lsa_length == en->lsa.length) && !memcmp(lsa_body, en->lsa_body, lsa_blen) && - (!ospf_is_v2(p) || (lsa->opts == lsa_get_options(&en->lsa)))) + (!ospf_is_v2(p) || (lsa->opts == lsa_get_options(&en->lsa))) && + !en->dirty) /* an entry marked dirty never takes the unchanged-content shortcut below */ goto drop; lsa_body = lsab_flush(p); @@ -433,7 +441,10 @@ ospf_flush_lsa(struct ospf_proto *p, struct top_hash_entry *en) ospf_flood_lsa(p, en, NULL); if (en->mode == LSA_M_BASIC) + { + ospf_neigh_lsadb_changed(p, en); ospf_schedule_rtcalc(p); + } en->mode = LSA_M_BASIC; } @@ -509,6 +520,12 @@ ospf_update_lsadb(struct ospf_proto *p) continue; } + if (en->dirty) /* still marked dirty at this point: flush the entry */ + { + ospf_flush_lsa(p, en); + continue; + } + if ((en->lsa.rt == p->router_id) && (real_age >= LSREFRESHTIME)) { ospf_refresh_lsa(p, en); @@ -525,6 +542,16 @@ ospf_update_lsadb(struct ospf_proto *p) } } +void +ospf_mark_lsadb(struct ospf_proto *p) +{ /* Walk p->lsal and set the dirty flag on every entry whose lsa.rt equals this router's ID (p->router_id) */ + struct top_hash_entry *en; + + /* Mark all local LSAs as dirty */ + WALK_SLIST(en, p->lsal) + if (en->lsa.rt == p->router_id) + en->dirty = 1; +} static u32 ort_to_lsaid(struct ospf_proto *p, ort *nf) @@ -1424,6 +1451,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) struct ospf_config *cf = (struct ospf_config *) (p->p.cf); struct ospf_iface *ifa; struct ospf_lsa_prefix *lp; + uint max = ospf_is_ip4(p) ? 
IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; int host_addr = 0; int net_lsa; int i = 0; @@ -1457,7 +1485,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (a->scope <= SCOPE_LINK)) continue; - if (((a->prefix.pxlen < IP6_MAX_PREFIX_LENGTH) && net_lsa) || + if (((a->prefix.pxlen < max) && net_lsa) || configured_stubnet(oa, a)) continue; @@ -1465,8 +1493,13 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) (ifa->state == OSPF_IS_LOOP) || (ifa->type == OSPF_IT_PTMP)) { - net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH); - lsab_put_prefix(p, (net_addr *) &net, 0); + net_addr net; + if (a->prefix.type == NET_IP4) + net_fill_ip4(&net, ipa_to_ip4(a->ip), IP4_MAX_PREFIX_LENGTH); + else + net_fill_ip6(&net, ipa_to_ip6(a->ip), IP6_MAX_PREFIX_LENGTH); + + lsab_put_prefix(p, &net, 0); host_addr = 1; } else @@ -1482,7 +1515,7 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa) if (!sn->hidden) { lsab_put_prefix(p, &sn->prefix, sn->cost); - if (sn->prefix.pxlen == IP6_MAX_PREFIX_LENGTH) + if (sn->prefix.pxlen == max) host_addr = 1; i++; } @@ -1670,6 +1703,59 @@ ospf_originate_prefix_net_lsa(struct ospf_proto *p, struct ospf_iface *ifa) /* + * Grace LSA handling + * Type = LSA_T_GR, opaque type = LSA_OT_GR + */ + +static inline void +ospf_add_gr_period_tlv(struct ospf_proto *p, uint period) +{ /* Obtain a TLV header plus one u32 from lsab_allocz() and fill it as LSA_GR_PERIOD, length 4, data word = period */ + struct ospf_tlv *tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32)); + tlv->type = LSA_GR_PERIOD; + tlv->length = 4; + tlv->data[0] = period; +} + +static inline void +ospf_add_gr_reason_tlv(struct ospf_proto *p, uint reason) +{ /* Same layout as the period TLV, but type LSA_GR_REASON with length 1; the reason is shifted into the top byte of the data word. NOTE(review): presumably so it ends up as the first value byte after byte-order conversion - confirm */ + struct ospf_tlv *tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32)); + tlv->type = LSA_GR_REASON; + tlv->length = 1; + tlv->data[0] = reason << 24; +} + +static inline void +ospf_add_gr_address_tlv(struct ospf_proto *p, ip4_addr addr) +{ /* Same layout again: type LSA_GR_ADDRESS, length 4, data word = ip4_to_u32(addr) */ + struct ospf_tlv *tlv = lsab_allocz(p, sizeof(struct ospf_tlv) + sizeof(u32)); + tlv->type = LSA_GR_ADDRESS; + tlv->length 
= 4; + tlv->data[0] = ip4_to_u32(addr); +} + +void +ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa) +{ /* Originate an LSA of type LSA_T_GR for ifa: period (p->gr_time) and reason (0) TLVs are always added; the address TLV only for OSPFv2 on BCAST, NBMA and PTMP interfaces */ + struct ospf_new_lsa lsa = { + .type = LSA_T_GR, + .dom = ifa->iface_id, + .id = ospf_is_v2(p) ? 0 : ifa->iface_id, + .ifa = ifa + }; + + ospf_add_gr_period_tlv(p, p->gr_time); + ospf_add_gr_reason_tlv(p, 0); + + uint t = ifa->type; + if (ospf_is_v2(p) && ((t == OSPF_IT_BCAST) || (t == OSPF_IT_NBMA) || (t == OSPF_IT_PTMP))) + ospf_add_gr_address_tlv(p, ipa_to_ip4(ifa->addr->ip)); + + ospf_originate_lsa(p, &lsa); +} + + +/* * Router Information LSA handling * Type = LSA_T_RI_AREA, opaque type = LSA_OT_RI */ @@ -1712,6 +1798,10 @@ ospf_update_topology(struct ospf_proto *p) struct ospf_area *oa; struct ospf_iface *ifa; + /* No LSA reorigination during GR recovery */ + if (p->gr_recovery) + return; + WALK_LIST(oa, p->area_list) { if (oa->update_rt_lsa) diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index fd70239d..ffae436a 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -33,6 +33,7 @@ struct top_hash_entry u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ int ret_count; /* Number of retransmission lists referencing the entry */ + u8 dirty; /* Will be flushed during next LSAdb update unless reoriginated */ u8 color; #define OUTSPF 0 #define CANDIDATE 1 @@ -180,6 +181,7 @@ struct top_hash_entry * ospf_originate_lsa(struct ospf_proto *p, struct ospf_new void ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_lsa_header *lsa, u32 type, u32 domain, void *body); void ospf_flush_lsa(struct ospf_proto *p, struct top_hash_entry *en); void ospf_update_lsadb(struct ospf_proto *p); +void ospf_mark_lsadb(struct ospf_proto *p); static inline void ospf_flush2_lsa(struct ospf_proto *p, struct top_hash_entry **en) { if (*en) { ospf_flush_lsa(p, *en); *en = NULL; } } @@ -187,6 +189,7 @@ static inline void ospf_flush2_lsa(struct ospf_proto *p, 
struct top_hash_entry * void ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric); void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options); void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit, int dn); +void ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa); void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old); void ospf_update_topology(struct ospf_proto *p); |