diff options
Diffstat (limited to 'proto/bgp/bgp.c')
-rw-r--r-- | proto/bgp/bgp.c | 541 |
1 files changed, 366 insertions, 175 deletions
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index a0e83415..cda0eb8d 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -115,7 +115,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -126,9 +126,13 @@ #include "bgp.h" +static void bgp_listen_create(void *); static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ +static list STATIC_LIST_INIT(bgp_listen_pending); /* Global list of listening socket open requests */ +static event bgp_listen_event = { .hook = bgp_listen_create }; +DOMAIN(rtable) bgp_listen_domain; static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); @@ -139,73 +143,43 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_initiate_disable(struct bgp_proto *p, int err_val); -/** - * bgp_open - open a BGP instance - * @p: BGP instance - * - * This function allocates and configures shared BGP resources, mainly listening - * sockets. Should be called as the last step during initialization (when lock - * is acquired and neighbor is ready). When error, caller should change state to - * PS_DOWN and return immediately. - */ -static int -bgp_open(struct bgp_proto *p) +static void bgp_graceful_restart_feed(struct bgp_channel *c); + + +static inline int +bgp_setup_auth(struct bgp_proto *p, int enable) { - struct bgp_socket *bs = NULL; - struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; - ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : - (p->ipv4 ? IPA_NONE4 : IPA_NONE6); - uint port = p->cf->local_port; - uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; - uint flag_mask = SKF_FREEBIND; + /* Beware. This is done from main_birdloop and protocol birdloop is NOT ENTERED. + * Anyway, we are only accessing: + * - protocol config which can be changed only from main_birdloop (reconfig) + * - protocol listen socket which is always driven by main_birdloop + * - protocol name which is set on reconfig + */ - /* We assume that cf->iface is defined iff cf->local_ip is link-local */ + if (p->cf->password && p->listen.sock) + { + ip_addr prefix = p->cf->remote_ip; + int pxlen = -1; - WALK_LIST(bs, bgp_sockets) - if (ipa_equal(bs->sk->saddr, addr) && - (bs->sk->sport == port) && - (bs->sk->iface == ifa) && - (bs->sk->vrf == p->p.vrf) && - ((bs->sk->flags & flag_mask) == flags)) + if (p->cf->remote_range) { - bs->uc++; - p->sock = bs; - return 0; + prefix = net_prefix(p->cf->remote_range); + pxlen = net_pxlen(p->cf->remote_range); } - sock *sk = sk_new(proto_pool); - sk->type = SK_TCP_PASSIVE; - sk->ttl = 255; - sk->saddr = addr; - sk->sport = port; - sk->iface = ifa; - sk->vrf = p->p.vrf; - sk->flags = flags; - sk->tos = IP_PREC_INTERNET_CONTROL; - sk->rbsize = BGP_RX_BUFFER_SIZE; - sk->tbsize = BGP_TX_BUFFER_SIZE; - sk->rx_hook = bgp_incoming_connection; - sk->err_hook = bgp_listen_sock_err; - - if (sk_open(sk) < 0) - goto err; - - bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); - bs->sk = sk; - bs->uc = 1; - p->sock = bs; - sk->data = bs; - - add_tail(&bgp_sockets, &bs->n); + int rv = sk_set_md5_auth(p->listen.sock->sk, + p->cf->local_ip, prefix, pxlen, p->cf->iface, + enable ? p->cf->password : NULL, p->cf->setkey); - return 0; + if (rv < 0) + sk_log_error(p->listen.sock->sk, p->p.name); -err: - sk_log_error(sk, p->p.name); - log(L_ERR "%s: Cannot open listening socket", p->p.name); - rfree(sk); - return -1; + return rv; + } + else + return 0; } /** @@ -217,43 +191,155 @@ err: static void bgp_close(struct bgp_proto *p) { - struct bgp_socket *bs = p->sock; + LOCK_DOMAIN(rtable, bgp_listen_domain); - ASSERT(bs && bs->uc); + struct bgp_listen_request *req = &p->listen; + struct bgp_socket *bs = req->sock; - if (--bs->uc) - return; + if (bs) + { + req->sock = NULL; + rem_node(&req->n); - rfree(bs->sk); - rem_node(&bs->n); - mb_free(bs); + if (bs && EMPTY_LIST(bs->requests)) + ev_send(&global_event_list, &bgp_listen_event); + } + + UNLOCK_DOMAIN(rtable, bgp_listen_domain); } -static inline int -bgp_setup_auth(struct bgp_proto *p, int enable) +/** + * bgp_open - open a BGP instance + * @p: BGP instance + * + * This function allocates and configures shared BGP resources, mainly listening + * sockets. Should be called as the last step during initialization (when lock + * is acquired and neighbor is ready). When error, caller should change state to + * PS_DOWN and return immediately. + */ +static void +bgp_open(struct bgp_proto *p) { - if (p->cf->password) - { - ip_addr prefix = p->cf->remote_ip; - int pxlen = -1; + LOCK_DOMAIN(rtable, bgp_listen_domain); - if (p->cf->remote_range) + struct bgp_listen_request *req = &p->listen; + /* We assume that cf->iface is defined iff cf->local_ip is link-local */ + req->iface = p->cf->strict_bind ? p->cf->iface : NULL; + req->vrf = p->p.vrf; + req->addr = p->cf->strict_bind ? p->cf->local_ip : + (p->ipv4 ? IPA_NONE4 : IPA_NONE6); + req->port = p->cf->local_port; + req->flags = p->cf->free_bind ? SKF_FREEBIND : 0; + + BGP_TRACE(D_EVENTS, "Requesting listen socket at %I%J port %u", req->addr, req->iface, req->port); + + add_tail(&bgp_listen_pending, &req->n); + ev_send(&global_event_list, &bgp_listen_event); + + UNLOCK_DOMAIN(rtable, bgp_listen_domain); +} + +static void +bgp_listen_create(void *_ UNUSED) +{ + ASSERT_DIE(birdloop_inside(&main_birdloop)); + uint flag_mask = SKF_FREEBIND; + + while (1) { + LOCK_DOMAIN(rtable, bgp_listen_domain); + + if (EMPTY_LIST(bgp_listen_pending)) { - prefix = net_prefix(p->cf->remote_range); - pxlen = net_pxlen(p->cf->remote_range); + UNLOCK_DOMAIN(rtable, bgp_listen_domain); + break; } - int rv = sk_set_md5_auth(p->sock->sk, - p->cf->local_ip, prefix, pxlen, p->cf->iface, - enable ? p->cf->password : NULL, p->cf->setkey); + /* Get the first request to match */ + struct bgp_listen_request *req = HEAD(bgp_listen_pending); + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); + rem_node(&req->n); + + /* First try to find existing socket */ + struct bgp_socket *bs; + WALK_LIST(bs, bgp_sockets) + if (ipa_equal(bs->sk->saddr, req->addr) && + (bs->sk->sport == req->port) && + (bs->sk->iface == req->iface) && + (bs->sk->vrf == req->vrf) && + ((bs->sk->flags & flag_mask) == req->flags)) + break; - if (rv < 0) - sk_log_error(p->sock->sk, p->p.name); + /* Not found any */ + if (NODE_VALID(bs)) + BGP_TRACE(D_EVENTS, "Found a listening socket: %p", bs); + else + { + /* Allocating new socket from global protocol pool. + * We can do this in main_birdloop. */ + sock *sk = sk_new(proto_pool); + sk->type = SK_TCP_PASSIVE; + sk->ttl = 255; + sk->saddr = req->addr; + sk->sport = req->port; + sk->iface = req->iface; + sk->vrf = req->vrf; + sk->flags = req->flags; + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->rbsize = BGP_RX_BUFFER_SIZE; + sk->tbsize = BGP_TX_BUFFER_SIZE; + sk->rx_hook = bgp_incoming_connection; + sk->err_hook = bgp_listen_sock_err; + + if (sk_open(sk, &main_birdloop) < 0) + { + sk_log_error(sk, p->p.name); + log(L_ERR "%s: Cannot open listening socket", p->p.name); + rfree(sk); + UNLOCK_DOMAIN(rtable, bgp_listen_domain); - return rv; + bgp_initiate_disable(p, BEM_NO_SOCKET); + continue; + } + + bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); + bs->sk = sk; + sk->data = bs; + + init_list(&bs->requests); + add_tail(&bgp_sockets, &bs->n); + + BGP_TRACE(D_EVENTS, "Created new listening socket: %p", bs); + } + + req->sock = bs; + add_tail(&bs->requests, &req->n); + + if (bgp_setup_auth(p, 1) < 0) + { + rem_node(&req->n); + req->sock = NULL; + + UNLOCK_DOMAIN(rtable, bgp_listen_domain); + + bgp_initiate_disable(p, BEM_INVALID_MD5); + continue; + } + + UNLOCK_DOMAIN(rtable, bgp_listen_domain); } - else - return 0; + + /* Cleanup leftover listening sockets */ + LOCK_DOMAIN(rtable, bgp_listen_domain); + struct bgp_socket *bs; + node *nxt; + WALK_LIST_DELSAFE(bs, nxt, bgp_sockets) + if (EMPTY_LIST(bs->requests)) + { + rfree(bs->sk); + rem_node(&bs->n); + mb_free(bs); + } + UNLOCK_DOMAIN(rtable, bgp_listen_domain); } static inline struct bgp_channel * @@ -279,6 +365,8 @@ bgp_startup(struct bgp_proto *p) if (p->postponed_sk) { /* Apply postponed incoming connection */ + sk_reloop(p->postponed_sk, p->p.loop); + bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, p->postponed_sk); bgp_send_open(&p->incoming_conn); @@ -296,13 +384,7 @@ bgp_startup_timeout(timer *t) static void bgp_initiate(struct bgp_proto *p) { - int err_val; - - if (bgp_open(p) < 0) - { err_val = BEM_NO_SOCKET; goto err1; } - - if (bgp_setup_auth(p, 1) < 0) - { err_val = BEM_INVALID_MD5; goto err2; } + bgp_open(p); if (p->cf->bfd) bgp_update_bfd(p, p->cf->bfd); @@ -311,23 +393,28 @@ bgp_initiate(struct bgp_proto *p) { p->start_state = BSS_DELAY; BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay); - bgp_start_timer(p->startup_timer, p->startup_delay); + bgp_start_timer(p, p->startup_timer, p->startup_delay); } else bgp_startup(p); +} - return; - -err2: - bgp_close(p); -err1: - p->p.disabled = 1; - bgp_store_error(p, NULL, BE_MISC, err_val); - - p->neigh = NULL; - proto_notify_state(&p->p, PS_DOWN); - - return; +static void +bgp_initiate_disable(struct bgp_proto *p, int err_val) +{ + PROTO_LOCKED_FROM_MAIN(&p->p) + { + /* The protocol may be already down for another reason. + * Shutdown the protocol only if it isn't already shutting down. */ + switch (p->p.proto_state) + { + case PS_START: + case PS_UP: + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, err_val); + bgp_stop(p, err_val, NULL, 0); + } + } } /** @@ -340,14 +427,14 @@ err1: * timers. */ void -bgp_start_timer(timer *t, uint value) +bgp_start_timer(struct bgp_proto *p, timer *t, uint value) { if (value) { /* The randomization procedure is specified in RFC 4271 section 10 */ btime time = value S; btime randomize = random() % ((time / 4) + 1); - tm_start(t, time - randomize); + tm_start_in(t, time - randomize, p->p.loop); } else tm_stop(t); @@ -374,8 +461,10 @@ bgp_close_conn(struct bgp_conn *conn) conn->keepalive_timer = NULL; rfree(conn->hold_timer); conn->hold_timer = NULL; + rfree(conn->tx_ev); conn->tx_ev = NULL; + rfree(conn->sk); conn->sk = NULL; @@ -512,9 +601,16 @@ void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) { proto_notify_state(&p->p, PS_STOP); + p->uncork_ev->data = NULL; bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len); - ev_schedule(p->event); + + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + if (c->ptx) + bgp_free_pending_tx(c); + + proto_send_event(&p->p, p->event); } static inline void @@ -607,7 +703,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) int active = loc->ready && rem->ready; c->c.disabled = !active; - c->c.reloadable = p->route_refresh || c->cf->import_table; + c->c.reloadable = p->route_refresh || ((c->c.in_keep & RIK_PREFILTER) == RIK_PREFILTER); c->index = active ? num++ : 0; @@ -705,7 +801,7 @@ bgp_conn_enter_close_state(struct bgp_conn *conn) conn->sk->rx_hook = NULL; /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */ - bgp_start_timer(conn->hold_timer, 10); + bgp_start_timer(p, conn->hold_timer, 10); if (os == BS_ESTABLISHED) bgp_conn_leave_established_state(p); @@ -719,7 +815,7 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn) bgp_close_conn(conn); bgp_conn_set_state(conn, BS_IDLE); - ev_schedule(p->event); + proto_send_event(&p->p, p->event); if (os == BS_ESTABLISHED) bgp_conn_leave_established_state(p); @@ -761,32 +857,28 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c); - break; + /* fall through */ case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c); - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(&c->c.in_req); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c); - rt_modify_stale(c->c.table, &c->c); + rt_refresh_begin(&c->c.in_req); + bgp_graceful_restart_feed(c); break; } } else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_begin(&c->c.in_req); + rt_refresh_end(&c->c.in_req); } /* Reset bucket and prefix tables */ - bgp_free_bucket_table(c); - bgp_free_prefix_table(c); - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); + bgp_free_pending_tx(c); + bgp_init_pending_tx(c); c->packets_to_send = 0; } @@ -794,9 +886,56 @@ bgp_handle_graceful_restart(struct bgp_proto *p) ASSERT(p->gr_active_num > 0); proto_notify_state(&p->p, PS_START); - tm_start(p->gr_timer, p->conn->remote_caps->gr_time S); + tm_start_in(p->gr_timer, p->conn->remote_caps->gr_time S, p->p.loop); +} + +static void +bgp_graceful_restart_feed_done(struct rt_export_request *req) +{ + req->hook = NULL; +} + +static void +bgp_graceful_restart_feed_dump_req(struct rt_export_request *req) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + debug(" BGP-GR %s.%s export request %p\n", c->c.proto->name, c->c.name, req); +} + +static void +bgp_graceful_restart_feed_log_state_change(struct rt_export_request *req, u8 state) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct bgp_proto *p = (void *) c->c.proto; + BGP_TRACE(D_EVENTS, "Long-lived graceful restart export state changed to %s", rt_export_state_name(state)); + + if (state == TES_READY) + rt_stop_export(req, bgp_graceful_restart_feed_done); +} + +static void +bgp_graceful_restart_drop_export(struct rt_export_request *req UNUSED, const net_addr *n UNUSED, struct rt_pending_export *rpe UNUSED) +{ /* Nothing to do */ } + +static void +bgp_graceful_restart_feed(struct bgp_channel *c) +{ + c->stale_feed = (struct rt_export_request) { + .name = "BGP-GR", + .list = &global_work_list, + .trace_routes = c->c.debug | c->c.proto->debug, + .dump_req = bgp_graceful_restart_feed_dump_req, + .log_state_change = bgp_graceful_restart_feed_log_state_change, + .export_bulk = bgp_rte_modify_stale, + .export_one = bgp_graceful_restart_drop_export, + }; + + rt_request_export(c->c.table, &c->stale_feed); } + + + /** * bgp_graceful_restart_done - finish active BGP graceful restart * @c: BGP channel @@ -820,7 +959,7 @@ bgp_graceful_restart_done(struct bgp_channel *c) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(&c->c.in_req); } /** @@ -861,8 +1000,8 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; - tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c); + tm_start_in(c->stale_timer, c->stale_time S, p->p.loop); + bgp_graceful_restart_feed(c); } } else @@ -900,10 +1039,7 @@ bgp_refresh_begin(struct bgp_channel *c) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c); - - if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c); + rt_refresh_begin(&c->c.in_req); } /** @@ -924,10 +1060,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c); - - if (c->c.in_table) - rt_prune_sync(c->c.in_table, 0); + rt_refresh_end(&c->c.in_req); } @@ -941,7 +1074,7 @@ bgp_send_open(struct bgp_conn *conn) bgp_prepare_capabilities(conn); bgp_schedule_packet(conn, NULL, PKT_OPEN); bgp_conn_set_state(conn, BS_OPENSENT); - bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time); + bgp_start_timer(conn->bgp, conn->hold_timer, conn->bgp->cf->initial_hold_time); } static void @@ -1018,7 +1151,7 @@ bgp_hold_timeout(timer *t) and perhaps just not processed BGP packets in time. */ if (sk_rx_ready(conn->sk) > 0) - bgp_start_timer(conn->hold_timer, 10); + bgp_start_timer(p, conn->hold_timer, 10); else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready) { BGP_TRACE(D_EVENTS, "Hold timer expired"); @@ -1078,7 +1211,7 @@ bgp_active(struct bgp_proto *p) BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); bgp_setup_conn(p, conn); bgp_conn_set_state(conn, BS_ACTIVE); - bgp_start_timer(conn->connect_timer, delay); + bgp_start_timer(p, conn->connect_timer, delay); } /** @@ -1109,6 +1242,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c s->tos = IP_PREC_INTERNET_CONTROL; s->password = p->cf->password; s->tx_hook = bgp_connected; + s->flags = p->cf->free_bind ? SKF_FREEBIND : 0; BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL, s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL); @@ -1116,7 +1250,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c bgp_setup_sk(conn, s); bgp_conn_set_state(conn, BS_CONNECT); - if (sk_open(s) < 0) + if (sk_open(s, p->p.loop) < 0) goto err; /* Set minimal receive TTL if needed */ @@ -1125,7 +1259,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c goto err; DBG("BGP: Waiting for connect success\n"); - bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time); + bgp_start_timer(p, conn->connect_timer, p->cf->connect_retry_time); return; err: @@ -1146,12 +1280,17 @@ static struct bgp_proto * bgp_find_proto(sock *sk) { struct bgp_proto *best = NULL; - struct bgp_proto *p; + struct bgp_socket *bs = sk->data; + struct bgp_listen_request *req; /* sk->iface is valid only if src or dst address is link-local */ int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr); - WALK_LIST(p, proto_list) + LOCK_DOMAIN(rtable, bgp_listen_domain); + + WALK_LIST(req, bs->requests) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); if ((p->p.proto == &proto_bgp) && (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && @@ -1165,7 +1304,9 @@ bgp_find_proto(sock *sk) if (!bgp_is_dynamic(p)) break; } + } + UNLOCK_DOMAIN(rtable, bgp_listen_domain); return best; } @@ -1184,6 +1325,8 @@ bgp_find_proto(sock *sk) static int bgp_incoming_connection(sock *sk, uint dummy UNUSED) { + ASSERT_DIE(birdloop_inside(&main_birdloop)); + struct bgp_proto *p; int acc, hops; @@ -1197,6 +1340,8 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) return 0; } + birdloop_enter(p->p.loop); + /* * BIRD should keep multiple incoming connections in OpenSent state (for * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming @@ -1226,7 +1371,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) if (!acc) { rfree(sk); - return 0; + goto leave; } hops = p->cf->multihop ? : 1; @@ -1251,19 +1396,24 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) p = bgp_spawn(p, sk->daddr); p->postponed_sk = sk; rmove(sk, p->p.pool); - return 0; + goto leave; } rmove(sk, p->p.pool); + sk_reloop(sk, p->p.loop); + bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); bgp_send_open(&p->incoming_conn); - return 0; + goto leave; err: sk_log_error(sk, p->p.name); log(L_ERR "%s: Incoming connection aborted", p->p.name); rfree(sk); + +leave: + birdloop_leave(p->p.loop); return 0; } @@ -1374,15 +1524,22 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd) { if (bfd && p->bfd_req) + { + BGP_TRACE(D_EVENTS, "Updating existing BFD request"); bfd_update_request(p->bfd_req, bfd); + } if (bfd && !p->bfd_req && !bgp_is_dynamic(p)) + { p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip, p->cf->multihop ? NULL : p->neigh->iface, - p->p.vrf, bgp_bfd_notify, p, bfd); + p->p.vrf, bgp_bfd_notify, p, p->p.loop, bfd); + BGP_TRACE(D_EVENTS, "Requesting a new BFD session"); + } if (!bfd && p->bfd_req) { + BGP_TRACE(D_EVENTS, "Retracting the BFD request"); rfree(p->bfd_req); p->bfd_req = NULL; } @@ -1398,12 +1555,8 @@ bgp_reload_routes(struct channel *C) if (C->channel != &channel_bgp) return; - ASSERT(p->conn && (p->route_refresh || c->c.in_table)); - - if (c->c.in_table) - channel_schedule_reload(C); - else - bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); + ASSERT(p->conn && p->route_refresh); + bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); } static void @@ -1423,6 +1576,12 @@ bgp_feed_begin(struct channel *C, int initial) if (initial && p->cf->gr_mode) c->feed_state = BFS_LOADING; + if (!initial && C->out_table) + { + c->feed_out_table = 1; + return; + } + /* It is refeed and both sides support enhanced route refresh */ if (!initial && p->enhanced_refresh) { @@ -1445,6 +1604,12 @@ bgp_feed_end(struct channel *C) if (C->channel != &channel_bgp) return; + if (c->feed_out_table) + { + c->feed_out_table = 0; + return; + } + /* This should not happen */ if (!p->conn) return; @@ -1467,9 +1632,9 @@ bgp_feed_end(struct channel *C) static void -bgp_start_locked(struct object_lock *lock) +bgp_start_locked(void *_p) { - struct bgp_proto *p = lock->data; + struct bgp_proto *p = _p; const struct bgp_config *cf = p->cf; if (p->p.proto_state != PS_START) @@ -1545,6 +1710,8 @@ bgp_start(struct proto *P) p->last_rx_update = 0; p->event = ev_new_init(p->p.pool, bgp_decision, p); + p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p); + p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); @@ -1574,8 +1741,11 @@ bgp_start(struct proto *P) lock->iface = p->cf->iface; lock->vrf = p->cf->iface ? NULL : p->p.vrf; lock->type = OBJLOCK_TCP; - lock->hook = bgp_start_locked; - lock->data = p; + lock->event = (event) { + .hook = bgp_start_locked, + .data = p, + }; + lock->target = proto_event_list(P); /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */ if (bgp_is_dynamic(p)) @@ -1675,6 +1845,13 @@ done: return p->p.proto_state; } +struct rte_owner_class bgp_rte_owner_class = { + .get_route_info = bgp_get_route_info, + .rte_better = bgp_rte_better, + .rte_mergable = bgp_rte_mergable, + .rte_igp_metric = bgp_rte_igp_metric, +}; + static struct proto * bgp_init(struct proto_config *CF) { @@ -1684,15 +1861,13 @@ bgp_init(struct proto_config *CF) P->rt_notify = bgp_rt_notify; P->preexport = bgp_preexport; - P->neigh_notify = bgp_neigh_notify; + P->iface_sub.neigh_notify = bgp_neigh_notify; P->reload_routes = bgp_reload_routes; P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; - P->rte_better = bgp_rte_better; - P->rte_mergable = bgp_rte_mergable; - P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; - P->rte_modify = bgp_rte_modify_stale; - P->rte_igp_metric = bgp_rte_igp_metric; + + P->sources.class = &bgp_rte_owner_class; + P->sources.rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; p->cf = cf; p->is_internal = (cf->local_as == cf->remote_as); @@ -1759,14 +1934,11 @@ bgp_channel_start(struct channel *C) } c->pool = p->p.pool; // XXXX - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); - - if (c->cf->import_table) - channel_setup_in_table(C); if (c->cf->export_table) - channel_setup_out_table(C); + bgp_setup_out_table(c); + + bgp_init_pending_tx(c); c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0); @@ -1889,7 +2061,7 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 return cc2->c.table; /* Last, try default table of given type */ - if (tab = cf->c.global->def_tables[type]) + if (tab = rt_get_default_table(cf->c.global, type)) return tab; cf_error("Undefined IGP table"); @@ -1908,7 +2080,7 @@ bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc) return cc2->c.table; /* Last, try default table of given type */ - struct rtable_config *tab = cf->c.global->def_tables[type]; + struct rtable_config *tab = rt_get_default_table(cf->c.global, type); if (tab) return tab; @@ -2175,7 +2347,6 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->llgr_time != old->llgr_time) || (new->ext_next_hop != old->ext_next_hop) || (new->add_path != old->add_path) || - (new->import_table != old->import_table) || (new->export_table != old->export_table) || (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) || (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) || @@ -2190,11 +2361,13 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->aigp != old->aigp) || (new->cost != old->cost)) { - /* import_changed itself does not force ROUTE_REFRESH when import_table is active */ - if (c->c.in_table && (c->c.channel_state == CS_UP)) + /* If import table is active and route refresh is possible, we just ask for route refresh */ + if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP) && p->route_refresh) bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); - *import_changed = 1; + /* Otherwise we do complete reload */ + else + *import_changed = 1; } if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) || @@ -2562,6 +2735,9 @@ bgp_show_proto_info(struct proto *P) tm_remains(p->conn->hold_timer), p->conn->hold_time); cli_msg(-1006, " Keepalive timer: %t/%u", tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time); + cli_msg(-1006, " TX pending: %d bytes%s", + p->conn->sk->tpos - p->conn->sk->ttx, + ev_active(p->conn->tx_ev) ? " (refill scheduled)" : ""); } #if 0 @@ -2615,6 +2791,22 @@ bgp_show_proto_info(struct proto *P) if (c->base_table) cli_msg(-1006, " Base table: %s", c->base_table->name); + + uint bucket_cnt = 0; + uint prefix_cnt = 0; + struct bgp_bucket *buck; + struct bgp_prefix *px; + if (c->ptx) + WALK_LIST(buck, c->ptx->bucket_queue) + { + bucket_cnt++; + WALK_LIST(px, buck->prefixes) + if (px->cur) + prefix_cnt++; + } + + cli_msg(-1006, " Pending %u attribute sets with total %u prefixes to send", + bucket_cnt, prefix_cnt); } } } @@ -2632,7 +2824,6 @@ struct channel_class channel_bgp = { struct protocol proto_bgp = { .name = "BGP", .template = "bgp%d", - .class = PROTOCOL_BGP, .preference = DEF_PREF_BGP, .channel_mask = NB_IP | NB_VPN | NB_FLOW, .proto_size = sizeof(struct bgp_proto), @@ -2644,12 +2835,12 @@ struct protocol proto_bgp = { .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, - .get_attr = bgp_get_attr, - .get_route_info = bgp_get_route_info, .show_proto_info = bgp_show_proto_info }; void bgp_build(void) { proto_build(&proto_bgp); + bgp_register_attrs(); + bgp_listen_domain = DOMAIN_NEW(rtable, "BGP Listen Sockets"); } |