diff options
-rw-r--r-- | proto/bgp/attrs.c | 2 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 496 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 53 | ||||
-rw-r--r-- | proto/bgp/packets.c | 109 |
4 files changed, 464 insertions, 196 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index c13f9056..d3716eab 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -365,6 +365,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) int new_used; int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used); + DBG("BGP: Encoding old AS_PATH\n"); rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl); ADVANCE(w, remains, rv); memcpy(w, buf, nl); @@ -381,6 +382,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) * discarded in bgp_check_as_path(). */ + DBG("BGP: Encoding AS4_PATH\n"); rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len); ADVANCE(w, remains, rv); memcpy(w, a->u.ptr->data, len); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 29d2e09f..46b28906 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -53,7 +53,7 @@ * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams. */ -#undef LOCAL_DEBUG +#define LOCAL_DEBUG #include "nest/bird.h" #include "nest/iface.h" @@ -70,20 +70,69 @@ struct linpool *bgp_linpool; /* Global temporary pool */ static sock *bgp_listen_sk; /* Global listening socket */ static int bgp_counter; /* Number of protocol instances using the listening socket */ -static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established" }; +static void bgp_close(struct bgp_proto *p, int apply_md5); static void bgp_connect(struct bgp_proto *p); +static void bgp_active(struct bgp_proto *p, int delay); static void bgp_initiate(struct bgp_proto *p); -static void bgp_setup_listen_sk(void); +static void bgp_stop(struct bgp_proto *p); +static sock *bgp_setup_listen_sk(void); +/** + * bgp_open - open a BGP instance + * @p: BGP instance + * + * This function allocates and configures shared BGP resources. + * Should be called as the last step during initialization + * (when lock is acquired and neighbor is ready). 
+ * When error, state changed to PS_DOWN, -1 is returned and caller + * should return immediately. + */ +static int +bgp_open(struct bgp_proto *p) +{ + bgp_counter++; + + if (!bgp_listen_sk) + bgp_listen_sk = bgp_setup_listen_sk(); + + if (!bgp_linpool) + bgp_linpool = lp_new(&root_pool, 4080); + + if (p->cf->password) + { + int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password); + if (rv < 0) + { + bgp_close(p, 0); + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_MD5); + proto_notify_state(&p->p, PS_DOWN); + return -1; + } + } + + p->start_state = BSS_CONNECT; + return 0; +} + +/** + * bgp_close - close a BGP instance + * @p: BGP instance + * @apply_md5: 0 to disable unsetting MD5 auth + * + * This function frees and deconfigures shared BGP resources. + * @apply_md5 is set to 0 when bgp_close is called as a cleanup + * from failed bgp_open(). + */ static void -bgp_close(struct bgp_proto *p) +bgp_close(struct bgp_proto *p, int apply_md5) { ASSERT(bgp_counter); bgp_counter--; - if (p->cf->password) + if (p->cf->password && apply_md5) sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL); if (!bgp_counter) @@ -123,18 +172,11 @@ bgp_start_timer(timer *t, int value) * * This function takes a connection described by the &bgp_conn structure, * closes its socket and frees all resources associated with it. - * - * If the connection is being closed due to a protocol error, adjust - * the connection restart timer as well according to the error recovery - * policy set in the configuration. - * - * If the connection was marked as primary, it shuts down the protocol as well. 
*/ void bgp_close_conn(struct bgp_conn *conn) { struct bgp_proto *p = conn->bgp; - struct bgp_config *cf = p->cf; DBG("BGP: Closing connection\n"); conn->packets_to_send = 0; @@ -146,54 +188,184 @@ bgp_close_conn(struct bgp_conn *conn) conn->hold_timer = NULL; rfree(conn->sk); conn->sk = NULL; - conn->state = BS_IDLE; - if (conn->error_flag > 1) + rfree(conn->tx_ev); + conn->tx_ev = NULL; +} + + +/** + * bgp_update_startup_delay - update a startup delay + * @p: BGP instance + * @conn: related BGP connection + * @code: BGP error code + * @subcode: BGP error subcode + * + * This function updates a startup delay that is used to postpone next BGP connect. + * It also handles disable_after_error and might stop BGP instance when error + * happened and disable_after_error is on. + * + * It should be called when BGP protocol error happened. + */ +void +bgp_update_startup_delay(struct bgp_proto *p, struct bgp_conn *conn, unsigned code, unsigned subcode) +{ + struct bgp_config *cf = p->cf; + + /* Don't handle cease messages as errors */ + if (code == 6 && !subcode) { - if (cf->disable_after_error) - p->p.disabled = 1; - if (p->last_connect && (bird_clock_t)(p->last_connect + cf->error_amnesia_time) < now) - p->startup_delay = 0; - if (!p->startup_delay) - p->startup_delay = cf->error_delay_time_min; - else - { - p->startup_delay *= 2; - if (p->startup_delay > cf->error_delay_time_max) - p->startup_delay = cf->error_delay_time_max; - } + p->startup_delay = 0; + return; + } + + /* During start, we only consider errors on outgoing connection, because + otherwise delay timer for outgoing connection is already running and + we could increase delay time two times (or more) per one attempt to + connect. 
+ */ + if ((p->p.proto_state == PS_START) && (conn != &p->outgoing_conn)) + return; + + DBG("BGP: Updating startup delay %d %d\n", code, subcode); + + p->last_proto_error = now; + + if (cf->disable_after_error) + { + p->startup_delay = 0; + p->p.disabled = 1; + if (p->p.proto_state == PS_START) + bgp_stop(p); + + return; } - if (conn->primary) + + if (p->last_proto_error && ((now - p->last_proto_error) >= cf->error_amnesia_time)) + p->startup_delay = 0; + + if (!p->startup_delay) + p->startup_delay = cf->error_delay_time_min; + else { - bgp_close(p); - p->conn = NULL; - proto_notify_state(&p->p, PS_DOWN); + p->startup_delay *= 2; + if (p->startup_delay > cf->error_delay_time_max) + p->startup_delay = cf->error_delay_time_max; } - else if (conn->error_flag > 1) - bgp_initiate(p); } -static int -bgp_graceful_close_conn(struct bgp_conn *c) +static void +bgp_graceful_close_conn(struct bgp_conn *conn) { - switch (c->state) + switch (conn->state) { case BS_IDLE: - return 0; + case BS_CLOSE: + return; case BS_CONNECT: case BS_ACTIVE: - bgp_close_conn(c); - return 1; + bgp_conn_enter_idle_state(conn); + return; case BS_OPENSENT: case BS_OPENCONFIRM: case BS_ESTABLISHED: - bgp_error(c, 6, 0, NULL, 0); - return 1; + bgp_error(conn, 6, 0, NULL, 0); + return; default: - bug("bgp_graceful_close_conn: Unknown state %d", c->state); + bug("bgp_graceful_close_conn: Unknown state %d", conn->state); } } static void +bgp_down(struct bgp_proto *p) +{ + if (p->start_state > BSS_PREPARE) + bgp_close(p, 1); + + DBG("BGP: DOWN\n"); + proto_notify_state(&p->p, PS_DOWN); +} + +static void +bgp_decision(void *vp) +{ + struct bgp_proto *p = vp; + + DBG("BGP: Decision start\n"); + if ((p->p.proto_state == PS_START) + && (p->outgoing_conn.state == BS_IDLE)) + bgp_initiate(p); + + if ((p->p.proto_state == PS_STOP) + && (p->outgoing_conn.state == BS_IDLE) + && (p->incoming_conn.state == BS_IDLE)) + bgp_down(p); +} + +static void +bgp_stop(struct bgp_proto *p) +{ + proto_notify_state(&p->p, 
PS_STOP); + bgp_graceful_close_conn(&p->outgoing_conn); + bgp_graceful_close_conn(&p->incoming_conn); + ev_schedule(p->event); +} + +void +bgp_conn_enter_established_state(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + + BGP_TRACE(D_EVENTS, "BGP session established"); + DBG("BGP: UP!!!\n"); + + p->conn = conn; + p->last_error_class = 0; + p->last_error_code = 0; + bgp_attr_init(conn->bgp); + conn->state = BS_ESTABLISHED; + proto_notify_state(&p->p, PS_UP); +} + +static void +bgp_conn_leave_established_state(struct bgp_proto *p) +{ + BGP_TRACE(D_EVENTS, "BGP session closed"); + p->conn = NULL; + + if (p->p.proto_state == PS_UP) + bgp_stop(p); +} + +void +bgp_conn_enter_close_state(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + int os = conn->state; + + conn->state = BS_CLOSE; + tm_stop(conn->hold_timer); + tm_stop(conn->keepalive_timer); + conn->sk->rx_hook = NULL; + + if (os == BS_ESTABLISHED) + bgp_conn_leave_established_state(p); +} + +void +bgp_conn_enter_idle_state(struct bgp_conn *conn) +{ + struct bgp_proto *p = conn->bgp; + int os = conn->state; + + bgp_close_conn(conn); + conn->state = BS_IDLE; + ev_schedule(p->event); + + if (os == BS_ESTABLISHED) + bgp_conn_leave_established_state(p); +} + +static void bgp_send_open(struct bgp_conn *conn) { DBG("BGP: Sending open\n"); @@ -222,8 +394,13 @@ bgp_connect_timeout(timer *t) struct bgp_proto *p = conn->bgp; DBG("BGP: connect_timeout\n"); - bgp_close_conn(conn); - bgp_connect(p); + if (p->p.proto_state == PS_START) + { + bgp_close_conn(conn); + bgp_connect(p); + } + else + bgp_conn_enter_idle_state(conn); } static void @@ -232,26 +409,14 @@ bgp_sock_err(sock *sk, int err) struct bgp_conn *conn = sk->data; struct bgp_proto *p = conn->bgp; + bgp_store_error(p, conn, BE_SOCKET, err); + if (err) BGP_TRACE(D_EVENTS, "Connection lost (%M)", err); else BGP_TRACE(D_EVENTS, "Connection closed"); - switch (conn->state) - { - case BS_CONNECT: - case BS_OPENSENT: - rfree(conn->sk); - conn->sk 
= NULL; - conn->state = BS_ACTIVE; - bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time); - break; - case BS_OPENCONFIRM: - case BS_ESTABLISHED: - bgp_close_conn(conn); - break; - default: - bug("bgp_sock_err called in invalid state %d", conn->state); - } + + bgp_conn_enter_idle_state(conn); } static void @@ -280,8 +445,6 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) conn->sk = NULL; conn->bgp = p; conn->packets_to_send = 0; - conn->error_flag = 0; - conn->primary = 0; t = conn->connect_retry_timer = tm_new(p->p.pool); t->hook = bgp_connect_timeout; @@ -292,6 +455,9 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) t = conn->keepalive_timer = tm_new(p->p.pool); t->hook = bgp_keepalive_timeout; t->data = conn; + conn->tx_ev = ev_new(p->p.pool); + conn->tx_ev->hook = bgp_kick_tx; + conn->tx_ev->data = conn; } static void @@ -302,6 +468,17 @@ bgp_setup_sk(struct bgp_proto *p, struct bgp_conn *conn, sock *s) conn->sk = s; } +static void +bgp_active(struct bgp_proto *p, int delay) +{ + struct bgp_conn *conn = &p->outgoing_conn; + + BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); + bgp_setup_conn(p, conn); + conn->state = BS_ACTIVE; + bgp_start_timer(conn->connect_retry_timer, delay); +} + /** * bgp_connect - initiate an outgoing connection * @p: BGP instance @@ -317,7 +494,6 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c struct bgp_conn *conn = &p->outgoing_conn; DBG("BGP: Connecting\n"); - p->last_connect = now; s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; if (ipa_nonzero(p->cf->source_addr)) @@ -348,17 +524,10 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c static void bgp_initiate(struct bgp_proto *p) { - unsigned delay; + unsigned delay = MAX(p->startup_delay, p->cf->start_delay_time); - delay = p->cf->start_delay_time; - if (p->startup_delay > delay) - delay = p->startup_delay; if (delay) - { - BGP_TRACE(D_EVENTS, "Connect delayed 
by %d seconds", delay); - bgp_setup_conn(p, &p->outgoing_conn); - bgp_start_timer(p->outgoing_conn.connect_retry_timer, delay); - } + bgp_active(p, delay); else bgp_connect(p); } @@ -389,7 +558,7 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) if (ipa_equal(p->cf->remote_ip, sk->daddr)) { match = 1; - if ((p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && p->neigh && p->neigh->iface) + if ((p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && (p->start_state > BSS_PREPARE)) { BGP_TRACE(D_EVENTS, "Incoming connection from %I port %d", sk->daddr, sk->dport); if (p->incoming_conn.sk) @@ -411,27 +580,25 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) return 0; } -static void +static sock * bgp_setup_listen_sk(void) { - if (!bgp_listen_sk) + sock *s = sk_new(&root_pool); + DBG("BGP: Creating incoming socket\n"); + s->type = SK_TCP_PASSIVE; + s->sport = BGP_PORT; + s->tos = IP_PREC_INTERNET_CONTROL; + s->rbsize = BGP_RX_BUFFER_SIZE; + s->tbsize = BGP_TX_BUFFER_SIZE; + s->rx_hook = bgp_incoming_connection; + if (sk_open(s)) { - sock *s = sk_new(&root_pool); - DBG("BGP: Creating incoming socket\n"); - s->type = SK_TCP_PASSIVE; - s->sport = BGP_PORT; - s->tos = IP_PREC_INTERNET_CONTROL; - s->rbsize = BGP_RX_BUFFER_SIZE; - s->tbsize = BGP_TX_BUFFER_SIZE; - s->rx_hook = bgp_incoming_connection; - if (sk_open(s)) - { - log(L_ERR "Unable to open incoming BGP socket"); - rfree(s); - } - else - bgp_listen_sk = s; + log(L_ERR "Unable to open incoming BGP socket"); + rfree(s); + return NULL; } + else + return s; } static void @@ -452,6 +619,11 @@ bgp_start_neighbor(struct bgp_proto *p) DBG("BGP: Selected link-level address %I\n", p->local_link); } #endif + + int rv = bgp_open(p); + if (rv < 0) + return; + bgp_initiate(p); } @@ -462,16 +634,20 @@ bgp_neigh_notify(neighbor *n) if (n->iface) { - BGP_TRACE(D_EVENTS, "Neighbor found"); - bgp_start_neighbor(p); + if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE)) + { + 
BGP_TRACE(D_EVENTS, "Neighbor found"); + bgp_start_neighbor(p); + } } else { - BGP_TRACE(D_EVENTS, "Neighbor lost"); - /* Send cease packets, but don't wait for them to be delivered */ - bgp_graceful_close_conn(&p->outgoing_conn); - bgp_graceful_close_conn(&p->incoming_conn); - proto_notify_state(&p->p, PS_DOWN); + if ((p->p.proto_state == PS_START) || (p->p.proto_state == PS_UP)) + { + BGP_TRACE(D_EVENTS, "Neighbor lost"); + bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST); + bgp_stop(p); + } } } @@ -481,6 +657,12 @@ bgp_start_locked(struct object_lock *lock) struct bgp_proto *p = lock->data; struct bgp_config *cf = p->cf; + if (p->p.proto_state != PS_START) + { + DBG("BGP: Got lock in different state %d\n", p->p.proto_state); + return; + } + DBG("BGP: Got lock\n"); p->local_id = cf->c.global->router_id; p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip; @@ -497,10 +679,14 @@ bgp_start_locked(struct object_lock *lock) if (!p->neigh) { log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop); + /* As we do not start yet, we can just disable protocol */ p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); proto_notify_state(&p->p, PS_DOWN); + return; } - else if (p->neigh->iface) + + if (p->neigh->iface) bgp_start_neighbor(p); else BGP_TRACE(D_EVENTS, "Waiting for %I to become my neighbor", p->next_hop); @@ -513,16 +699,14 @@ bgp_start(struct proto *P) struct object_lock *lock; DBG("BGP: Startup.\n"); + p->start_state = BSS_PREPARE; p->outgoing_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE; - p->startup_delay = 0; p->neigh = NULL; - bgp_counter++; - bgp_setup_listen_sk(); - - if (!bgp_linpool) - bgp_linpool = lp_new(&root_pool, 4080); + p->event = ev_new(p->p.pool); + p->event->hook = bgp_decision; + p->event->data = p; /* * Before attempting to create the connection, we need to lock the @@ -539,16 +723,6 @@ bgp_start(struct proto *P) lock->data = p; olock_acquire(lock); - /* We should create security association 
after we get a lock not to - * break existing connections. - */ - if (p->cf->password) - { - int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password); - if (rv < 0) - return PS_STOP; - } - return PS_START; } @@ -558,31 +732,11 @@ bgp_shutdown(struct proto *P) struct bgp_proto *p = (struct bgp_proto *) P; BGP_TRACE(D_EVENTS, "Shutdown requested"); + bgp_store_error(p, NULL, BE_MAN_DOWN, 0); + p->startup_delay = 0; + bgp_stop(p); - /* - * We want to send the Cease notification message to all connections - * we have open, but we don't want to wait for all of them to complete. - * We are willing to handle the primary connection carefully, but for - * the others we just try to send the packet and if there is no buffer - * space free, we'll gracefully finish. - */ - - proto_notify_state(&p->p, PS_STOP); - if (!p->conn) - { - if (p->outgoing_conn.state != BS_IDLE) - p->outgoing_conn.primary = 1; /* Shuts protocol down after connection close */ - else if (p->incoming_conn.state != BS_IDLE) - p->incoming_conn.primary = 1; - } - if (bgp_graceful_close_conn(&p->outgoing_conn) || bgp_graceful_close_conn(&p->incoming_conn)) - return p->p.proto_state; - else - { - /* No connections open, shutdown automatically */ - bgp_close(p); - return PS_DOWN; - } + return p->p.proto_state; } static struct proto * @@ -618,19 +772,48 @@ bgp_init(struct proto_config *C) void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len) { - if (c->error_flag) + if (c->state == BS_CLOSE) return; + bgp_log_error(c->bgp, "Error", code, subcode, data, (len > 0) ? len : -len); - c->error_flag = 1 + (code != 6); + bgp_store_error(c->bgp, c, BE_BGP_TX, (code << 16) | subcode); + bgp_update_startup_delay(c->bgp, c, code, subcode); + bgp_conn_enter_close_state(c); + c->notify_code = code; c->notify_subcode = subcode; c->notify_data = data; c->notify_size = (len > 0) ? 
len : 0; - if (c->primary) - proto_notify_state(&c->bgp->p, PS_STOP); bgp_schedule_packet(c, PKT_NOTIFICATION); } +/** + * bgp_store_error - store last error for status report + * @p: BGP instance + * @c: connection + * @class: error class (BE_xxx constants) + * @code: error code (class specific) + * + * bgp_store_error() decides whether given error is interesting enough + * and store that error to last_error variables of @p + */ +void +bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code) +{ + /* During PS_UP, we ignore errors on secondary connection */ + if ((p->p.proto_state == PS_UP) && c && (c != p->conn)) + return; + + /* During PS_STOP, we ignore any errors, as we want to report + * the error that caused transition to PS_STOP + */ + if (p->p.proto_state == PS_STOP) + return; + + p->last_error_class = class; + p->last_error_code = code; +} + void bgp_check(struct bgp_config *c) { @@ -639,7 +822,7 @@ bgp_check(struct bgp_config *c) if (!c->remote_as) cf_error("Neighbor must be configured"); if (!bgp_as4_support && c->enable_as4) - cf_error("AS4 support disabled globbaly"); + cf_error("AS4 support disabled globally"); if (!c->enable_as4 && (c->local_as > 0xFFFF)) cf_error("Local AS number out of range"); if (!c->enable_as4 && (c->remote_as > 0xFFFF)) @@ -650,15 +833,40 @@ bgp_check(struct bgp_config *c) cf_error("Only external neighbor can be RS client"); } +static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" }; +static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown", ""}; +static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed" }; + + static void bgp_get_status(struct proto *P, byte *buf) { struct bgp_proto *p = (struct bgp_proto *) P; + const byte *err1 = bgp_err_classes[p->last_error_class]; + const byte *err2 = ""; + byte errbuf[32]; + + switch (p->last_error_class) + { + 
case BE_MISC: + err2 = bgp_misc_errors[p->last_error_code]; + break; + case BE_SOCKET: + err2 = (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code); + break; + case BE_BGP_RX: + case BE_BGP_TX: + err2 = bgp_error_dsc(errbuf, p->last_error_code >> 16, p->last_error_code & 0xFF); + break; + } + if (P->proto_state == PS_DOWN) - buf[0] = 0; + bsprintf(buf, "%s%s", err1, err2); else - strcpy(buf, bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)]); + bsprintf(buf, "%-14s%s%s", + bgp_state_names[MAX(p->incoming_conn.state, p->outgoing_conn.state)], + err1, err2); } static int diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index aaa2c4ac..5c180cce 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -47,12 +47,13 @@ struct bgp_conn { struct timer *connect_retry_timer; struct timer *hold_timer; struct timer *keepalive_timer; + struct event *tx_ev; int packets_to_send; /* Bitmap of packet types to be sent */ int notify_code, notify_subcode, notify_size; byte *notify_data; int error_flag; /* Error state, ignore all input */ - int primary; /* This connection is primary */ u32 advertised_as; /* Temporary value for AS number received */ + int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; @@ -60,8 +61,8 @@ struct bgp_proto { struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ u32 local_as, remote_as; + int start_state; /* Substates that partition PS_START */ int is_internal; /* Internal BGP connection (local_as == remote_as) */ - int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ @@ -75,13 +76,17 @@ struct bgp_proto { ip_addr next_hop; /* Either the peer or multihop_via */ struct neighbor *neigh; /* Neighbor 
entry corresponding to next_hop */ ip_addr local_addr; /* Address of the local end of the link to next_hop */ + struct event *event; /* Event for respawning and shutting process */ struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */ unsigned int hash_size, hash_count, hash_limit; struct fib prefix_fib; /* Prefixes to be sent */ list bucket_queue; /* Queue of buckets to send */ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ unsigned startup_delay; /* Time to delay protocol startup by due to errors */ - bird_clock_t last_connect; /* Time of last connect attempt */ + bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */ + u8 last_error_class; /* Error class of last error */ + u32 last_error_code; /* Error code of last error. BGP protocol errors + are encoded as (bgp_err_code << 16 | bgp_err_subcode) */ #ifdef IPV6 byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */ unsigned mp_reach_len, mp_unreach_len; @@ -118,6 +123,12 @@ void bgp_start_timer(struct timer *t, int value); void bgp_check(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); void bgp_close_conn(struct bgp_conn *c); +void bgp_update_startup_delay(struct bgp_proto *p, struct bgp_conn *conn, unsigned code, unsigned subcode); +void bgp_conn_enter_established_state(struct bgp_conn *conn); +void bgp_conn_enter_close_state(struct bgp_conn *conn); +void bgp_conn_enter_idle_state(struct bgp_conn *conn); +void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); + #ifdef LOCAL_DEBUG #define BGP_FORCE_DEBUG 1 @@ -147,8 +158,10 @@ inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, /* packets.c */ void bgp_schedule_packet(struct bgp_conn *conn, int type); +void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, int size); +const byte * bgp_error_dsc(byte *buff, unsigned 
code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); /* Packet types */ @@ -186,7 +199,7 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco #define BA_AS4_PATH 0x11 /* [RFC4893] */ #define BA_AS4_AGGREGATOR 0x12 -/* BGP states */ +/* BGP connection states */ #define BS_IDLE 0 #define BS_CONNECT 1 /* Attempting to connect */ @@ -194,6 +207,38 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco #define BS_OPENSENT 3 #define BS_OPENCONFIRM 4 #define BS_ESTABLISHED 5 +#define BS_CLOSE 6 /* Used during transition to BS_IDLE */ + +/* BGP start states + * + * Used in PS_START for fine-grained specification of starting state. + * + * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP protocol + * has done what is necessary to start itself (like acquiring the lock), it goes to BSS_CONNECT. + * When some connection attempt failed because of option or capability error, it goes to + * BSS_CONNECT_NOCAP. + */ + +#define BSS_PREPARE 0 /* Used before ordinary BGP started, i.e. 
waiting for lock */ +#define BSS_CONNECT 1 /* Ordinary BGP connecting */ +#define BSS_CONNECT_NOCAP 2 /* Legacy BGP connecting (without capabilities) */ + +/* Error classes */ + +#define BE_NONE 0 +#define BE_MISC 1 /* Miscellaneous error */ +#define BE_SOCKET 2 /* Socket error */ +#define BE_BGP_RX 3 /* BGP protocol error notification received */ +#define BE_BGP_TX 4 /* BGP protocol error notification sent */ +#define BE_AUTO_DOWN 5 /* Automatic shutdown */ +#define BE_MAN_DOWN 6 /* Manual shutdown */ + +/* Misc error codes */ + +#define BEM_NEIGHBOR_LOST 1 +#define BEM_INVALID_NEXT_HOP 2 +#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */ + /* Well-known communities */ diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 00cdf036..4e42d90b 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -17,6 +17,8 @@ #include "lib/unaligned.h" #include "lib/socket.h" +#include "nest/cli.h" + #include "bgp.h" static byte * @@ -318,7 +320,8 @@ bgp_fire_tx(struct bgp_conn *conn) if (s & (1 << PKT_SCHEDULE_CLOSE)) { - bgp_close_conn(conn); + /* We can finally close connection and enter idle state */ + bgp_conn_enter_idle_state(conn); return 0; } if (s & (1 << PKT_NOTIFICATION)) @@ -371,8 +374,17 @@ bgp_schedule_packet(struct bgp_conn *conn, int type) DBG("BGP: Scheduling packet type %d\n", type); conn->packets_to_send |= 1 << type; if (conn->sk && conn->sk->tpos == conn->sk->tbuf) - while (bgp_fire_tx(conn)) - ; + ev_schedule(conn->tx_ev); +} + +void +bgp_kick_tx(void *vconn) +{ + struct bgp_conn *conn = vconn; + + DBG("BGP: kicking TX\n"); + while (bgp_fire_tx(conn)) + ; } void @@ -406,9 +418,9 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) case 65: if (cl != 4) goto err; - p->as4_support = 1; - p->as4_session = p->cf->enable_as4; - if (p->as4_session) + conn->as4_support = 1; + + if (p->cf->enable_as4) conn->advertised_as = get_u32(opt + 2); break; @@ -477,7 +489,7 @@ bgp_rx_open(struct 
bgp_conn *conn, byte *pkt, int len) /* Check state */ if (conn->state != BS_OPENSENT) - { bgp_error(conn, 5, 0, NULL, 0); } + { bgp_error(conn, 5, 0, NULL, 0); return; } /* Check message contents */ if (len < 29 || len != 29 + pkt[28]) @@ -489,7 +501,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) id = get_u32(pkt+24); BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id); - p->remote_id = id; // ??? + conn->as4_support = 0; // Default value, possibly changed by capability. if (bgp_parse_options(conn, pkt+29, pkt[28])) return; @@ -499,7 +511,6 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) if (!id || id == 0xffffffff || id == p->local_id) { bgp_error(conn, 2, 3, pkt+24, -4); return; } - if (conn->advertised_as != p->remote_as) { bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return; @@ -513,6 +524,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) case BS_CONNECT: case BS_ACTIVE: case BS_OPENSENT: + case BS_CLOSE: break; case BS_OPENCONFIRM: if ((p->local_id < id) == (conn == &p->incoming_conn)) @@ -532,19 +544,13 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) bug("bgp_rx_open: Unknown state"); } - /* Make this connection primary */ - conn->primary = 1; - p->conn = conn; - /* Update our local variables */ - if (hold < p->cf->hold_time) - conn->hold_time = hold; - else - conn->hold_time = p->cf->hold_time; + conn->hold_time = MIN(hold, p->cf->hold_time); conn->keepalive_time = p->cf->keepalive_time ? 
: conn->hold_time / 3; - // p->remote_as = conn->advertised_as; p->remote_id = id; - DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id); + p->as4_session = p->cf->enable_as4 && conn->as4_support; + + DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session); bgp_schedule_packet(conn, PKT_KEEPALIVE); bgp_start_timer(conn->hold_timer, conn->hold_time); @@ -817,24 +823,41 @@ static struct { { 6, 0, "Cease" } }; +/** + * bgp_error_dsc - return BGP error description + * @buff: temporary buffer + * @code: BGP error code + * @subcode: BGP error subcode + * + * bgp_error_dsc() returns error description for BGP errors + * which might be static string or given temporary buffer. + */ +const byte * +bgp_error_dsc(byte *buff, unsigned code, unsigned subcode) +{ + unsigned i; + for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++) + if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode) + { + return bgp_msg_table[i].msg; + } + + bsprintf(buff, "Unknown error %d.%d", code, subcode); + return buff; +} + void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) { - byte *name, namebuf[16]; + const byte *name; + byte namebuf[32]; byte *t, argbuf[36]; unsigned i; if (code == 6 && !subcode) /* Don't report Cease messages */ return; - bsprintf(namebuf, "%d.%d", code, subcode); - name = namebuf; - for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++) - if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode) - { - name = bgp_msg_table[i].msg; - break; - } + name = bgp_error_dsc(namebuf, code, subcode); t = argbuf; if (len) { @@ -857,10 +880,13 @@ bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len) bgp_error(conn, 1, 2, pkt+16, 2); return; } - bgp_log_error(conn->bgp, "Received error notification", pkt[19], 
pkt[20], pkt+21, len-21); - conn->error_flag = 1; - if (conn->primary) - proto_notify_state(&conn->bgp->p, PS_STOP); + + unsigned code = pkt[19]; + unsigned subcode = pkt[20]; + bgp_log_error(conn->bgp, "Received error notification", code, subcode, pkt+21, len-21); + bgp_store_error(conn->bgp, conn, BE_BGP_RX, (code << 16) | subcode); + bgp_update_startup_delay(conn->bgp, conn, code, subcode); + bgp_conn_enter_close_state(conn); bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE); } @@ -874,10 +900,7 @@ bgp_rx_keepalive(struct bgp_conn *conn) switch (conn->state) { case BS_OPENCONFIRM: - DBG("BGP: UP!!!\n"); - conn->state = BS_ESTABLISHED; - bgp_attr_init(conn->bgp); - proto_notify_state(&conn->bgp->p, PS_UP); + bgp_conn_enter_established_state(conn); break; case BS_ESTABLISHED: break; @@ -930,18 +953,8 @@ bgp_rx(sock *sk, int size) DBG("BGP: RX hook: Got %d bytes\n", size); while (end >= pkt_start + BGP_HEADER_LENGTH) { - if (conn->error_flag) - { - /* - * We still need to remember the erroneous packet, so that - * we can generate error notifications properly. To avoid - * subsequent reads rewriting the buffer, we just reset the - * rx_hook. - */ - DBG("BGP: Error, dropping input\n"); - sk->rx_hook = NULL; - return 0; - } + if ((conn->state == BS_CLOSE) || (conn->sk != sk)) + return 0; for(i=0; i<16; i++) if (pkt_start[i] != 0xff) { |