diff options
Diffstat (limited to 'proto')
35 files changed, 3212 insertions, 186 deletions
@@ -1,4 +1,5 @@ H Protocols +C bfd C bgp C ospf C pipe diff --git a/proto/bfd/Doc b/proto/bfd/Doc new file mode 100644 index 00000000..7ee5d3ef --- /dev/null +++ b/proto/bfd/Doc @@ -0,0 +1 @@ +S bfd.c diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile new file mode 100644 index 00000000..c28cedec --- /dev/null +++ b/proto/bfd/Makefile @@ -0,0 +1,5 @@ +source=bfd.c packets.c io.c +root-rel=../../ +dir-name=proto/bfd + +include ../../Rules diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c new file mode 100644 index 00000000..5ebfadc1 --- /dev/null +++ b/proto/bfd/bfd.c @@ -0,0 +1,1114 @@ +/* + * BIRD -- Bidirectional Forwarding Detection (BFD) + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +/** + * DOC: Bidirectional Forwarding Detection + * + * The BFD protocol is implemented in three files: |bfd.c| containing the + * protocol logic and the protocol glue with BIRD core, |packets.c| handling BFD + * packet processing, RX, TX and protocol sockets. |io.c| then contains generic + * code for the event loop, threads and event sources (sockets, microsecond + * timers). This generic code will be merged to the main BIRD I/O code in the + * future. + * + * The BFD implementation uses a separate thread with an internal event loop for + * handling the protocol logic, which requires high-res and low-latency timing, + * so it is not affected by the rest of BIRD, which has several low-granularity + * hooks in the main loop, uses second-based timers and cannot offer good + * latency. The core of BFD protocol (the code related to BFD sessions, + * interfaces and packets) runs in the BFD thread, while the rest (the code + * related to BFD requests, BFD neighbors and the protocol glue) runs in the + * main thread. + * + * BFD sessions are represented by structure &bfd_session that contains a state + * related to the session and two timers (TX timer for periodic packets and hold + * timer for session timeout). These sessions are allocated from @session_slab + * and are accessible by two hash tables, @session_hash_id (by session ID) and + * @session_hash_ip (by IP addresses of neighbors). Slab and both hashes are in + * the main protocol structure &bfd_proto. The protocol logic related to BFD + * sessions is implemented in internal functions bfd_session_*(), which are + * expected to be called from the context of BFD thread, and external functions + * bfd_add_session(), bfd_remove_session() and bfd_reconfigure_session(), which + * form an interface to the BFD core for the rest and are expected to be called + * from the context of main thread. + * + * Each BFD session has an associated BFD interface, represented by structure + * &bfd_iface. A BFD interface contains a socket used for TX (the one for RX is + * shared in &bfd_proto), an interface configuration and reference counter. + * Compared to interface structures of other protocols, these structures are not + * created and removed based on interface notification events, but according to + * the needs of BFD sessions. When a new session is created, it requests a + * proper BFD interface by function bfd_get_iface(), which either finds an + * existing one in &iface_list (from &bfd_proto) or allocates a new one. When a + * session is removed, an associated iface is dicharged by bfd_free_iface(). + * + * BFD requests are the external API for the other protocols. When a protocol + * wants a BFD session, it calls bfd_request_session(), which creates a + * structure &bfd_request containing approprite information and an notify hook. + * This structure is a resource associated with the caller's resource pool. When + * a BFD protocol is available, a BFD request is submitted to the protocol, an + * appropriate BFD session is found or created and the request is attached to + * the session. When a session changes state, all attached requests (and related + * protocols) are notified. Note that BFD requests do not depend on BFD protocol + * running. When the BFD protocol is stopped or removed (or not available from + * beginning), related BFD requests are stored in @bfd_wait_list, where waits + * for a new protocol. + * + * BFD neighbors are just a way to statically configure BFD sessions without + * requests from other protocol. Structures &bfd_neighbor are part of BFD + * configuration (like static routes in the static protocol). BFD neighbors are + * handled by BFD protocol like it is a BFD client -- when a BFD neighbor is + * ready, the protocol just creates a BFD request like any other protocol. + * + * The protocol uses a new generic event loop (structure &birdloop) from |io.c|, + * which supports sockets, timers and events like the main loop. Timers + * (structure &timer2) are new microsecond based timers, while sockets and + * events are the same. A birdloop is associated with a thread (field @thread) + * in which event hooks are executed. Most functions for setting event sources + * (like sk_start() or tm2_start()) must be called from the context of that + * thread. Birdloop allows to temporarily acquire the context of that thread for + * the main thread by calling birdloop_enter() and then birdloop_leave(), which + * also ensures mutual exclusion with all event hooks. Note that resources + * associated with a birdloop (like timers) should be attached to the + * independent resource pool, detached from the main resource tree. + * + * There are two kinds of interaction between the BFD core (running in the BFD + * thread) and the rest of BFD (running in the main thread). The first kind are + * configuration calls from main thread to the BFD thread (like bfd_add_session()). + * These calls are synchronous and use birdloop_enter() mechanism for mutual + * exclusion. The second kind is a notification about session changes from the + * BFD thread to the main thread. This is done in an asynchronous way, sesions + * with pending notifications are linked (in the BFD thread) to @notify_list in + * &bfd_proto, and then bfd_notify_hook() in the main thread is activated using + * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and + * calls hooks from associated BFD requests. This @notify_list (and state fields + * in structure &bfd_session) is protected by a spinlock in &bfd_proto and + * functions bfd_lock_sessions() / bfd_unlock_sessions(). + * + * There are few data races (accessing @p->p.debug from TRACE() from the BFD + * thread and accessing some some private fields of %bfd_session from + * bfd_show_sessions() from the main thread, but these are harmless (i hope). + * + * TODO: document functions and access restrictions for fields in BFD structures. + * + * Supported standards: + * - RFC 5880 - main BFD standard + * - RFC 5881 - BFD for IP links + * - RFC 5882 - generic application of BFD + * - RFC 5883 - BFD for multihop paths + */ + +#include "bfd.h" + + +#define HASH_ID_KEY(n) n->loc_id +#define HASH_ID_NEXT(n) n->next_id +#define HASH_ID_EQ(a,b) (a == b) +#define HASH_ID_FN(k) (k) + +#define HASH_IP_KEY(n) n->addr +#define HASH_IP_NEXT(n) n->next_ip +#define HASH_IP_EQ(a,b) ipa_equal(a,b) +#define HASH_IP_FN(k) ipa_hash(k) + +static list bfd_proto_list; +static list bfd_wait_list; + +const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; + +static void bfd_session_set_min_tx(struct bfd_session *s, u32 val); +static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface); +static void bfd_free_iface(struct bfd_iface *ifa); +static inline void bfd_notify_kick(struct bfd_proto *p); + + +/* + * BFD sessions + */ + +static void +bfd_session_update_state(struct bfd_session *s, uint state, uint diag) +{ + struct bfd_proto *p = s->ifa->bfd; + uint old_state = s->loc_state; + int notify; + + if (state == old_state) + return; + + TRACE(D_EVENTS, "Session to %I changed state from %s to %s", + s->addr, bfd_state_names[old_state], bfd_state_names[state]); + + bfd_lock_sessions(p); + s->loc_state = state; + s->loc_diag = diag; + + notify = !NODE_VALID(&s->n); + if (notify) + add_tail(&p->notify_list, &s->n); + bfd_unlock_sessions(p); + + if (state == BFD_STATE_UP) + bfd_session_set_min_tx(s, s->ifa->cf->min_tx_int); + + if (old_state == BFD_STATE_UP) + bfd_session_set_min_tx(s, s->ifa->cf->idle_tx_int); + + if (notify) + bfd_notify_kick(p); +} + +static void +bfd_session_update_tx_interval(struct bfd_session *s) +{ + u32 tx_int = MAX(s->des_min_tx_int, s->rem_min_rx_int); + u32 tx_int_l = tx_int - (tx_int / 4); // 75 % + u32 tx_int_h = tx_int - (tx_int / 10); // 90 % + + s->tx_timer->recurrent = tx_int_l; + s->tx_timer->randomize = tx_int_h - tx_int_l; + + /* Do not set timer if no previous event */ + if (!s->last_tx) + return; + + /* Set timer relative to last tx_timer event */ + tm2_set(s->tx_timer, s->last_tx + tx_int_l); +} + +static void +bfd_session_update_detection_time(struct bfd_session *s, int kick) +{ + btime timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult; + + if (kick) + s->last_rx = current_time(); + + if (!s->last_rx) + return; + + tm2_set(s->hold_timer, s->last_rx + timeout); +} + +static void +bfd_session_control_tx_timer(struct bfd_session *s, int reset) +{ + // if (!s->opened) goto stop; + + if (s->passive && (s->rem_id == 0)) + goto stop; + + if (s->rem_demand_mode && + !s->poll_active && + (s->loc_state == BFD_STATE_UP) && + (s->rem_state == BFD_STATE_UP)) + goto stop; + + if (s->rem_min_rx_int == 0) + goto stop; + + /* So TX timer should run */ + if (reset || !tm2_active(s->tx_timer)) + { + s->last_tx = 0; + tm2_start(s->tx_timer, 0); + } + + return; + + stop: + tm2_stop(s->tx_timer); + s->last_tx = 0; +} + +static void +bfd_session_request_poll(struct bfd_session *s, u8 request) +{ + /* Not sure about this, but doing poll in this case does not make sense */ + if (s->rem_id == 0) + return; + + s->poll_scheduled |= request; + + if (s->poll_active) + return; + + s->poll_active = s->poll_scheduled; + s->poll_scheduled = 0; + + bfd_session_control_tx_timer(s, 1); +} + +static void +bfd_session_terminate_poll(struct bfd_session *s) +{ + u8 poll_done = s->poll_active & ~s->poll_scheduled; + + if (poll_done & BFD_POLL_TX) + s->des_min_tx_int = s->des_min_tx_new; + + if (poll_done & BFD_POLL_RX) + s->req_min_rx_int = s->req_min_rx_new; + + s->poll_active = s->poll_scheduled; + s->poll_scheduled = 0; + + /* Timers are updated by caller - bfd_session_process_ctl() */ +} + +void +bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old_rx_int) +{ + if (s->poll_active && (flags & BFD_FLAG_FINAL)) + bfd_session_terminate_poll(s); + + if ((s->des_min_tx_int != old_tx_int) || (s->rem_min_rx_int != old_rx_int)) + bfd_session_update_tx_interval(s); + + bfd_session_update_detection_time(s, 1); + + /* Update session state */ + int next_state = 0; + int diag = BFD_DIAG_NOTHING; + + switch (s->loc_state) + { + case BFD_STATE_ADMIN_DOWN: + return; + + case BFD_STATE_DOWN: + if (s->rem_state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT; + else if (s->rem_state == BFD_STATE_INIT) next_state = BFD_STATE_UP; + break; + + case BFD_STATE_INIT: + if (s->rem_state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; + else if (s->rem_state >= BFD_STATE_INIT) next_state = BFD_STATE_UP; + break; + + case BFD_STATE_UP: + if (s->rem_state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; + break; + } + + if (next_state) + bfd_session_update_state(s, next_state, diag); + + bfd_session_control_tx_timer(s, 0); + + if (flags & BFD_FLAG_POLL) + bfd_send_ctl(s->ifa->bfd, s, 1); +} + +static void +bfd_session_timeout(struct bfd_session *s) +{ + struct bfd_proto *p = s->ifa->bfd; + + TRACE(D_EVENTS, "Session to %I expired", s->addr); + + s->rem_state = BFD_STATE_DOWN; + s->rem_id = 0; + s->rem_min_tx_int = 0; + s->rem_min_rx_int = 1; + s->rem_demand_mode = 0; + s->rem_detect_mult = 0; + + s->poll_active = 0; + s->poll_scheduled = 0; + + bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT); + + bfd_session_control_tx_timer(s, 1); +} + +static void +bfd_session_set_min_tx(struct bfd_session *s, u32 val) +{ + /* Note that des_min_tx_int <= des_min_tx_new */ + + if (val == s->des_min_tx_new) + return; + + s->des_min_tx_new = val; + + /* Postpone timer update if des_min_tx_int increases and the session is up */ + if ((s->loc_state != BFD_STATE_UP) || (val < s->des_min_tx_int)) + { + s->des_min_tx_int = val; + bfd_session_update_tx_interval(s); + } + + bfd_session_request_poll(s, BFD_POLL_TX); +} + +static void +bfd_session_set_min_rx(struct bfd_session *s, u32 val) +{ + /* Note that req_min_rx_int >= req_min_rx_new */ + + if (val == s->req_min_rx_new) + return; + + s->req_min_rx_new = val; + + /* Postpone timer update if req_min_rx_int decreases and the session is up */ + if ((s->loc_state != BFD_STATE_UP) || (val > s->req_min_rx_int)) + { + s->req_min_rx_int = val; + bfd_session_update_detection_time(s, 0); + } + + bfd_session_request_poll(s, BFD_POLL_RX); +} + +struct bfd_session * +bfd_find_session_by_id(struct bfd_proto *p, u32 id) +{ + return HASH_FIND(p->session_hash_id, HASH_ID, id); +} + +struct bfd_session * +bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr) +{ + return HASH_FIND(p->session_hash_ip, HASH_IP, addr); +} + +static void +bfd_tx_timer_hook(timer2 *t) +{ + struct bfd_session *s = t->data; + + s->last_tx = current_time(); + bfd_send_ctl(s->ifa->bfd, s, 0); +} + +static void +bfd_hold_timer_hook(timer2 *t) +{ + bfd_session_timeout(t->data); +} + +static u32 +bfd_get_free_id(struct bfd_proto *p) +{ + u32 id; + for (id = random_u32(); 1; id++) + if (id && !bfd_find_session_by_id(p, id)) + break; + + return id; +} + +static struct bfd_session * +bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface) +{ + birdloop_enter(p->loop); + + struct bfd_iface *ifa = bfd_get_iface(p, local, iface); + + struct bfd_session *s = sl_alloc(p->session_slab); + bzero(s, sizeof(struct bfd_session)); + + s->addr = addr; + s->ifa = ifa; + s->loc_id = bfd_get_free_id(p); + + HASH_INSERT(p->session_hash_id, HASH_ID, s); + HASH_INSERT(p->session_hash_ip, HASH_IP, s); + + + /* Initialization of state variables - see RFC 5880 6.8.1 */ + s->loc_state = BFD_STATE_DOWN; + s->rem_state = BFD_STATE_DOWN; + s->des_min_tx_int = s->des_min_tx_new = ifa->cf->idle_tx_int; + s->req_min_rx_int = s->req_min_rx_new = ifa->cf->min_rx_int; + s->rem_min_rx_int = 1; + s->detect_mult = ifa->cf->multiplier; + s->passive = ifa->cf->passive; + + s->tx_timer = tm2_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); + s->hold_timer = tm2_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); + bfd_session_update_tx_interval(s); + bfd_session_control_tx_timer(s, 1); + + init_list(&s->request_list); + s->last_state_change = now; + + TRACE(D_EVENTS, "Session to %I added", s->addr); + + birdloop_leave(p->loop); + + return s; +} + +/* +static void +bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa) +{ + birdloop_enter(p->loop); + + s->opened = 1; + + bfd_session_control_tx_timer(s); + + birdloop_leave(p->loop); +} + +static void +bfd_close_session(struct bfd_proto *p, struct bfd_session *s) +{ + birdloop_enter(p->loop); + + s->opened = 0; + + bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN); + bfd_session_control_tx_timer(s); + + birdloop_leave(p->loop); +} +*/ + +static void +bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) +{ + ip_addr ip = s->addr; + + birdloop_enter(p->loop); + + bfd_free_iface(s->ifa); + + rfree(s->tx_timer); + rfree(s->hold_timer); + + HASH_REMOVE(p->session_hash_id, HASH_ID, s); + HASH_REMOVE(p->session_hash_ip, HASH_IP, s); + + sl_free(p->session_slab, s); + + TRACE(D_EVENTS, "Session to %I removed", ip); + + birdloop_leave(p->loop); +} + +static void +bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) +{ + birdloop_enter(p->loop); + + struct bfd_iface_config *cf = s->ifa->cf; + + u32 tx = (s->loc_state == BFD_STATE_UP) ? cf->min_tx_int : cf->idle_tx_int; + bfd_session_set_min_tx(s, tx); + bfd_session_set_min_rx(s, cf->min_rx_int); + s->detect_mult = cf->multiplier; + s->passive = cf->passive; + + bfd_session_control_tx_timer(s, 0); + + birdloop_leave(p->loop); + + TRACE(D_EVENTS, "Session to %I reconfigured", s->addr); +} + + +/* + * BFD interfaces + */ + +static struct bfd_iface_config bfd_default_iface = { + .min_rx_int = BFD_DEFAULT_MIN_RX_INT, + .min_tx_int = BFD_DEFAULT_MIN_TX_INT, + .idle_tx_int = BFD_DEFAULT_IDLE_TX_INT, + .multiplier = BFD_DEFAULT_MULTIPLIER +}; + +static inline struct bfd_iface_config * +bfd_find_iface_config(struct bfd_config *cf, struct iface *iface) +{ + struct bfd_iface_config *ic; + + ic = iface ? (void *) iface_patt_find(&cf->patt_list, iface, NULL) : cf->multihop; + + return ic ? ic : &bfd_default_iface; +} + +static struct bfd_iface * +bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) +{ + struct bfd_iface *ifa; + + WALK_LIST(ifa, p->iface_list) + if (ipa_equal(ifa->local, local) && (ifa->iface == iface)) + return ifa->uc++, ifa; + + struct bfd_config *cf = (struct bfd_config *) (p->p.cf); + struct bfd_iface_config *ic = bfd_find_iface_config(cf, iface); + + ifa = mb_allocz(p->tpool, sizeof(struct bfd_iface)); + ifa->local = local; + ifa->iface = iface; + ifa->cf = ic; + ifa->bfd = p; + + ifa->sk = bfd_open_tx_sk(p, local, iface); + ifa->uc = 1; + + add_tail(&p->iface_list, &ifa->n); + + return ifa; +} + +static void +bfd_free_iface(struct bfd_iface *ifa) +{ + if (!ifa || --ifa->uc) + return; + + rem_node(&ifa->n); + sk_stop(ifa->sk); + rfree(ifa->sk); + mb_free(ifa); +} + +static void +bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_config *nc) +{ + struct bfd_iface_config *nic = bfd_find_iface_config(nc, ifa->iface); + ifa->changed = !!memcmp(nic, ifa->cf, sizeof(struct bfd_iface_config)); + + /* This should be probably changed to not access ifa->cf from the BFD thread */ + birdloop_enter(p->loop); + ifa->cf = nic; + birdloop_leave(p->loop); +} + + +/* + * BFD requests + */ + +static void +bfd_request_notify(struct bfd_request *req, u8 state, u8 diag) +{ + u8 old_state = req->state; + + if (state == old_state) + return; + + req->state = state; + req->diag = diag; + req->old_state = old_state; + req->down = (old_state == BFD_STATE_UP) && (state == BFD_STATE_DOWN); + + if (req->hook) + req->hook(req); +} + +static int +bfd_add_request(struct bfd_proto *p, struct bfd_request *req) +{ + struct bfd_session *s = bfd_find_session_by_addr(p, req->addr); + u8 state, diag; + + if (!s) + s = bfd_add_session(p, req->addr, req->local, req->iface); + + rem_node(&req->n); + add_tail(&s->request_list, &req->n); + req->session = s; + + bfd_lock_sessions(p); + state = s->loc_state; + diag = s->loc_diag; + bfd_unlock_sessions(p); + + bfd_request_notify(req, state, diag); + + return 1; +} + +static void +bfd_submit_request(struct bfd_request *req) +{ + node *n; + + WALK_LIST(n, bfd_proto_list) + if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req)) + return; + + rem_node(&req->n); + add_tail(&bfd_wait_list, &req->n); + req->session = NULL; + bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0); +} + +static void +bfd_take_requests(struct bfd_proto *p) +{ + node *n, *nn; + + WALK_LIST_DELSAFE(n, nn, bfd_wait_list) + bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n)); +} + +static void +bfd_drop_requests(struct bfd_proto *p) +{ + node *n; + + HASH_WALK(p->session_hash_id, next_id, s) + { + /* We assume that p is not in bfd_proto_list */ + WALK_LIST_FIRST(n, s->request_list) + bfd_submit_request(SKIP_BACK(struct bfd_request, n, n)); + } + HASH_WALK_END; +} + +static struct resclass bfd_request_class; + +struct bfd_request * +bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, + void (*hook)(struct bfd_request *), void *data) +{ + struct bfd_request *req = ralloc(p, &bfd_request_class); + + /* Hack: self-link req->n, we will call rem_node() on it */ + req->n.prev = req->n.next = &req->n; + + req->addr = addr; + req->local = local; + req->iface = iface; + + bfd_submit_request(req); + + req->hook = hook; + req->data = data; + + return req; +} + +static void +bfd_request_free(resource *r) +{ + struct bfd_request *req = (struct bfd_request *) r; + struct bfd_session *s = req->session; + + rem_node(&req->n); + + /* Remove the session if there is no request for it. Skip that if + inside notify hooks, will be handled by bfd_notify_hook() itself */ + + if (s && EMPTY_LIST(s->request_list) && !s->notify_running) + bfd_remove_session(s->ifa->bfd, s); +} + +static void +bfd_request_dump(resource *r) +{ + struct bfd_request *req = (struct bfd_request *) r; + + debug("(code %p, data %p)\n", req->hook, req->data); +} + +static struct resclass bfd_request_class = { + "BFD request", + sizeof(struct bfd_request), + bfd_request_free, + bfd_request_dump, + NULL, + NULL +}; + + +/* + * BFD neighbors + */ + +static void +bfd_neigh_notify(struct neighbor *nb) +{ + struct bfd_proto *p = (struct bfd_proto *) nb->proto; + struct bfd_neighbor *n = nb->data; + + if (!n) + return; + + if ((nb->scope > 0) && !n->req) + { + ip_addr local = ipa_nonzero(n->local) ? n->local : nb->iface->addr->ip; + n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, NULL, NULL); + } + + if ((nb->scope <= 0) && n->req) + { + rfree(n->req); + n->req = NULL; + } +} + +static void +bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) +{ + n->active = 1; + + if (n->multihop) + { + n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, NULL, NULL); + return; + } + + struct neighbor *nb = neigh_find2(&p->p, &n->addr, n->iface, NEF_STICKY); + if (!nb) + { + log(L_ERR "%s: Invalid remote address %I%J", p->p.name, n->addr, n->iface); + return; + } + + if (nb->data) + { + log(L_ERR "%s: Duplicate neighbor %I", p->p.name, n->addr); + return; + } + + n->neigh = nb; + nb->data = n; + + if (nb->scope > 0) + bfd_neigh_notify(nb); + else + TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", n->addr, n->iface); +} + +static void +bfd_stop_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) +{ + if (n->neigh) + n->neigh->data = NULL; + n->neigh = NULL; + + rfree(n->req); + n->req = NULL; +} + +static inline int +bfd_same_neighbor(struct bfd_neighbor *x, struct bfd_neighbor *y) +{ + return ipa_equal(x->addr, y->addr) && ipa_equal(x->local, y->local) && + (x->iface == y->iface) && (x->multihop == y->multihop); +} + +static void +bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new) +{ + struct bfd_config *old = (struct bfd_config *) (p->p.cf); + struct bfd_neighbor *on, *nn; + + WALK_LIST(on, old->neigh_list) + { + WALK_LIST(nn, new->neigh_list) + if (bfd_same_neighbor(nn, on)) + { + nn->neigh = on->neigh; + if (nn->neigh) + nn->neigh->data = nn; + + nn->req = on->req; + nn->active = 1; + return; + } + + bfd_stop_neighbor(p, on); + } + + WALK_LIST(nn, new->neigh_list) + if (!nn->active) + bfd_start_neighbor(p, nn); +} + + +/* + * BFD notify socket + */ + +/* This core notify code should be replaced after main loop transition to birdloop */ + +int pipe(int pipefd[2]); +void pipe_drain(int fd); +void pipe_kick(int fd); + +static int +bfd_notify_hook(sock *sk, int len) +{ + struct bfd_proto *p = sk->data; + struct bfd_session *s; + list tmp_list; + u8 state, diag; + node *n, *nn; + + pipe_drain(sk->fd); + + bfd_lock_sessions(p); + init_list(&tmp_list); + add_tail_list(&tmp_list, &p->notify_list); + init_list(&p->notify_list); + bfd_unlock_sessions(p); + + WALK_LIST_FIRST(s, tmp_list) + { + bfd_lock_sessions(p); + rem2_node(&s->n); + state = s->loc_state; + diag = s->loc_diag; + bfd_unlock_sessions(p); + + /* FIXME: convert to btime and move to bfd_session_update_state() */ + s->last_state_change = now; + + s->notify_running = 1; + WALK_LIST_DELSAFE(n, nn, s->request_list) + bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag); + s->notify_running = 0; + + /* Remove the session if all requests were removed in notify hooks */ + if (EMPTY_LIST(s->request_list)) + bfd_remove_session(p, s); + } + + return 0; +} + +static inline void +bfd_notify_kick(struct bfd_proto *p) +{ + pipe_kick(p->notify_ws->fd); +} + +static void +bfd_noterr_hook(sock *sk, int err) +{ + struct bfd_proto *p = sk->data; + log(L_ERR "%s: Notify socket error: %m", p->p.name, err); +} + +static void +bfd_notify_init(struct bfd_proto *p) +{ + int pfds[2]; + sock *sk; + + int rv = pipe(pfds); + if (rv < 0) + die("pipe: %m"); + + sk = sk_new(p->p.pool); + sk->type = SK_MAGIC; + sk->rx_hook = bfd_notify_hook; + sk->err_hook = bfd_noterr_hook; + sk->fd = pfds[0]; + sk->data = p; + if (sk_open(sk) < 0) + die("bfd: sk_open failed"); + p->notify_rs = sk; + + /* The write sock is not added to any event loop */ + sk = sk_new(p->p.pool); + sk->type = SK_MAGIC; + sk->fd = pfds[1]; + sk->data = p; + sk->flags = SKF_THREAD; + if (sk_open(sk) < 0) + die("bfd: sk_open failed"); + p->notify_ws = sk; +} + + +/* + * BFD protocol glue + */ + +void +bfd_init_all(void) +{ + init_list(&bfd_proto_list); + init_list(&bfd_wait_list); +} + +static struct proto * +bfd_init(struct proto_config *c) +{ + struct proto *p = proto_new(c, sizeof(struct bfd_proto)); + + p->neigh_notify = bfd_neigh_notify; + + return p; +} + +static int +bfd_start(struct proto *P) +{ + struct bfd_proto *p = (struct bfd_proto *) P; + struct bfd_config *cf = (struct bfd_config *) (P->cf); + + p->loop = birdloop_new(); + p->tpool = rp_new(NULL, "BFD thread root"); + pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE); + + p->session_slab = sl_new(P->pool, sizeof(struct bfd_session)); + HASH_INIT(p->session_hash_id, P->pool, 8); + HASH_INIT(p->session_hash_ip, P->pool, 8); + + init_list(&p->iface_list); + + init_list(&p->notify_list); + bfd_notify_init(p); + + add_tail(&bfd_proto_list, &p->bfd_node); + + birdloop_enter(p->loop); + p->rx_1 = bfd_open_rx_sk(p, 0); + p->rx_m = bfd_open_rx_sk(p, 1); + birdloop_leave(p->loop); + + bfd_take_requests(p); + + struct bfd_neighbor *n; + WALK_LIST(n, cf->neigh_list) + bfd_start_neighbor(p, n); + + birdloop_start(p->loop); + + return PS_UP; +} + + +static int +bfd_shutdown(struct proto *P) +{ + struct bfd_proto *p = (struct bfd_proto *) P; + struct bfd_config *cf = (struct bfd_config *) (P->cf); + + rem_node(&p->bfd_node); + + birdloop_stop(p->loop); + + struct bfd_neighbor *n; + WALK_LIST(n, cf->neigh_list) + bfd_stop_neighbor(p, n); + + bfd_drop_requests(p); + + /* FIXME: This is hack */ + birdloop_enter(p->loop); + rfree(p->tpool); + birdloop_leave(p->loop); + + birdloop_free(p->loop); + + return PS_DOWN; +} + +static int +bfd_reconfigure(struct proto *P, struct proto_config *c) +{ + struct bfd_proto *p = (struct bfd_proto *) P; + // struct bfd_config *old = (struct bfd_config *) (P->cf); + struct bfd_config *new = (struct bfd_config *) c; + struct bfd_iface *ifa; + + birdloop_mask_wakeups(p->loop); + + WALK_LIST(ifa, p->iface_list) + bfd_reconfigure_iface(p, ifa, new); + + HASH_WALK(p->session_hash_id, next_id, s) + { + if (s->ifa->changed) + bfd_reconfigure_session(p, s); + } + HASH_WALK_END; + + bfd_reconfigure_neighbors(p, new); + + birdloop_unmask_wakeups(p->loop); + + return 1; +} + +/* Ensure one instance */ +struct bfd_config *bfd_cf; + +static void +bfd_preconfig(struct protocol *P UNUSED, struct config *c UNUSED) +{ + bfd_cf = NULL; +} + +static void +bfd_copy_config(struct proto_config *dest, struct proto_config *src) +{ + struct bfd_config *d = (struct bfd_config *) dest; + // struct bfd_config *s = (struct bfd_config *) src; + + /* We clean up patt_list and neigh_list, neighbors and ifaces are non-sharable */ + init_list(&d->patt_list); + init_list(&d->neigh_list); +} + +void +bfd_show_sessions(struct proto *P) +{ + byte tbuf[TM_DATETIME_BUFFER_SIZE]; + struct bfd_proto *p = (struct bfd_proto *) P; + uint state, diag; + u32 tx_int, timeout; + const char *ifname; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1013, "%s: is not up", p->p.name); + cli_msg(0, ""); + return; + } + + cli_msg(-1013, "%s:", p->p.name); + cli_msg(-1013, "%-25s %-10s %-10s %-10s %8s %8s", + "IP address", "Interface", "State", "Since", "Interval", "Timeout"); + + + HASH_WALK(p->session_hash_id, next_id, s) + { + /* FIXME: this is thread-unsafe, but perhaps harmless */ + state = s->loc_state; + diag = s->loc_diag; + ifname = (s->ifa && s->ifa->sk->iface) ? s->ifa->sk->iface->name : "---"; + tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0; + timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult; + + state = (state < 4) ? state : 0; + tm_format_datetime(tbuf, &config->tf_proto, s->last_state_change); + + cli_msg(-1013, "%-25I %-10s %-10s %-10s %3u.%03u %3u.%03u", + s->addr, ifname, bfd_state_names[state], tbuf, + tx_int / 1000, tx_int % 1000, timeout / 1000, timeout % 1000); + } + HASH_WALK_END; + + cli_msg(0, ""); +} + + +struct protocol proto_bfd = { + .name = "BFD", + .template = "bfd%d", + .init = bfd_init, + .start = bfd_start, + .shutdown = bfd_shutdown, + .reconfigure = bfd_reconfigure, + .preconfig = bfd_preconfig, + .copy_config = bfd_copy_config, +}; diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h new file mode 100644 index 00000000..9b61be64 --- /dev/null +++ b/proto/bfd/bfd.h @@ -0,0 +1,191 @@ +/* + * BIRD -- Bidirectional Forwarding Detection (BFD) + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_BFD_H_ +#define _BIRD_BFD_H_ + +#include <pthread.h> + +#include "nest/bird.h" +#include "nest/cli.h" +#include "nest/iface.h" +#include "nest/protocol.h" +#include "nest/route.h" +#include "conf/conf.h" +#include "lib/hash.h" +#include "lib/resource.h" +#include "lib/socket.h" +#include "lib/string.h" + +#include "nest/bfd.h" +#include "io.h" + + +#define BFD_CONTROL_PORT 3784 +#define BFD_ECHO_PORT 3785 +#define BFD_MULTI_CTL_PORT 4784 + +#define BFD_DEFAULT_MIN_RX_INT (10 MS_) +#define BFD_DEFAULT_MIN_TX_INT (100 MS_) +#define BFD_DEFAULT_IDLE_TX_INT (1 S_) +#define BFD_DEFAULT_MULTIPLIER 5 + + +struct bfd_iface_config; + +struct bfd_config +{ + struct proto_config c; + list patt_list; /* List of iface configs (struct bfd_iface_config) */ + list neigh_list; /* List of configured neighbors (struct bfd_neighbor) */ + struct bfd_iface_config *multihop; /* Multihop pseudoiface config */ +}; + +struct bfd_iface_config +{ + struct iface_patt i; + u32 min_rx_int; + u32 min_tx_int; + u32 idle_tx_int; + u8 multiplier; + u8 passive; +}; + +struct bfd_neighbor +{ + node n; + ip_addr addr; + ip_addr local; + struct iface *iface; + + struct neighbor *neigh; + struct bfd_request *req; + + u8 multihop; + u8 active; +}; + +struct bfd_proto +{ + struct proto p; + struct birdloop *loop; + pool *tpool; + pthread_spinlock_t lock; + node bfd_node; + + slab *session_slab; + HASH(struct bfd_session) session_hash_id; + HASH(struct bfd_session) session_hash_ip; + + sock *notify_rs; + sock *notify_ws; + list notify_list; + + sock *rx_1; + sock *rx_m; + list iface_list; +}; + +struct bfd_iface +{ + node n; + ip_addr local; + struct iface *iface; + struct bfd_iface_config *cf; + struct bfd_proto *bfd; + + sock *sk; + u32 uc; + u8 changed; +}; + +struct bfd_session +{ + node n; + ip_addr addr; /* Address of session */ + struct bfd_iface *ifa; /* Iface associated with session */ + struct bfd_session *next_id; /* Next in bfd.session_hash_id */ + struct bfd_session *next_ip; /* Next in bfd.session_hash_ip */ + + u8 opened_unused; + u8 passive; + u8 poll_active; + u8 poll_scheduled; + + u8 loc_state; + u8 rem_state; + u8 loc_diag; + u8 rem_diag; + u32 loc_id; /* Local session ID (local discriminator) */ + u32 rem_id; /* Remote session ID (remote discriminator) */ + u32 des_min_tx_int; /* Desired min rx interval, local option */ + u32 des_min_tx_new; /* Used for des_min_tx_int change */ + u32 req_min_rx_int; /* Required min tx interval, local option */ + u32 req_min_rx_new; /* Used for req_min_rx_int change */ + u32 rem_min_tx_int; /* Last received des_min_tx_int */ + u32 rem_min_rx_int; /* Last received req_min_rx_int */ + u8 demand_mode; /* Currently unused */ + u8 rem_demand_mode; + u8 detect_mult; /* Announced detect_mult, local option */ + u8 rem_detect_mult; /* Last received detect_mult */ + + btime last_tx; /* Time of last sent periodic control packet */ + btime last_rx; /* Time of last received valid control packet */ + + timer2 *tx_timer; /* Periodic control packet timer */ + timer2 *hold_timer; /* Timer for session down detection time */ + + list request_list; /* List of client requests (struct bfd_request) */ + bird_clock_t last_state_change; /* Time of last state change */ + u8 notify_running; /* 1 if notify hooks are running */ +}; + + +extern const char *bfd_state_names[]; + +#define BFD_STATE_ADMIN_DOWN 0 +#define BFD_STATE_DOWN 1 +#define BFD_STATE_INIT 2 +#define BFD_STATE_UP 3 + +#define BFD_DIAG_NOTHING 0 +#define BFD_DIAG_TIMEOUT 1 +#define BFD_DIAG_ECHO_FAILED 2 +#define BFD_DIAG_NEIGHBOR_DOWN 3 +#define BFD_DIAG_FWD_RESET 4 +#define BFD_DIAG_PATH_DOWN 5 +#define BFD_DIAG_C_PATH_DOWN 6 +#define BFD_DIAG_ADMIN_DOWN 7 +#define BFD_DIAG_RC_PATH_DOWN 8 + +#define BFD_POLL_TX 1 +#define BFD_POLL_RX 2 + +#define BFD_FLAGS 0x3f +#define BFD_FLAG_POLL (1 << 5) +#define BFD_FLAG_FINAL (1 << 4) +#define BFD_FLAG_CPI (1 << 3) +#define BFD_FLAG_AP (1 << 2) +#define BFD_FLAG_DEMAND (1 << 1) +#define BFD_FLAG_MULTIPOINT (1 << 0) + + +static inline void bfd_lock_sessions(struct bfd_proto *p) { pthread_spin_lock(&p->lock); } +static inline void bfd_unlock_sessions(struct bfd_proto *p) { pthread_spin_unlock(&p->lock); } + +/* bfd.c */ +struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id); +struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr); +void bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old_rx_int); +void bfd_show_sessions(struct proto *P); + +/* packets.c */ +void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final); +sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop); +sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); + + +#endif /* _BIRD_BFD_H_ */ diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y new file mode 100644 index 00000000..1bf8764f --- /dev/null +++ b/proto/bfd/config.Y @@ -0,0 +1,138 @@ +/* + * BIRD -- Router Advertisement Configuration + * + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +CF_HDR + +#include "proto/bfd/bfd.h" + +CF_DEFINES + +#define BFD_CFG ((struct bfd_config *) this_proto) +#define BFD_IFACE ((struct bfd_iface_config *) this_ipatt) +#define BFD_NEIGHBOR this_bfd_neighbor + +static struct bfd_neighbor *this_bfd_neighbor; + +extern struct bfd_config *bfd_cf; + +CF_DECLS + +CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE, + INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL) + +%type <iface> bfd_neigh_iface +%type <a> bfd_neigh_local +%type <i> bfd_neigh_multihop + +CF_GRAMMAR + +CF_ADDTO(proto, bfd_proto) + +bfd_proto_start: proto_start BFD +{ + this_proto = proto_config_new(&proto_bfd, sizeof(struct bfd_config), $1); + init_list(&BFD_CFG->patt_list); + init_list(&BFD_CFG->neigh_list); + + if (bfd_cf) + cf_error("Only one BFD instance allowed"); + bfd_cf = BFD_CFG; +}; + +bfd_proto_item: + proto_item + | INTERFACE bfd_iface + | MULTIHOP bfd_multihop + | NEIGHBOR bfd_neighbor + ; + +bfd_proto_opts: + /* empty */ + | bfd_proto_opts bfd_proto_item ';' + ; + +bfd_proto: + bfd_proto_start proto_name '{' bfd_proto_opts '}'; + + +bfd_iface_start: +{ + this_ipatt = cfg_allocz(sizeof(struct bfd_iface_config)); + init_list(&this_ipatt->ipn_list); + + BFD_IFACE->min_rx_int = BFD_DEFAULT_MIN_RX_INT; + BFD_IFACE->min_tx_int = BFD_DEFAULT_MIN_TX_INT; + BFD_IFACE->idle_tx_int = BFD_DEFAULT_IDLE_TX_INT; + BFD_IFACE->multiplier = BFD_DEFAULT_MULTIPLIER; +}; + +bfd_iface_item: + INTERVAL expr_us { BFD_IFACE->min_rx_int = BFD_IFACE->min_tx_int = $2; } + | MIN RX INTERVAL expr_us { BFD_IFACE->min_rx_int = $4; } + | MIN TX INTERVAL expr_us { BFD_IFACE->min_tx_int = $4; } + | IDLE TX INTERVAL expr_us { BFD_IFACE->idle_tx_int = $4; } + | MULTIPLIER expr { BFD_IFACE->multiplier = $2; } + | PASSIVE bool { BFD_IFACE->passive = $2; } + ; + +bfd_iface_opts: + /* empty */ + | bfd_iface_opts bfd_iface_item ';' + ; + +bfd_iface_opt_list: + /* empty */ + | '{' bfd_iface_opts '}' + ; + +bfd_iface: bfd_iface_start iface_patt_list bfd_iface_opt_list +{ add_tail(&BFD_CFG->patt_list, NODE this_ipatt); }; + +bfd_multihop: bfd_iface_start bfd_iface_opt_list +{ BFD_CFG->multihop = BFD_IFACE; }; + + +bfd_neigh_iface: + /* empty */ { $$ = NULL; } + | '%' SYM { $$ = if_get_by_name($2->name); } + | DEV TEXT { $$ = if_get_by_name($2); } + ; + +bfd_neigh_local: + /* empty */ { $$ = IPA_NONE; } + | LOCAL ipa { $$ = $2; } + ; + +bfd_neigh_multihop: + /* empty */ { $$ = 0; } + | MULTIHOP bool { $$ = $2; } + ; + +bfd_neighbor: ipa bfd_neigh_iface bfd_neigh_local bfd_neigh_multihop +{ + this_bfd_neighbor = cfg_allocz(sizeof(struct bfd_neighbor)); + add_tail(&BFD_CFG->neigh_list, NODE this_bfd_neighbor); + + BFD_NEIGHBOR->addr = $1; + BFD_NEIGHBOR->local = $3; + BFD_NEIGHBOR->iface = $2; + BFD_NEIGHBOR->multihop = $4; + + if ($4 && $2) + cf_error("Neighbor cannot set both interface and multihop"); + + if ($4 && ipa_zero($3)) + cf_error("Multihop neighbor requires specified local address"); +}; + + +CF_CLI(SHOW BFD SESSIONS, optsym, [<name>], [[Show information about BFD sessions]]) +{ bfd_show_sessions(proto_get_named($4, &proto_bfd)); }; + +CF_CODE + +CF_END diff --git a/proto/bfd/io.c b/proto/bfd/io.c new file mode 100644 index 00000000..fb150040 --- /dev/null +++ b/proto/bfd/io.c @@ -0,0 +1,768 @@ +/* + * BIRD -- I/O and event loop + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <pthread.h> +#include <time.h> +#include <sys/time.h> + +#include "nest/bird.h" +#include "proto/bfd/io.h" + +#include "lib/buffer.h" +#include "lib/heap.h" +#include "lib/lists.h" +#include "lib/resource.h" +#include "lib/event.h" +#include "lib/socket.h" + + +struct birdloop +{ + pool *pool; + pthread_t thread; + pthread_mutex_t mutex; + + btime last_time; + btime real_time; + u8 use_monotonic_clock; + + u8 stop_called; + u8 poll_active; + u8 wakeup_masked; + int wakeup_fds[2]; + + BUFFER(timer2 *) timers; + list event_list; + list sock_list; + uint sock_num; + + BUFFER(sock *) poll_sk; + BUFFER(struct pollfd) poll_fd; + u8 poll_changed; + u8 close_scheduled; +}; + + +/* + * Current thread context + */ + +static pthread_key_t current_loop_key; + +static inline struct birdloop * +birdloop_current(void) +{ + return pthread_getspecific(current_loop_key); +} + +static inline void +birdloop_set_current(struct birdloop *loop) +{ + pthread_setspecific(current_loop_key, loop); +} + +static inline void +birdloop_init_current(void) +{ + pthread_key_create(¤t_loop_key, NULL); +} + + +/* + * Time clock + */ + +static void times_update_alt(struct birdloop *loop); + +static void +times_init(struct birdloop *loop) +{ + struct timespec ts; + int rv; + + rv = clock_gettime(CLOCK_MONOTONIC, &ts); + if (rv < 0) + { + log(L_WARN "Monotonic clock is missing"); + + loop->use_monotonic_clock = 0; + loop->last_time = 0; + loop->real_time = 0; + times_update_alt(loop); + return; + } + + if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40))) + log(L_WARN "Monotonic clock is crazy"); + + loop->use_monotonic_clock = 1; + loop->last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); + loop->real_time = 0; +} + +static void +times_update_pri(struct birdloop *loop) +{ + struct timespec ts; + int rv; + + rv = clock_gettime(CLOCK_MONOTONIC, &ts); + if (rv < 0) + die("clock_gettime: %m"); + + btime new_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000); + + if (new_time < loop->last_time) + log(L_ERR "Monotonic clock is broken"); + + loop->last_time = new_time; + loop->real_time = 0; +} + +static void +times_update_alt(struct birdloop *loop) +{ + struct timeval tv; + int rv; + + rv = gettimeofday(&tv, NULL); + if (rv < 0) + die("gettimeofday: %m"); + + btime new_time = ((s64) tv.tv_sec S) + tv.tv_usec; + btime delta = new_time - loop->real_time; + + if ((delta < 0) || (delta > (60 S))) + { + if (loop->real_time) + log(L_WARN "Time jump, delta %d us", (int) delta); + + delta = 100 MS; + } + + loop->last_time += delta; + loop->real_time = new_time; +} + +static void +times_update(struct birdloop *loop) +{ + if (loop->use_monotonic_clock) + times_update_pri(loop); + else + times_update_alt(loop); +} + +btime +current_time(void) +{ + return birdloop_current()->last_time; +} + + +/* + * Wakeup code for birdloop + */ + +static void +pipe_new(int *pfds) +{ + int rv = pipe(pfds); + if (rv < 0) + die("pipe: %m"); + + if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0) + die("fcntl(O_NONBLOCK): %m"); + + if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0) + die("fcntl(O_NONBLOCK): %m"); +} + +void +pipe_drain(int fd) +{ + char buf[64]; + int rv; + + try: + rv = read(fd, buf, 64); + if (rv < 0) + { + if (errno == EINTR) + goto try; + if (errno == EAGAIN) + return; + die("wakeup read: %m"); + } + if (rv == 64) + goto try; +} + +void +pipe_kick(int fd) +{ + u64 v = 1; + int rv; + + try: + rv = write(fd, &v, sizeof(u64)); + if (rv < 0) + { + if (errno == EINTR) + goto try; + if (errno == EAGAIN) + return; + die("wakeup write: %m"); + } +} + +static inline void +wakeup_init(struct birdloop *loop) +{ + pipe_new(loop->wakeup_fds); +} + +static inline void +wakeup_drain(struct birdloop *loop) +{ + pipe_drain(loop->wakeup_fds[0]); +} + +static inline void +wakeup_do_kick(struct birdloop *loop) +{ + pipe_kick(loop->wakeup_fds[1]); +} + +static inline void +wakeup_kick(struct birdloop *loop) +{ + if (!loop->wakeup_masked) + wakeup_do_kick(loop); + else + loop->wakeup_masked = 2; +} + + +/* + * Events + */ + +static inline uint +events_waiting(struct birdloop *loop) +{ + return !EMPTY_LIST(loop->event_list); +} + +static inline void +events_init(struct birdloop *loop) +{ + init_list(&loop->event_list); +} + +static void +events_fire(struct birdloop *loop) +{ + times_update(loop); + ev_run_list(&loop->event_list); +} + +void +ev2_schedule(event *e) +{ + struct birdloop *loop = birdloop_current(); + + if (loop->poll_active && EMPTY_LIST(loop->event_list)) + wakeup_kick(loop); + + if (e->n.next) + rem_node(&e->n); + + add_tail(&loop->event_list, &e->n); +} + + +/* + * Timers + */ + +#define TIMER_LESS(a,b) ((a)->expires < (b)->expires) +#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \ + heap[a]->index = (a), heap[b]->index = (b)) + +static inline uint timers_count(struct birdloop *loop) +{ return loop->timers.used - 1; } + +static inline timer2 *timers_first(struct birdloop *loop) +{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; } + + +static void +tm2_free(resource *r) +{ + timer2 *t = (timer2 *) r; + + tm2_stop(t); +} + +static void +tm2_dump(resource *r) +{ + timer2 *t = (timer2 *) r; + + debug("(code %p, data %p, ", t->hook, t->data); + if (t->randomize) + debug("rand %d, ", t->randomize); + if (t->recurrent) + debug("recur %d, ", t->recurrent); + if (t->expires) + debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS); + else + debug("inactive)\n"); +} + + +static struct resclass tm2_class = { + "Timer", + sizeof(timer2), + tm2_free, + tm2_dump, + NULL, + NULL +}; + +timer2 * +tm2_new(pool *p) +{ + timer2 *t = ralloc(p, &tm2_class); + t->index = -1; + return t; +} + +void +tm2_set(timer2 *t, btime when) +{ + struct birdloop *loop = birdloop_current(); + uint tc = timers_count(loop); + + if (!t->expires) + { + t->index = ++tc; + t->expires = when; + BUFFER_PUSH(loop->timers) = t; + HEAP_INSERT(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP); + } + else if (t->expires < when) + { + t->expires = when; + HEAP_INCREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); + } + else if (t->expires > when) + { + t->expires = when; + HEAP_DECREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); + } + + if (loop->poll_active && (t->index == 1)) + wakeup_kick(loop); +} + +void +tm2_start(timer2 *t, btime after) +{ + tm2_set(t, current_time() + MAX(after, 0)); +} + +void +tm2_stop(timer2 *t) +{ + if (!t->expires) + return; + + struct birdloop *loop = birdloop_current(); + uint tc = timers_count(loop); + + HEAP_DELETE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index); + BUFFER_POP(loop->timers); + + t->index = -1; + t->expires = 0; +} + +static void +timers_init(struct birdloop *loop) +{ + BUFFER_INIT(loop->timers, loop->pool, 4); + BUFFER_PUSH(loop->timers) = NULL; +} + +static void +timers_fire(struct birdloop *loop) +{ + btime base_time; + timer2 *t; + + times_update(loop); + base_time = loop->last_time; + + while (t = timers_first(loop)) + { + if (t->expires > base_time) + return; + + if (t->recurrent) + { + btime when = t->expires + t->recurrent; + + if (when <= loop->last_time) + when = loop->last_time + t->recurrent; + + if (t->randomize) + when += random() % (t->randomize + 1); + + tm2_set(t, when); + } + else + tm2_stop(t); + + t->hook(t); + } +} + + +/* + * Sockets + */ + +static void +sockets_init(struct birdloop *loop) +{ + init_list(&loop->sock_list); + loop->sock_num = 0; + + BUFFER_INIT(loop->poll_sk, loop->pool, 4); + BUFFER_INIT(loop->poll_fd, loop->pool, 4); + loop->poll_changed = 1; /* add wakeup fd */ +} + +static void +sockets_add(struct birdloop *loop, sock *s) +{ + add_tail(&loop->sock_list, &s->n); + loop->sock_num++; + + s->index = -1; + loop->poll_changed = 1; + + if (loop->poll_active) + wakeup_kick(loop); +} + +void +sk_start(sock *s) +{ + struct birdloop *loop = birdloop_current(); + + sockets_add(loop, s); +} + +static void +sockets_remove(struct birdloop *loop, sock *s) +{ + rem_node(&s->n); + loop->sock_num--; + + if (s->index >= 0) + loop->poll_sk.data[s->index] = NULL; + + s->index = -1; + loop->poll_changed = 1; + + /* Wakeup moved to sk_stop() */ +} + +void +sk_stop(sock *s) +{ + struct birdloop *loop = birdloop_current(); + + sockets_remove(loop, s); + + if (loop->poll_active) + { + loop->close_scheduled = 1; + wakeup_kick(loop); + } + else + close(s->fd); + + s->fd = -1; +} + +static inline uint sk_want_events(sock *s) +{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); } + +/* +FIXME: this should be called from sock code + +static void +sockets_update(struct birdloop *loop, sock *s) +{ + if (s->index >= 0) + loop->poll_fd.data[s->index].events = sk_want_events(s); +} +*/ + +static void +sockets_prepare(struct birdloop *loop) +{ + BUFFER_SET(loop->poll_sk, loop->sock_num + 1); + BUFFER_SET(loop->poll_fd, loop->sock_num + 1); + + struct pollfd *pfd = loop->poll_fd.data; + sock **psk = loop->poll_sk.data; + int i = 0; + node *n; + + WALK_LIST(n, loop->sock_list) + { + sock *s = SKIP_BACK(sock, n, n); + + ASSERT(i < loop->sock_num); + + s->index = i; + *psk = s; + pfd->fd = s->fd; + pfd->events = sk_want_events(s); + pfd->revents = 0; + + pfd++; + psk++; + i++; + } + + ASSERT(i == loop->sock_num); + + /* Add internal wakeup fd */ + *psk = NULL; + pfd->fd = loop->wakeup_fds[0]; + pfd->events = POLLIN; + pfd->revents = 0; + + loop->poll_changed = 0; +} + +static void +sockets_close_fds(struct birdloop *loop) +{ + struct pollfd *pfd = loop->poll_fd.data; + sock **psk = loop->poll_sk.data; + int poll_num = loop->poll_fd.used - 1; + + int i; + for (i = 0; i < poll_num; i++) + if (psk[i] == NULL) + close(pfd[i].fd); + + loop->close_scheduled = 0; +} + +int sk_read(sock *s); +int sk_write(sock *s); + +static void +sockets_fire(struct birdloop *loop) +{ + struct pollfd *pfd = loop->poll_fd.data; + sock **psk = loop->poll_sk.data; + int poll_num = loop->poll_fd.used - 1; + + times_update(loop); + + /* Last fd is internal wakeup fd */ + if (pfd[loop->sock_num].revents & POLLIN) + wakeup_drain(loop); + + int i; + for (i = 0; i < poll_num; pfd++, psk++, i++) + { + int e = 1; + + if (! pfd->revents) + continue; + + if (pfd->revents & POLLNVAL) + die("poll: invalid fd %d", pfd->fd); + + if (pfd->revents & POLLIN) + while (e && *psk && (*psk)->rx_hook) + e = sk_read(*psk); + + e = 1; + if (pfd->revents & POLLOUT) + while (e && *psk) + e = sk_write(*psk); + } +} + + +/* + * Birdloop + */ + +static void * birdloop_main(void *arg); + +struct birdloop * +birdloop_new(void) +{ + /* FIXME: this init should be elsewhere and thread-safe */ + static int init = 0; + if (!init) + { birdloop_init_current(); init = 1; } + + pool *p = rp_new(NULL, "Birdloop root"); + struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); + loop->pool = p; + pthread_mutex_init(&loop->mutex, NULL); + + times_init(loop); + wakeup_init(loop); + + events_init(loop); + timers_init(loop); + sockets_init(loop); + + return loop; +} + +void +birdloop_start(struct birdloop *loop) +{ + int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop); + if (rv) + die("pthread_create(): %M", rv); +} + +void +birdloop_stop(struct birdloop *loop) +{ + pthread_mutex_lock(&loop->mutex); + loop->stop_called = 1; + wakeup_do_kick(loop); + pthread_mutex_unlock(&loop->mutex); + + int rv = pthread_join(loop->thread, NULL); + if (rv) + die("pthread_join(): %M", rv); +} + +void +birdloop_free(struct birdloop *loop) +{ + rfree(loop->pool); +} + + +void +birdloop_enter(struct birdloop *loop) +{ + /* TODO: these functions could save and restore old context */ + pthread_mutex_lock(&loop->mutex); + birdloop_set_current(loop); +} + +void +birdloop_leave(struct birdloop *loop) +{ + /* TODO: these functions could save and restore old context */ + birdloop_set_current(NULL); + pthread_mutex_unlock(&loop->mutex); +} + +void +birdloop_mask_wakeups(struct birdloop *loop) +{ + pthread_mutex_lock(&loop->mutex); + loop->wakeup_masked = 1; + pthread_mutex_unlock(&loop->mutex); +} + +void +birdloop_unmask_wakeups(struct birdloop *loop) +{ + pthread_mutex_lock(&loop->mutex); + if (loop->wakeup_masked == 2) + wakeup_do_kick(loop); + loop->wakeup_masked = 0; + pthread_mutex_unlock(&loop->mutex); +} + +static void * +birdloop_main(void *arg) +{ + struct birdloop *loop = arg; + timer2 *t; + int rv, timeout; + + birdloop_set_current(loop); + + pthread_mutex_lock(&loop->mutex); + while (1) + { + events_fire(loop); + timers_fire(loop); + + times_update(loop); + if (events_waiting(loop)) + timeout = 0; + else if (t = timers_first(loop)) + timeout = (tm2_remains(t) TO_MS) + 1; + else + timeout = -1; + + if (loop->poll_changed) + sockets_prepare(loop); + + loop->poll_active = 1; + pthread_mutex_unlock(&loop->mutex); + + try: + rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout); + if (rv < 0) + { + if (errno == EINTR || errno == EAGAIN) + goto try; + die("poll: %m"); + } + + pthread_mutex_lock(&loop->mutex); + loop->poll_active = 0; + + if (loop->close_scheduled) + sockets_close_fds(loop); + + if (loop->stop_called) + break; + + if (rv) + sockets_fire(loop); + + timers_fire(loop); + } + + loop->stop_called = 0; + pthread_mutex_unlock(&loop->mutex); + + return NULL; +} + + diff --git a/proto/bfd/io.h b/proto/bfd/io.h new file mode 100644 index 00000000..641ee054 --- /dev/null +++ b/proto/bfd/io.h @@ -0,0 +1,99 @@ +/* + * BIRD -- I/O and event loop + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_BFD_IO_H_ +#define _BIRD_BFD_IO_H_ + +#include "nest/bird.h" +#include "lib/lists.h" +#include "lib/resource.h" +#include "lib/event.h" +#include "lib/socket.h" +// #include "lib/timer.h" + + +typedef struct timer2 +{ + resource r; + void (*hook)(struct timer2 *); + void *data; + + btime expires; /* 0=inactive */ + uint randomize; /* Amount of randomization */ + uint recurrent; /* Timer recurrence */ + + int index; +} timer2; + + +btime current_time(void); + +void ev2_schedule(event *e); + + +timer2 *tm2_new(pool *p); +void tm2_set(timer2 *t, btime when); +void tm2_start(timer2 *t, btime after); +void tm2_stop(timer2 *t); + +static inline int +tm2_active(timer2 *t) +{ + return t->expires != 0; +} + +static inline btime +tm2_remains(timer2 *t) +{ + btime now = current_time(); + return (t->expires > now) ? (t->expires - now) : 0; +} + +static inline timer2 * +tm2_new_init(pool *p, void (*hook)(struct timer2 *), void *data, uint rec, uint rand) +{ + timer2 *t = tm2_new(p); + t->hook = hook; + t->data = data; + t->recurrent = rec; + t->randomize = rand; + return t; +} + +static inline void +tm2_set_max(timer2 *t, btime when) +{ + if (when > t->expires) + tm2_set(t, when); +} + +/* +static inline void +tm2_start_max(timer2 *t, btime after) +{ + btime rem = tm2_remains(t); + tm2_start(t, MAX_(rem, after)); +} +*/ + + +void sk_start(sock *s); +void sk_stop(sock *s); + + + +struct birdloop *birdloop_new(void); +void birdloop_start(struct birdloop *loop); +void birdloop_stop(struct birdloop *loop); +void birdloop_free(struct birdloop *loop); + +void birdloop_enter(struct birdloop *loop); +void birdloop_leave(struct birdloop *loop); +void birdloop_mask_wakeups(struct birdloop *loop); +void birdloop_unmask_wakeups(struct birdloop *loop); + + +#endif /* _BIRD_BFD_IO_H_ */ diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c new file mode 100644 index 00000000..fc2616ca --- /dev/null +++ b/proto/bfd/packets.c @@ -0,0 +1,248 @@ +/* + * BIRD -- Bidirectional Forwarding Detection (BFD) + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "bfd.h" + + +struct bfd_ctl_packet +{ + u8 vdiag; /* version and diagnostic */ + u8 flags; /* state and flags */ + u8 detect_mult; + u8 length; + u32 snd_id; /* sender ID, aka 'my discriminator' */ + u32 rcv_id; /* receiver ID, aka 'your discriminator' */ + u32 des_min_tx_int; + u32 req_min_rx_int; + u32 req_min_echo_rx_int; +}; + +#define BFD_BASE_LEN sizeof(struct bfd_ctl_packet) +#define BFD_MAX_LEN 64 + +static inline u8 bfd_pack_vdiag(u8 version, u8 diag) +{ return (version << 5) | diag; } + +static inline u8 bfd_pack_flags(u8 state, u8 flags) +{ return (state << 6) | flags; } + +static inline u8 bfd_pkt_get_version(struct bfd_ctl_packet *pkt) +{ return pkt->vdiag >> 5; } + +static inline u8 bfd_pkt_get_diag(struct bfd_ctl_packet *pkt) +{ return pkt->vdiag && 0x1f; } + + +static inline u8 bfd_pkt_get_state(struct bfd_ctl_packet *pkt) +{ return pkt->flags >> 6; } + +static inline void bfd_pkt_set_state(struct bfd_ctl_packet *pkt, u8 val) +{ pkt->flags = val << 6; } + + +char * +bfd_format_flags(u8 flags, char *buf) +{ + char *bp = buf; + if (flags & BFD_FLAGS) *bp++ = ' '; + if (flags & BFD_FLAG_POLL) *bp++ = 'P'; + if (flags & BFD_FLAG_FINAL) *bp++ = 'F'; + if (flags & BFD_FLAG_CPI) *bp++ = 'C'; + if (flags & BFD_FLAG_AP) *bp++ = 'A'; + if (flags & BFD_FLAG_DEMAND) *bp++ = 'D'; + if (flags & BFD_FLAG_MULTIPOINT) *bp++ = 'M'; + *bp = 0; + + return buf; +} + +void +bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) +{ + sock *sk = s->ifa->sk; + struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->tbuf; + char fb[8]; + + pkt->vdiag = bfd_pack_vdiag(1, s->loc_diag); + pkt->flags = bfd_pack_flags(s->loc_state, 0); + pkt->detect_mult = s->detect_mult; + pkt->length = BFD_BASE_LEN; + pkt->snd_id = htonl(s->loc_id); + pkt->rcv_id = htonl(s->rem_id); + pkt->des_min_tx_int = htonl(s->des_min_tx_new); + pkt->req_min_rx_int = htonl(s->req_min_rx_new); + pkt->req_min_echo_rx_int = 0; + + if (final) + pkt->flags |= BFD_FLAG_FINAL; + else if (s->poll_active) + pkt->flags |= BFD_FLAG_POLL; + + if (sk->tbuf != sk->tpos) + log(L_WARN "%s: Old packet overwritten in TX buffer", p->p.name); + + TRACE(D_PACKETS, "Sending CTL to %I [%s%s]", s->addr, + bfd_state_names[s->loc_state], bfd_format_flags(pkt->flags, fb)); + + sk_send_to(sk, pkt->length, s->addr, sk->dport); +} + +#define DROP(DSC,VAL) do { err_dsc = DSC; err_val = VAL; goto drop; } while(0) + +static int +bfd_rx_hook(sock *sk, int len) +{ + struct bfd_proto *p = sk->data; + struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->rbuf; + const char *err_dsc = NULL; + uint err_val = 0; + char fb[8]; + + if ((sk->sport == BFD_CONTROL_PORT) && (sk->ttl < 255)) + DROP("wrong TTL", sk->ttl); + + if (len < BFD_BASE_LEN) + DROP("too short", len); + + u8 version = bfd_pkt_get_version(pkt); + if (version != 1) + DROP("version mismatch", version); + + if ((pkt->length < BFD_BASE_LEN) || (pkt->length > len)) + DROP("length mismatch", pkt->length); + + if (pkt->detect_mult == 0) + DROP("invalid detect mult", 0); + + if ((pkt->flags & BFD_FLAG_MULTIPOINT) || + ((pkt->flags & BFD_FLAG_POLL) && (pkt->flags & BFD_FLAG_FINAL))) + DROP("invalid flags", pkt->flags); + + if (pkt->snd_id == 0) + DROP("invalid my discriminator", 0); + + struct bfd_session *s; + u32 id = ntohl(pkt->rcv_id); + + if (id) + { + s = bfd_find_session_by_id(p, id); + + if (!s) + DROP("unknown session id", id); + } + else + { + u8 ps = bfd_pkt_get_state(pkt); + if (ps > BFD_STATE_DOWN) + DROP("invalid init state", ps); + + s = bfd_find_session_by_addr(p, sk->faddr); + + /* FIXME: better session matching and message */ + if (!s) + return 1; + } + + /* FIXME: better authentication handling and message */ + if (pkt->flags & BFD_FLAG_AP) + DROP("authentication not supported", 0); + + + u32 old_tx_int = s->des_min_tx_int; + u32 old_rx_int = s->rem_min_rx_int; + + s->rem_id= ntohl(pkt->snd_id); + s->rem_state = bfd_pkt_get_state(pkt); + s->rem_diag = bfd_pkt_get_diag(pkt); + s->rem_demand_mode = pkt->flags & BFD_FLAG_DEMAND; + s->rem_min_tx_int = ntohl(pkt->des_min_tx_int); + s->rem_min_rx_int = ntohl(pkt->req_min_rx_int); + s->rem_detect_mult = pkt->detect_mult; + + TRACE(D_PACKETS, "CTL received from %I [%s%s]", sk->faddr, + bfd_state_names[s->rem_state], bfd_format_flags(pkt->flags, fb)); + + bfd_session_process_ctl(s, pkt->flags, old_tx_int, old_rx_int); + return 1; + + drop: + log(L_REMOTE "%s: Bad packet from %I - %s (%u)", p->p.name, sk->faddr, err_dsc, err_val); + return 1; +} + +static void +bfd_err_hook(sock *sk, int err) +{ + struct bfd_proto *p = sk->data; + log(L_ERR "%s: Socket error: %m", p->p.name, err); +} + +sock * +bfd_open_rx_sk(struct bfd_proto *p, int multihop) +{ + sock *sk = sk_new(p->tpool); + sk->type = SK_UDP; + sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; + sk->data = p; + + sk->rbsize = BFD_MAX_LEN; + sk->rx_hook = bfd_rx_hook; + sk->err_hook = bfd_err_hook; + + /* TODO: configurable ToS and priority */ + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->priority = sk_priority_control; + sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0); + +#ifdef IPV6 + sk->flags |= SKF_V6ONLY; +#endif + + if (sk_open(sk) < 0) + goto err; + + sk_start(sk); + return sk; + + err: + rfree(sk); + return NULL; +} + +sock * +bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) +{ + sock *sk = sk_new(p->tpool); + sk->type = SK_UDP; + sk->saddr = local; + sk->dport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; + sk->iface = ifa; + sk->data = p; + + sk->tbsize = BFD_MAX_LEN; + sk->err_hook = bfd_err_hook; + + /* TODO: configurable ToS, priority and TTL security */ + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->priority = sk_priority_control; + sk->ttl = ifa ? 255 : -1; + sk->flags = SKF_THREAD; + +#ifdef IPV6 + sk->flags |= SKF_V6ONLY; +#endif + + if (sk_open(sk) < 0) + goto err; + + sk_start(sk); + return sk; + + err: + rfree(sk); + return NULL; +} diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 837a6861..d34e2ae3 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -238,7 +238,7 @@ bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED) as = get_u32(data); data += 4; - bsprintf(buf, "%d.%d.%d.%d AS%d", data[0], data[1], data[2], data[3], as); + bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as); } static int @@ -1032,7 +1032,8 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p rta->dest != RTD_ROUTER || ipa_equal(rta->gw, IPA_NONE) || ipa_has_link_scope(rta->gw) || - (!p->is_internal && (!p->neigh || (rta->iface != p->neigh->iface)))) + (!p->is_internal && !p->cf->next_hop_keep && + (!p->neigh || (rta->iface != p->neigh->iface)))) set_next_hop(z, p->source_addr); else set_next_hop(z, rta->gw); @@ -1046,8 +1047,9 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p static inline int bgp_as_path_loopy(struct bgp_proto *p, rta *a) { + int num = p->cf->allow_local_as + 1; eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - return (e && as_path_is_member(e->u.ptr, p->local_as)); + return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num)); } static inline int @@ -1100,10 +1102,13 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr, * eBGP single-hop -> keep next_hop if on the same iface. * If the next_hop is zero (i.e. link-local), keep only if on the same iface. + * + * Note that same-iface-check uses iface from route, which is based on gw. */ a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); if (a && !p->cf->next_hop_self && - ((p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) || + (p->cf->next_hop_keep || + (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) || (p->neigh && (e->attrs->iface == p->neigh->iface)))) { /* Leave the original next hop attribute, will check later where does it point */ @@ -1444,7 +1449,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) /* The default case - find a new best-in-group route */ r = new; /* new may not be in the list */ - for (s=net->routes; s; s=s->next) + for (s=net->routes; rte_is_valid(s); s=s->next) if (use_deterministic_med(s) && same_group(s, lpref, lasn)) { s->u.bgp.suppressed = 1; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index f290f227..81a263bb 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -59,8 +59,8 @@ #include "nest/iface.h" #include "nest/protocol.h" #include "nest/route.h" -#include "nest/locks.h" #include "nest/cli.h" +#include "nest/locks.h" #include "conf/conf.h" #include "lib/socket.h" #include "lib/resource.h" @@ -76,6 +76,7 @@ static void bgp_close(struct bgp_proto *p, int apply_md5); static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags); +static void bgp_update_bfd(struct bgp_proto *p, int use_bfd); /** @@ -153,8 +154,12 @@ bgp_initiate(struct bgp_proto *p) if (rv < 0) return; + if (p->cf->bfd) + bgp_update_bfd(p, p->cf->bfd); + if (p->startup_delay) { + p->start_state = BSS_DELAY; BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay); bgp_start_timer(p->startup_timer, p->startup_delay); } @@ -386,10 +391,12 @@ bgp_conn_enter_close_state(struct bgp_conn *conn) int os = conn->state; bgp_conn_set_state(conn, BS_CLOSE); - tm_stop(conn->hold_timer); tm_stop(conn->keepalive_timer); conn->sk->rx_hook = NULL; + /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */ + bgp_start_timer(conn->hold_timer, 10); + if (os == BS_ESTABLISHED) bgp_conn_leave_established_state(p); } @@ -483,9 +490,18 @@ static void bgp_hold_timeout(timer *t) { struct bgp_conn *conn = t->data; + struct bgp_proto *p = conn->bgp; DBG("BGP: Hold timeout\n"); + /* We are already closing the connection - just do hangup */ + if (conn->state == BS_CLOSE) + { + BGP_TRACE(D_EVENTS, "Connection stalled"); + bgp_conn_enter_idle_state(conn); + return; + } + /* If there is something in input queue, we are probably congested and perhaps just not processed BGP packets in time. */ @@ -737,6 +753,9 @@ bgp_neigh_notify(neighbor *n) { struct bgp_proto *p = (struct bgp_proto *) n->proto; + if (! (n->flags & NEF_STICKY)) + return; + if (n->scope > 0) { if ((p->p.proto_state == PS_START) && (p->start_state == BSS_PREPARE)) @@ -756,6 +775,37 @@ bgp_neigh_notify(neighbor *n) } } +static void +bgp_bfd_notify(struct bfd_request *req) +{ + struct bgp_proto *p = req->data; + int ps = p->p.proto_state; + + if (req->down && ((ps == PS_START) || (ps == PS_UP))) + { + BGP_TRACE(D_EVENTS, "BFD session down"); + bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); + if (ps == PS_UP) + bgp_update_startup_delay(p); + bgp_stop(p, 0); + } +} + +static void +bgp_update_bfd(struct bgp_proto *p, int use_bfd) +{ + if (use_bfd && !p->bfd_req) + p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr, + p->cf->multihop ? NULL : p->neigh->iface, + bgp_bfd_notify, p); + + if (!use_bfd && p->bfd_req) + { + rfree(p->bfd_req); + p->bfd_req = NULL; + } +} + static int bgp_reload_routes(struct proto *P) { @@ -816,6 +866,7 @@ bgp_start(struct proto *P) p->outgoing_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE; p->neigh = NULL; + p->bfd_req = NULL; rt_lock_table(p->igp_table); @@ -845,7 +896,6 @@ bgp_start(struct proto *P) lock->iface = p->cf->iface; lock->type = OBJLOCK_TCP; lock->port = BGP_PORT; - lock->iface = NULL; lock->hook = bgp_start_locked; lock->data = p; olock_acquire(lock); @@ -883,6 +933,7 @@ bgp_shutdown(struct proto *P) subcode = 4; // Errcode 6, 4 - administrative reset break; + case PDC_RX_LIMIT_HIT: case PDC_IN_LIMIT_HIT: subcode = 1; // Errcode 6, 1 - max number of prefixes reached /* log message for compatibility */ @@ -981,6 +1032,9 @@ bgp_check_config(struct bgp_config *c) ipa_has_link_scope(c->source_addr))) cf_error("Multihop BGP cannot be used with link-local addresses"); + if (c->multihop && c->bfd && ipa_zero(c->source_addr)) + cf_error("Multihop BGP with BFD requires specified source address"); + /* Different default based on rs_client */ if (!c->missing_lladdr) @@ -1012,6 +1066,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) struct bgp_proto *p = (struct bgp_proto *) P; struct bgp_config *old = p->cf; + if (proto_get_router_id(C) != p->local_id) + return 0; + int same = !memcmp(((byte *) old) + sizeof(struct proto_config), ((byte *) new) + sizeof(struct proto_config), // password item is last and must be checked separately @@ -1020,6 +1077,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) || (old->password && new->password && !strcmp(old->password, new->password))) && (get_igp_table(old) == get_igp_table(new)); + if (same && (p->start_state > BSS_PREPARE)) + bgp_update_bfd(p, new->bfd); + /* We should update our copy of configuration ptr as old configuration will be freed */ if (same) p->cf = new; @@ -1101,7 +1161,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code) static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" }; static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""}; -static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket" }; +static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" }; static char *bgp_auto_errors[] = { "", "Route limit exceeded"}; static const char * @@ -1195,7 +1255,7 @@ bgp_show_proto_info(struct proto *P) cli_msg(-1006, " Source address: %I", p->source_addr); if (P->cf->in_limit) cli_msg(-1006, " Route limit: %d/%d", - p->p.stats.imp_routes, P->cf->in_limit->limit); + p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit); cli_msg(-1006, " Hold timer: %d/%d", tm_remains(c->hold_timer), c->hold_time); cli_msg(-1006, " Keepalive timer: %d/%d", diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b87de46e..a35c362c 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -11,6 +11,7 @@ #include <stdint.h> #include "nest/route.h" +#include "nest/bfd.h" struct linpool; struct eattr; @@ -24,6 +25,7 @@ struct bgp_config { int multihop; /* Number of hops if multihop */ int ttl_security; /* Enable TTL security [RFC5082] */ int next_hop_self; /* Always set next hop to local IP address */ + int next_hop_keep; /* Do not touch next hop attribute */ int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ int gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ int compare_path_lengths; /* Use path lengths when selecting best route */ @@ -44,6 +46,7 @@ struct bgp_config { int interpret_communities; /* Hardwired handling of well-known communities */ int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ int add_path; /* Use ADD-PATH extension [draft] */ + int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ unsigned connect_retry_time; unsigned hold_time, initial_hold_time; unsigned keepalive_time; @@ -52,8 +55,10 @@ struct bgp_config { unsigned error_delay_time_min; /* Time to wait after an error is detected */ unsigned error_delay_time_max; unsigned disable_after_error; /* Disable the protocol when error is detected */ + char *password; /* Password used for MD5 authentication */ struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ + int bfd; /* Use BFD for liveness detection */ }; #define MLL_SELF 1 @@ -106,6 +111,7 @@ struct bgp_proto { struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ + struct bfd_request *bfd_req; /* BFD request, if BFD is used */ ip_addr source_addr; /* Local address used as an advertised next hop */ rtable *igp_table; /* Table used for recursive next hop lookups */ struct event *event; /* Event for respawning and shutting process */ @@ -274,6 +280,8 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BS_ESTABLISHED 5 #define BS_CLOSE 6 /* Used during transition to BS_IDLE */ +#define BS_MAX 7 + /* BGP start states * * Used in PS_START for fine-grained specification of starting state. @@ -305,6 +313,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BEM_INVALID_NEXT_HOP 2 #define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */ #define BEM_NO_SOCKET 4 +#define BEM_BFD_DOWN 5 /* Automatic shutdown error codes */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 0b096339..ab12fed5 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, - SECONDARY, ADD, PATHS, RX, TX) + SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX) CF_GRAMMAR @@ -76,7 +76,8 @@ bgp_proto: | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } - | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; } + | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; BGP_CFG->next_hop_keep = 0; } + | bgp_proto NEXT HOP KEEP ';' { BGP_CFG->next_hop_keep = 1; BGP_CFG->next_hop_self = 0; } | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; } | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; } | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; } @@ -110,8 +111,11 @@ bgp_proto: | bgp_proto ADD PATHS RX ';' { BGP_CFG->add_path = ADD_PATH_RX; } | bgp_proto ADD PATHS TX ';' { BGP_CFG->add_path = ADD_PATH_TX; } | bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; } + | bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; } + | bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; } | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } | bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; } + | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); } ; CF_ADDTO(dynamic_attr, BGP_ORIGIN diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 3fae2c24..42064332 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -24,6 +24,13 @@ static struct rate_limit rl_rcv_update, rl_snd_update; +/* Table for state -> RFC 6608 FSM error subcodes */ +static byte fsm_err_subcode[BS_MAX] = { + [BS_OPENSENT] = 1, + [BS_OPENCONFIRM] = 2, + [BS_ESTABLISHED] = 3 +}; + /* * MRT Dump format is not semantically specified. * We will use these values in appropriate fields: @@ -58,7 +65,7 @@ mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4) buf+=4; } - put_u16(buf+0, p->neigh ? p->neigh->iface->index : 0); + put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0); put_u16(buf+2, BGP_AF); buf+=4; buf = ipa_put_addr(buf, conn->sk ? conn->sk->daddr : IPA_NONE); @@ -758,7 +765,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) /* Check state */ if (conn->state != BS_OPENSENT) - { bgp_error(conn, 5, 0, NULL, 0); return; } + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } /* Check message contents */ if (len < 29 || len != 29 + pkt[28]) @@ -917,7 +924,7 @@ bgp_set_next_hop(struct bgp_proto *p, rta *a) ip_addr *nexthop = (ip_addr *) nh->u.ptr->data; #ifdef IPV6 - int second = (nh->u.ptr->length == NEXT_HOP_LENGTH); + int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]); /* First address should not be link-local, but may be zero in direct mode */ if (ipa_has_link_scope(*nexthop)) @@ -1148,7 +1155,7 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len) bgp_conn_enter_established_state(conn); if (conn->state != BS_ESTABLISHED) - { bgp_error(conn, 5, 0, NULL, 0); return; } + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } bgp_start_timer(conn->hold_timer, conn->hold_time); /* Find parts of the packet and check sizes */ @@ -1210,7 +1217,10 @@ static struct { { 3, 10, "Invalid network field" }, { 3, 11, "Malformed AS_PATH" }, { 4, 0, "Hold timer expired" }, - { 5, 0, "Finite state machine error" }, + { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */ + { 5, 1, "Unexpected message in OpenSent state" }, + { 5, 2, "Unexpected message in OpenConfirm state" }, + { 5, 3, "Unexpected message in Established state" }, { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */ { 6, 1, "Maximum number of prefixes reached" }, { 6, 2, "Administrative shutdown" }, @@ -1341,7 +1351,7 @@ bgp_rx_keepalive(struct bgp_conn *conn) case BS_ESTABLISHED: break; default: - bgp_error(conn, 5, 0, NULL, 0); + bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); } } @@ -1353,7 +1363,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, int len) BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); if (conn->state != BS_ESTABLISHED) - { bgp_error(conn, 5, 0, NULL, 0); return; } + { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; } if (!p->cf->enable_refresh) { bgp_error(conn, 1, 3, pkt+18, 1); return; } diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 67b0785f..c47a8cd2 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -92,6 +92,7 @@ ospf_proto_finish(void) if (cf->abr && !backbone) { struct ospf_area_config *ac = cfg_allocz(sizeof(struct ospf_area_config)); + ac->type = OPT_E; /* Backbone is non-stub */ add_head(&cf->area_list, NODE ac); init_list(&ac->patt_list); init_list(&ac->net_list); @@ -124,16 +125,17 @@ CF_DECLS CF_KEYWORDS(OSPF, AREA, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) CF_KEYWORDS(NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) -CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST) +CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT) CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP) -CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC) -CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK) +CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC, TTL, SECURITY) +CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK, ONLY, BFD) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) -CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL) +CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY) %type <t> opttext %type <ld> lsadb_args +%type <i> nbma_eligible CF_GRAMMAR @@ -156,6 +158,7 @@ ospf_proto: ospf_proto_item: proto_item | RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; } + | STUB ROUTER bool { OSPF_CFG->stub_router = $3; } | ECMP bool { OSPF_CFG->ecmp = $2 ? DEFAULT_ECMP_LIMIT : 0; } | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); } | TICK expr { OSPF_CFG->tick = $2; if($2<=0) cf_error("Tick must be greater than zero"); } @@ -288,19 +291,25 @@ ospf_iface_item: | TYPE POINTOMULTIPOINT { OSPF_PATT->type = OSPF_IT_PTMP ; } | TYPE PTMP { OSPF_PATT->type = OSPF_IT_PTMP ; } | REAL BROADCAST bool { OSPF_PATT->real_bcast = $3; if (OSPF_VERSION != 2) cf_error("Real broadcast option requires OSPFv2"); } + | PTP NETMASK bool { OSPF_PATT->ptp_netmask = $3; if (OSPF_VERSION != 2) cf_error("Real netmask option requires OSPFv2"); } | TRANSMIT DELAY expr { OSPF_PATT->inftransdelay = $3 ; if (($3<=0) || ($3>65535)) cf_error("Transmit delay must be in range 1-65535"); } | PRIORITY expr { OSPF_PATT->priority = $2 ; if (($2<0) || ($2>255)) cf_error("Priority must be in range 0-255"); } | STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; } | STUB bool { OSPF_PATT->stub = $2 ; } | CHECK LINK bool { OSPF_PATT->check_link = $3; } | ECMP WEIGHT expr { OSPF_PATT->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); } - | NEIGHBORS '{' ipa_list '}' + | NEIGHBORS '{' nbma_list '}' | AUTHENTICATION NONE { OSPF_PATT->autype = OSPF_AUTH_NONE ; } | AUTHENTICATION SIMPLE { OSPF_PATT->autype = OSPF_AUTH_SIMPLE ; } | AUTHENTICATION CRYPTOGRAPHIC { OSPF_PATT->autype = OSPF_AUTH_CRYPT ; } | RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; } | RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; } | RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); } + | TX tos { OSPF_PATT->tx_tos = $2; } + | TX PRIORITY expr { OSPF_PATT->tx_priority = $3; } + | TTL SECURITY bool { OSPF_PATT->ttl_security = $3; } + | TTL SECURITY TX ONLY { OSPF_PATT->ttl_security = 2; } + | BFD bool { OSPF_PATT->bfd = $2; cf_check_bfd($2); } | password_list ; @@ -326,33 +335,24 @@ pref_opt: | TAG expr { this_pref->tag = $2; } ; -ipa_list: +nbma_list: /* empty */ - | ipa_list ipa_item + | nbma_list nbma_item ; -ipa_item: - ipa_el - | ipa_ne; +nbma_eligible: + /* empty */ { $$ = 0; } + | ELIGIBLE { $$ = 1; } + ; -ipa_el: IPA ';' +nbma_item: IPA nbma_eligible ';' { this_nbma = cfg_allocz(sizeof(struct nbma_node)); add_tail(&OSPF_PATT->nbma_list, NODE this_nbma); this_nbma->ip=$1; - this_nbma->eligible=0; + this_nbma->eligible=$2; } ; - -ipa_ne: IPA ELIGIBLE ';' - { - this_nbma = cfg_allocz(sizeof(struct nbma_node)); - add_tail(&OSPF_PATT->nbma_list, NODE this_nbma); - this_nbma->ip=$1; - this_nbma->eligible=1; - } -; - ospf_iface_start: { @@ -371,6 +371,9 @@ ospf_iface_start: OSPF_PATT->type = OSPF_IT_UNDEF; init_list(&OSPF_PATT->nbma_list); OSPF_PATT->autype = OSPF_AUTH_NONE; + OSPF_PATT->ptp_netmask = 2; /* not specified */ + OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL; + OSPF_PATT->tx_priority = sk_priority_control; reset_passwords(); } ; @@ -439,7 +442,7 @@ lsadb_args: $$ = cfg_allocz(sizeof(struct lsadb_show_data)); } | lsadb_args GLOBAL { $$ = $1; $$->scope = LSA_SCOPE_AS; } - | lsadb_args AREA idval { $$ = $1; $$->scope = LSA_SCOPE_AREA; $$->area = $3 } + | lsadb_args AREA idval { $$ = $1; $$->scope = LSA_SCOPE_AREA; $$->area = $3; } | lsadb_args LINK { $$ = $1; $$->scope = 1; /* hack, 0 is no filter */ } | lsadb_args TYPE NUM { $$ = $1; $$->type = $3; } | lsadb_args LSID idval { $$ = $1; $$->lsid = $3; } diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c index f9ba28f6..b6b11004 100644 --- a/proto/ospf/hello.c +++ b/proto/ospf/hello.c @@ -101,6 +101,17 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, return; } +#ifdef OSPFv2 + if (n && (n->rid != ntohl(ps_i->routerid))) + { + OSPF_TRACE(D_EVENTS, + "Neighbor %I has changed router id from %R to %R.", + n->ip, n->rid, ntohl(ps_i->routerid)); + ospf_neigh_remove(n); + n = NULL; + } +#endif + if (!n) { if ((ifa->type == OSPF_IT_NBMA) || (ifa->type == OSPF_IT_PTMP)) @@ -132,7 +143,7 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, n = ospf_neighbor_new(ifa); - n->rid = ntohl(((struct ospf_packet *) ps)->routerid); + n->rid = ntohl(ps_i->routerid); n->ip = faddr; n->dr = ntohl(ps->dr); n->bdr = ntohl(ps->bdr); @@ -140,7 +151,18 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, #ifdef OSPFv3 n->iface_id = ntohl(ps->iface_id); #endif + + if (n->ifa->cf->bfd) + ospf_neigh_update_bfd(n, n->ifa->bfd); } +#ifdef OSPFv3 /* NOTE: this could also be relevant for OSPFv2 on PtP ifaces */ + else if (!ipa_equal(faddr, n->ip)) + { + OSPF_TRACE(D_EVENTS, "Neighbor address changed from %I to %I", n->ip, faddr); + n->ip = faddr; + } +#endif + ospf_neigh_sm(n, INM_HELLOREC); pnrid = (u32 *) ((struct ospf_hello_packet *) (ps + 1)); @@ -253,7 +275,8 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) #ifdef OSPFv2 pkt->netmask = ipa_mkmask(ifa->addr->pxlen); ipa_hton(pkt->netmask); - if ((ifa->type == OSPF_IT_VLINK) || (ifa->type == OSPF_IT_PTP)) + if ((ifa->type == OSPF_IT_VLINK) || + ((ifa->type == OSPF_IT_PTP) && !ifa->ptp_netmask)) pkt->netmask = IPA_NONE; #endif @@ -261,7 +284,7 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) pkt->priority = ifa->priority; #ifdef OSPFv3 - pkt->iface_id = htonl(ifa->iface->index); + pkt->iface_id = htonl(ifa->iface_id); pkt->options3 = ifa->oa->options >> 16; pkt->options2 = ifa->oa->options >> 8; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index a6a0c6c1..f1409840 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -77,7 +77,8 @@ ospf_sk_open(struct ospf_iface *ifa) sk->dport = OSPF_PROTO; sk->saddr = IPA_NONE; - sk->tos = IP_PREC_INTERNET_CONTROL; + sk->tos = ifa->cf->tx_tos; + sk->priority = ifa->cf->tx_priority; sk->rx_hook = ospf_rx_hook; sk->tx_hook = ospf_tx_hook; sk->err_hook = ospf_err_hook; @@ -85,7 +86,8 @@ ospf_sk_open(struct ospf_iface *ifa) sk->rbsize = rxbufsize(ifa); sk->tbsize = rxbufsize(ifa); sk->data = (void *) ifa; - sk->flags = SKF_LADDR_RX; + sk->flags = SKF_LADDR_RX | (ifa->check_ttl ? SKF_TTL_RX : 0); + sk->ttl = ifa->cf->ttl_security ? 255 : -1; if (sk_open(sk) != 0) goto err; @@ -130,7 +132,7 @@ ospf_sk_open(struct ospf_iface *ifa) else { ifa->all_routers = AllSPFRouters; - sk->ttl = 1; /* Hack, this will affect just multicast packets */ + sk->ttl = ifa->cf->ttl_security ? 255 : 1; if (sk_setup_multicast(sk) < 0) goto err; @@ -533,10 +535,15 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i ifa->rxbuf = ip->rxbuf; ifa->check_link = ip->check_link; ifa->ecmp_weight = ip->ecmp_weight; + ifa->check_ttl = (ip->ttl_security == 1); + ifa->bfd = ip->bfd; #ifdef OSPFv2 ifa->autype = ip->autype; ifa->passwords = ip->passwords; + ifa->ptp_netmask = addr ? !(addr->flags & IA_PEER) : 0; + if (ip->ptp_netmask < 2) + ifa->ptp_netmask = ip->ptp_netmask; #endif #ifdef OSPFv3 @@ -567,13 +574,29 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i log(L_WARN "%s: Cannot use interface %s as %s, forcing %s", p->name, iface->name, ospf_it[old_type], ospf_it[ifa->type]); + /* Assign iface ID, for vlinks, this is ugly hack */ + ifa->iface_id = (ifa->type != OSPF_IT_VLINK) ? iface->index : oa->po->last_vlink_id++; init_list(&ifa->neigh_list); init_list(&ifa->nbma_list); WALK_LIST(nb, ip->nbma_list) - if (ipa_in_net(nb->ip, addr->prefix, addr->pxlen)) - add_nbma_node(ifa, nb, 0); + { + /* In OSPFv3, addr is link-local while configured neighbors could + have global IP (although RFC 5340 C.5 says link-local addresses + should be used). Because OSPFv3 iface is not subnet-specific, + there is no need for ipa_in_net() check */ + +#ifdef OSPFv2 + if (!ipa_in_net(nb->ip, addr->prefix, addr->pxlen)) + continue; +#else + if (!ipa_has_link_scope(nb->ip)) + log(L_WARN "In OSPFv3, configured neighbor address (%I) should be link-local", nb->ip); +#endif + + add_nbma_node(ifa, nb, 0); + } ifa->state = OSPF_IS_DOWN; add_tail(&oa->po->iface_list, NODE ifa); @@ -640,7 +663,11 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) if (ifa->stub != new_stub) return 0; - if (new->real_bcast != ifa->cf->real_bcast) + /* Change of these options would require to reset the iface socket */ + if ((new->real_bcast != ifa->cf->real_bcast) || + (new->tx_tos != ifa->cf->tx_tos) || + (new->tx_priority != ifa->cf->tx_priority) || + (new->ttl_security != ifa->cf->ttl_security)) return 0; ifa->cf = new; @@ -769,8 +796,14 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) /* NBMA LIST - add new */ WALK_LIST(nb, new->nbma_list) { + /* See related note in ospf_iface_new() */ +#ifdef OSPFv2 if (!ipa_in_net(nb->ip, ifa->addr->prefix, ifa->addr->pxlen)) continue; +#else + if (!ipa_has_link_scope(nb->ip)) + log(L_WARN "In OSPFv3, configured neighbor address (%I) should be link-local", nb->ip); +#endif if (! find_nbma_node(ifa, nb->ip)) { @@ -808,6 +841,19 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) ifa->ecmp_weight = new->ecmp_weight; } + /* BFD */ + if (ifa->bfd != new->bfd) + { + OSPF_TRACE(D_EVENTS, "%s BFD on interface %s", + new->bfd ? "Enabling" : "Disabling", ifname); + ifa->bfd = new->bfd; + + struct ospf_neighbor *n; + WALK_LIST(n, ifa->neigh_list) + ospf_neigh_update_bfd(n, ifa->bfd); + } + + /* instance_id is not updated - it is part of key */ return 1; @@ -884,6 +930,10 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) struct ifa *a; WALK_LIST(iface, iface_list) + { + if (! (iface->flags & IF_UP)) + continue; + WALK_LIST(a, iface->addrs) { if (a->flags & IA_SECONDARY) @@ -909,6 +959,7 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) ospf_iface_new(oa, a, ip); } } + } } @@ -1012,6 +1063,10 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) struct ifa *a; WALK_LIST(iface, iface_list) + { + if (! (iface->flags & IF_UP)) + continue; + WALK_LIST(a, iface->addrs) { if (a->flags & IA_SECONDARY) @@ -1040,6 +1095,7 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) ospf_iface_new(oa, a, ip); } } + } } #endif diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c index 16967a7f..beac6c83 100644 --- a/proto/ospf/lsupd.c +++ b/proto/ospf/lsupd.c @@ -112,6 +112,10 @@ ospf_lsa_flooding_allowed(struct ospf_lsa_header *lsa, u32 domain, struct ospf_i { u32 scope = LSA_SCOPE(lsa); + /* Handle inactive vlinks */ + if (ifa->state == OSPF_IS_DOWN) + return 0; + /* 4.5.2 (Case 2) */ if (unknown_lsa_type(lsa) && !(lsa->type & LSA_UBIT)) scope = LSA_SCOPE_LINK; @@ -119,7 +123,7 @@ ospf_lsa_flooding_allowed(struct ospf_lsa_header *lsa, u32 domain, struct ospf_i switch (scope) { case LSA_SCOPE_LINK: - return ifa->iface->index == domain; + return ifa->iface_id == domain; case LSA_SCOPE_AREA: return ifa->oa->areaid == domain; @@ -201,7 +205,7 @@ ospf_lsupd_flood(struct proto_ospf *po, en->lsa_body = NULL; DBG("Removing from lsreq list for neigh %R\n", nn->rid); ospf_hash_delete(nn->lsrqh, en); - if (EMPTY_SLIST(nn->lsrql)) + if ((EMPTY_SLIST(nn->lsrql)) && (nn->state == NEIGHBOR_LOADING)) ospf_neigh_sm(nn, INM_LOADDONE); continue; break; @@ -212,7 +216,7 @@ ospf_lsupd_flood(struct proto_ospf *po, en->lsa_body = NULL; DBG("Removing from lsreq list for neigh %R\n", nn->rid); ospf_hash_delete(nn->lsrqh, en); - if (EMPTY_SLIST(nn->lsrql)) + if ((EMPTY_SLIST(nn->lsrql)) && (nn->state == NEIGHBOR_LOADING)) ospf_neigh_sm(nn, INM_LOADDONE); break; default: @@ -280,6 +284,16 @@ ospf_lsupd_flood(struct proto_ospf *po, ospf_pkt_fill_hdr(ifa, pk, LSUPD_P); pk->lsano = htonl(1); + /* Check iface buffer size */ + int len2 = sizeof(struct ospf_lsupd_packet) + (hn ? ntohs(hn->length) : hh->length); + if (len2 > ospf_pkt_bufsize(ifa)) + { + /* Cannot fit in a tx buffer, skip that iface */ + log(L_ERR "OSPF: LSA too large to flood on %s (Type: %04x, Id: %R, Rt: %R)", + ifa->iface->name, hh->type, hh->id, hh->rt); + continue; + } + lh = (struct ospf_lsa_header *) (pk + 1); /* Copy LSA into the packet */ @@ -395,7 +409,7 @@ ospf_lsupd_send_list(struct ospf_neighbor *n, list * l) if (len2 > ospf_pkt_bufsize(n->ifa)) { /* Cannot fit in a tx buffer, skip that */ - log(L_WARN "OSPF: LSA too large to send (Type: %04x, Id: %R, Rt: %R)", + log(L_ERR "OSPF: LSA too large to send (Type: %04x, Id: %R, Rt: %R)", lsr->lsh.type, lsr->lsh.id, lsr->lsh.rt); lsr = NODE_NEXT(lsr); continue; diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index 642365b3..61224ec2 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -459,7 +459,7 @@ bdr_election(struct ospf_iface *ifa) #else /* OSPFv3 */ me.dr = ifa->drid; me.bdr = ifa->bdrid; - me.iface_id = ifa->iface->index; + me.iface_id = ifa->iface_id; #endif add_tail(&ifa->neigh_list, NODE & me); @@ -582,6 +582,36 @@ ospf_neigh_remove(struct ospf_neighbor *n) OSPF_TRACE(D_EVENTS, "Deleting neigbor."); } +static void +ospf_neigh_bfd_hook(struct bfd_request *req) +{ + struct ospf_neighbor *n = req->data; + struct proto *p = &n->ifa->oa->po->proto; + + if (req->down) + { + OSPF_TRACE(D_EVENTS, "BFD session down for %I on %s", + n->ip, n->ifa->iface->name); + + ospf_neigh_remove(n); + } +} + +void +ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd) +{ + if (use_bfd && !n->bfd_req) + n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip, n->ifa->iface, + ospf_neigh_bfd_hook, n); + + if (!use_bfd && n->bfd_req) + { + rfree(n->bfd_req); + n->bfd_req = NULL; + } +} + + void ospf_sh_neigh_info(struct ospf_neighbor *n) { diff --git a/proto/ospf/neighbor.h b/proto/ospf/neighbor.h index f593faed..e674927d 100644 --- a/proto/ospf/neighbor.h +++ b/proto/ospf/neighbor.h @@ -16,6 +16,7 @@ void bdr_election(struct ospf_iface *ifa); struct ospf_neighbor *find_neigh(struct ospf_iface *ifa, u32 rid); struct ospf_neighbor *find_neigh_by_ip(struct ospf_iface *ifa, ip_addr ip); void ospf_neigh_remove(struct ospf_neighbor *n); +void ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd); void ospf_sh_neigh_info(struct ospf_neighbor *n); #endif /* _BIRD_OSPF_NEIGHBOR_H_ */ diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 1aa7407a..232f3f6c 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -167,7 +167,7 @@ ospf_area_add(struct proto_ospf *po, struct ospf_area_config *ac, int reconf) #ifdef OSPFv2 oa->options = ac->type; #else /* OSPFv3 */ - oa->options = OPT_R | ac->type | OPT_V6; + oa->options = ac->type | OPT_V6 | (po->stub_router ? 0 : OPT_R); #endif /* @@ -232,7 +232,9 @@ ospf_start(struct proto *p) struct ospf_area_config *ac; po->router_id = proto_get_router_id(p->cf); + po->last_vlink_id = 0x80000000; po->rfc1583 = c->rfc1583; + po->stub_router = c->stub_router; po->ebit = 0; po->ecmp = c->ecmp; po->tick = c->tick; @@ -689,7 +691,7 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) #ifdef OSPFv2 oa->options = nac->type; #else /* OSPFv3 */ - oa->options = OPT_R | nac->type | OPT_V6; + oa->options = nac->type | OPT_V6 | (oa->po->stub_router ? 0 : OPT_R); #endif if (oa_is_nssa(oa) && (oa->po->areano > 1)) oa->po->ebit = 1; @@ -728,12 +730,16 @@ ospf_reconfigure(struct proto *p, struct proto_config *c) struct ospf_iface *ifa, *ifx; struct ospf_iface_patt *ip; + if (proto_get_router_id(c) != po->router_id) + return 0; + if (po->rfc1583 != new->rfc1583) return 0; if (old->abr != new->abr) return 0; + po->stub_router = new->stub_router; po->ecmp = new->ecmp; po->tick = new->tick; po->disp_timer->recurrent = po->tick; @@ -827,6 +833,7 @@ ospf_sh(struct proto *p) cli_msg(-1014, "%s:", p->name); cli_msg(-1014, "RFC1583 compatibility: %s", (po->rfc1583 ? "enable" : "disabled")); + cli_msg(-1014, "Stub router: %s", (po->stub_router ? "Yes" : "No")); cli_msg(-1014, "RT scheduler tick: %d", po->tick); cli_msg(-1014, "Number of areas: %u", po->areano); cli_msg(-1014, "Number of LSAs in DB:\t%u", po->gr->hash_entries); @@ -953,8 +960,10 @@ lsa_compare_for_state(const void *p1, const void *p2) struct ospf_lsa_header *lsa1 = &(he1->lsa); struct ospf_lsa_header *lsa2 = &(he2->lsa); - if (he1->domain != he2->domain) - return he1->domain - he2->domain; + if (he1->domain < he2->domain) + return -1; + if (he1->domain > he2->domain) + return 1; #ifdef OSPFv3 struct ospf_lsa_header lsatmp1, lsatmp2; @@ -979,14 +988,18 @@ lsa_compare_for_state(const void *p1, const void *p2) { #ifdef OSPFv3 /* In OSPFv3, neworks are named base on ID of DR */ - if (lsa1->rt != lsa2->rt) - return lsa1->rt - lsa2->rt; + if (lsa1->rt < lsa2->rt) + return -1; + if (lsa1->rt > lsa2->rt) + return 1; #endif /* For OSPFv2, this is IP of the network, for OSPFv3, this is interface ID */ - if (lsa1->id != lsa2->id) - return lsa1->id - lsa2->id; + if (lsa1->id < lsa2->id) + return -1; + if (lsa1->id > lsa2->id) + return 1; #ifdef OSPFv3 if (px1 != px2) @@ -997,14 +1010,20 @@ lsa_compare_for_state(const void *p1, const void *p2) } else { - if (lsa1->rt != lsa2->rt) - return lsa1->rt - lsa2->rt; + if (lsa1->rt < lsa2->rt) + return -1; + if (lsa1->rt > lsa2->rt) + return 1; - if (lsa1->type != lsa2->type) - return lsa1->type - lsa2->type; - - if (lsa1->id != lsa2->id) - return lsa1->id - lsa2->id; + if (lsa1->type < lsa2->type) + return -1; + if (lsa1->type > lsa2->type) + return 1; + + if (lsa1->id < lsa2->id) + return -1; + if (lsa1->id > lsa2->id) + return 1; #ifdef OSPFv3 if (px1 != px2) @@ -1023,12 +1042,16 @@ ext_compare_for_state(const void *p1, const void *p2) struct ospf_lsa_header *lsa1 = &(he1->lsa); struct ospf_lsa_header *lsa2 = &(he2->lsa); - if (lsa1->rt != lsa2->rt) - return lsa1->rt - lsa2->rt; + if (lsa1->rt < lsa2->rt) + return -1; + if (lsa1->rt > lsa2->rt) + return 1; + + if (lsa1->id < lsa2->id) + return -1; + if (lsa1->id > lsa2->id) + return 1; - if (lsa1->id != lsa2->id) - return lsa1->id - lsa2->id; - return lsa1->sn - lsa2->sn; } diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 3bffaf91..46a1c3c1 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -46,6 +46,7 @@ do { if ((p->debug & D_PACKETS) || OSPF_FORCE_DEBUG) \ #include "nest/route.h" #include "nest/cli.h" #include "nest/locks.h" +#include "nest/bfd.h" #include "conf/conf.h" #include "lib/string.h" @@ -83,6 +84,7 @@ struct ospf_config struct proto_config c; unsigned tick; byte rfc1583; + byte stub_router; byte abr; int ecmp; list area_list; /* list of struct ospf_area_config */ @@ -189,7 +191,8 @@ struct ospf_iface u32 rxmtint; /* number of seconds between LSA retransmissions */ u32 pollint; /* Poll interval */ u32 deadint; /* after "deadint" missing hellos is router dead */ - u32 vid; /* Id of peer of virtual link */ + u32 iface_id; /* Interface ID (iface->index or new value for vlinks) */ + u32 vid; /* ID of peer of virtual link */ ip_addr vip; /* IP of peer of virtual link */ struct ospf_iface *vifa; /* OSPF iface which the vlink goes through */ struct ospf_area *voa; /* OSPF area which the vlink goes through */ @@ -272,6 +275,9 @@ struct ospf_iface u16 rxbuf; /* Buffer size */ u8 check_link; /* Whether iface link change is used */ u8 ecmp_weight; /* Weight used for ECMP */ + u8 ptp_netmask; /* Send real netmask for P2P */ + u8 check_ttl; /* Check incoming packets for TTL 255 */ + u8 bfd; /* Use BFD on iface */ }; struct ospf_md5 @@ -704,6 +710,7 @@ struct ospf_neighbor #define ACKL_DIRECT 0 #define ACKL_DELAY 1 timer *ackd_timer; /* Delayed ack timer */ + struct bfd_request *bfd_req; /* BFD request, if BFD is used */ u32 csn; /* Last received crypt seq number (for MD5) */ }; @@ -769,6 +776,7 @@ struct proto_ospf int areano; /* Number of area I belong to */ struct fib rtf; /* Routing table */ byte rfc1583; /* RFC1583 compatibility */ + byte stub_router; /* Do not forward transit traffic */ byte ebit; /* Did I originate any ext lsa? */ byte ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ struct ospf_area *backbone; /* If exists */ @@ -776,6 +784,7 @@ struct proto_ospf int lsab_size, lsab_used; linpool *nhpool; /* Linpool used for next hops computed in SPF */ u32 router_id; + u32 last_vlink_id; /* Interface IDs for vlinks (starts at 0x80000000) */ }; struct ospf_iface_patt @@ -795,6 +804,8 @@ struct ospf_iface_patt u32 priority; u32 voa; u32 vid; + int tx_tos; + int tx_priority; u16 rxbuf; #define OSPF_RXBUF_NORMAL 0 #define OSPF_RXBUF_LARGE 1 @@ -808,6 +819,9 @@ struct ospf_iface_patt u8 check_link; u8 ecmp_weight; u8 real_bcast; /* Not really used in OSPFv3 */ + u8 ptp_netmask; /* bool + 2 for unspecified */ + u8 ttl_security; /* bool + 2 for TX only */ + u8 bfd; #ifdef OSPFv2 list *passwords; diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c index 241a58f7..4338bc1a 100644 --- a/proto/ospf/packet.c +++ b/proto/ospf/packet.c @@ -309,6 +309,12 @@ ospf_rx_hook(sock *sk, int size) return 1; } + if (ifa->check_ttl && (sk->ttl < 255)) + { + log(L_ERR "%s%I - TTL %d (< 255)", mesg, sk->faddr, sk->ttl); + return 1; + } + if ((unsigned) size < sizeof(struct ospf_packet)) { log(L_ERR "%s%I - too short (%u bytes)", mesg, sk->faddr, size); diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 1053fd07..52110aa1 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -501,6 +501,10 @@ ospf_rt_spfa(struct ospf_area *oa) #ifdef OSPFv2 ospf_rt_spfa_rtlinks(oa, act, act); #else /* OSPFv3 */ + /* Errata 2078 to RFC 5340 4.8.1 - skip links from non-routing nodes */ + if ((act != oa->rt) && !(rt->options & OPT_R)) + break; + for (tmp = ospf_hash_find_rt_first(po->gr, act->domain, act->lsa.rt); tmp; tmp = ospf_hash_find_rt_next(tmp)) ospf_rt_spfa_rtlinks(oa, act, tmp); @@ -1839,7 +1843,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, if (en->lsa.type == LSA_T_RT) { struct ospf_lsa_rt *rt = en->lsa_body; - if (!(rt->options & OPT_V6) || !(rt->options & OPT_R)) + if (!(rt->options & OPT_V6)) return; } #endif diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index ec012b22..f25db9a7 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -103,7 +103,8 @@ lsab_alloc(struct proto_ospf *po, unsigned size) if (po->lsab_used > po->lsab_size) { po->lsab_size = MAX(po->lsab_used, 2 * po->lsab_size); - po->lsab = mb_realloc(po->proto.pool, po->lsab, po->lsab_size); + po->lsab = po->lsab ? mb_realloc(po->lsab, po->lsab_size): + mb_alloc(po->proto.pool, po->lsab_size); } return ((byte *) po->lsab) + offset; } @@ -233,6 +234,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) WALK_LIST(ifa, po->iface_list) { int net_lsa = 0; + u32 link_cost = po->stub_router ? 0xffff : ifa->cost; if ((ifa->type == OSPF_IT_VLINK) && (ifa->voa == oa) && (!EMPTY_LIST(ifa->neigh_list))) @@ -258,9 +260,17 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) ln = lsab_alloc(po, sizeof(struct ospf_lsa_rt_link)); ln->type = LSART_PTP; ln->id = neigh->rid; - ln->data = (ifa->addr->flags & IA_PEER) ? - ifa->iface->index : ipa_to_u32(ifa->addr->ip); - ln->metric = ifa->cost; + + /* + * ln->data should be ifa->iface_id in case of no/ptp + * address (ifa->addr->flags & IA_PEER) on PTP link (see + * RFC 2328 12.4.1.1.), but the iface ID value has no use, + * while using IP address even in this case is here for + * compatibility with some broken implementations that use + * this address as a next-hop. + */ + ln->data = ipa_to_u32(ifa->addr->ip); + ln->metric = link_cost; ln->padding = 0; i++; } @@ -274,7 +284,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) ln->type = LSART_NET; ln->id = ipa_to_u32(ifa->drip); ln->data = ipa_to_u32(ifa->addr->ip); - ln->metric = ifa->cost; + ln->metric = link_cost; ln->padding = 0; i++; net_lsa = 1; @@ -289,7 +299,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) ln->type = LSART_VLNK; ln->id = neigh->rid; ln->data = ipa_to_u32(ifa->addr->ip); - ln->metric = ifa->cost; + ln->metric = link_cost; ln->padding = 0; i++; } @@ -305,7 +315,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) /* Now we will originate stub area if there is no primary */ if (net_lsa || (ifa->type == OSPF_IT_VLINK) || - (ifa->addr->flags & IA_PEER) || + ((ifa->addr->flags & IA_PEER) && ! ifa->cf->stub) || configured_stubnet(oa, ifa->addr)) continue; @@ -368,7 +378,7 @@ add_lsa_rt_link(struct proto_ospf *po, struct ospf_iface *ifa, u8 type, u32 nif, ln->type = type; ln->padding = 0; ln->metric = ifa->cost; - ln->lif = ifa->iface->index; + ln->lif = ifa->iface_id; ln->nif = nif; ln->id = id; } @@ -546,7 +556,7 @@ originate_net_lsa_body(struct ospf_iface *ifa, u16 *length, if (n->state == NEIGHBOR_FULL) { #ifdef OSPFv3 - en = ospf_hash_find(po->gr, ifa->iface->index, n->iface_id, n->rid, LSA_T_LINK); + en = ospf_hash_find(po->gr, ifa->iface_id, n->iface_id, n->rid, LSA_T_LINK); if (en) options |= ((struct ospf_lsa_link *) en->lsa_body)->options; #endif @@ -596,7 +606,7 @@ originate_net_lsa(struct ospf_iface *ifa) lsa.options = ifa->oa->options; lsa.id = ipa_to_u32(ifa->addr->ip); #else /* OSPFv3 */ - lsa.id = ifa->iface->index; + lsa.id = ifa->iface_id; #endif lsa.rt = po->router_id; @@ -1207,10 +1217,10 @@ originate_link_lsa(struct ospf_iface *ifa) lsa.age = 0; lsa.type = LSA_T_LINK; - lsa.id = ifa->iface->index; + lsa.id = ifa->iface_id; lsa.rt = po->router_id; lsa.sn = get_seqnum(ifa->link_lsa); - u32 dom = ifa->iface->index; + u32 dom = ifa->iface_id; body = originate_link_lsa_body(ifa, &lsa.length); lsasum_calculate(&lsa, body); @@ -1249,7 +1259,6 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length) struct ospf_config *cf = (struct ospf_config *) (po->proto.cf); struct ospf_iface *ifa; struct ospf_lsa_prefix *lp; - struct ifa *vlink_addr = NULL; int host_addr = 0; int net_lsa; int i = 0; @@ -1263,7 +1272,7 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length) WALK_LIST(ifa, po->iface_list) { - if ((ifa->oa != oa) || (ifa->state == OSPF_IS_DOWN)) + if ((ifa->oa != oa) || (ifa->type == OSPF_IT_VLINK) || (ifa->state == OSPF_IS_DOWN)) continue; ifa->px_pos_beg = i; @@ -1282,9 +1291,6 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length) (a->scope <= SCOPE_LINK)) continue; - if (!vlink_addr) - vlink_addr = a; - if (((a->pxlen < MAX_PREFIX_LENGTH) && net_lsa) || configured_stubnet(oa, a)) continue; @@ -1304,23 +1310,41 @@ originate_prefix_rt_lsa_body(struct ospf_area *oa, u16 *length) ifa->px_pos_end = i; } - /* If there are some configured vlinks, add some global address, - which will be used as a vlink endpoint. */ - if (!EMPTY_LIST(cf->vlink_list) && !host_addr && vlink_addr) - { - lsa_put_prefix(po, vlink_addr->ip, MAX_PREFIX_LENGTH, 0); - i++; - } - struct ospf_stubnet_config *sn; if (oa->ac) WALK_LIST(sn, oa->ac->stubnet_list) if (!sn->hidden) { lsa_put_prefix(po, sn->px.addr, sn->px.len, sn->cost); + if (sn->px.len == MAX_PREFIX_LENGTH) + host_addr = 1; + i++; + } + + /* If there are some configured vlinks, find some global address + (even from another area), which will be used as a vlink endpoint. */ + if (!EMPTY_LIST(cf->vlink_list) && !host_addr) + { + WALK_LIST(ifa, po->iface_list) + { + if ((ifa->type == OSPF_IT_VLINK) || (ifa->state == OSPF_IS_DOWN)) + continue; + + struct ifa *a; + WALK_LIST(a, ifa->iface->addrs) + { + if ((a->flags & IA_SECONDARY) || (a->scope <= SCOPE_LINK)) + continue; + + /* Found some IP */ + lsa_put_prefix(po, a->ip, MAX_PREFIX_LENGTH, 0); i++; + goto done; } + } + } + done: lp = po->lsab; lp->pxcount = i; *length = po->lsab_used + sizeof(struct ospf_lsa_header); @@ -1389,15 +1413,12 @@ add_prefix(struct proto_ospf *po, u32 *px, int offset, int *pxc) { u32 *pxl = lsab_offset(po, offset); int i; - for (i = 0; i < *pxc; i++) + for (i = 0; i < *pxc; pxl = prefix_advance(pxl), i++) + if (prefix_same(px, pxl)) { - if (prefix_same(px, pxl)) - { - /* Options should be logically OR'ed together */ - *pxl |= *px; - return; - } - pxl = prefix_advance(pxl); + /* Options should be logically OR'ed together */ + *pxl |= (*px & 0x00FF0000); + return; } ASSERT(pxl == lsab_end(po)); @@ -1405,6 +1426,7 @@ add_prefix(struct proto_ospf *po, u32 *px, int offset, int *pxc) int pxspace = prefix_space(px); pxl = lsab_alloc(po, pxspace); memcpy(pxl, px, pxspace); + *pxl &= 0xFFFF0000; /* Set metric to zero */ (*pxc)++; } @@ -1415,11 +1437,21 @@ add_link_lsa(struct proto_ospf *po, struct top_hash_entry *en, int offset, int * u32 *pxb = ll->rest; int j; - for (j = 0; j < ll->pxcount; j++) - { - add_prefix(po, pxb, offset, pxc); - pxb = prefix_advance(pxb); - } + for (j = 0; j < ll->pxcount; pxb = prefix_advance(pxb), j++) + { + u8 pxlen = (pxb[0] >> 24); + u8 pxopts = (pxb[0] >> 16); + + /* Skip NU or LA prefixes */ + if (pxopts & (OPT_PX_NU | OPT_PX_LA)) + continue; + + /* Skip link-local prefixes */ + if ((pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000)) + continue; + + add_prefix(po, pxb, offset, pxc); + } } @@ -1449,7 +1481,7 @@ originate_prefix_net_lsa_body(struct ospf_iface *ifa, u16 *length) WALK_LIST(n, ifa->neigh_list) if ((n->state == NEIGHBOR_FULL) && - (en = ospf_hash_find(po->gr, ifa->iface->index, n->iface_id, n->rid, LSA_T_LINK))) + (en = ospf_hash_find(po->gr, ifa->iface_id, n->iface_id, n->rid, LSA_T_LINK))) add_link_lsa(po, en, offset, &pxc); lp = po->lsab; @@ -1471,7 +1503,7 @@ originate_prefix_net_lsa(struct ospf_iface *ifa) lsa.age = 0; lsa.type = LSA_T_PREFIX; - lsa.id = ifa->iface->index; + lsa.id = ifa->iface_id; lsa.rt = po->router_id; lsa.sn = get_seqnum(ifa->pxn_lsa); u32 dom = ifa->oa->areaid; @@ -1642,7 +1674,7 @@ ospf_lsa_domain(u32 type, struct ospf_iface *ifa) switch (type & LSA_SCOPE_MASK) { case LSA_SCOPE_LINK: - return ifa->iface->index; + return ifa->iface_id; case LSA_SCOPE_AREA: return ifa->oa->areaid; diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 5bae8614..2e206038 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -210,6 +210,11 @@ pipe_postconfig(struct proto_config *C) cf_error("Name of peer routing table not specified"); if (c->peer == C->table) cf_error("Primary table and peer table must be different"); + + if (C->in_keep_filtered) + cf_error("Pipe protocol prohibits keeping filtered routes"); + if (C->rx_limit) + cf_error("Pipe protocol does not support receive limits"); } extern int proto_reconfig_type; diff --git a/proto/radv/config.Y b/proto/radv/config.Y index abccd2c7..ff70a2f7 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -30,9 +30,9 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, MANAGED, OTHER, CONFIG, LINK, MTU, REACHABLE, TIME, RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT, LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, - LOCAL) + LOCAL, TRIGGER, SENSITIVE) -%type<i> radv_mult +%type<i> radv_mult radv_sensitive CF_GRAMMAR @@ -53,6 +53,11 @@ radv_proto_item: | PREFIX radv_prefix { add_tail(&RADV_CFG->pref_list, NODE this_radv_prefix); } | RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_CFG->rdnss_list, &radv_dns_list); } | DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_CFG->dnssl_list, &radv_dns_list); } + | TRIGGER prefix { + RADV_CFG->trigger_prefix = $2.addr; + RADV_CFG->trigger_pxlen = $2.len; + RADV_CFG->trigger_valid = 1; + } ; radv_proto_opts: @@ -78,6 +83,7 @@ radv_iface_start: RADV_IFACE->min_delay = DEFAULT_MIN_DELAY; RADV_IFACE->current_hop_limit = DEFAULT_CURRENT_HOP_LIMIT; RADV_IFACE->default_lifetime = -1; + RADV_IFACE->default_lifetime_sensitive = 1; }; radv_iface_item: @@ -90,7 +96,11 @@ radv_iface_item: | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if (($3 < 0) || ($3 > 3600000)) cf_error("Reachable time must be in range 0-3600000"); } | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; if ($3 < 0) cf_error("Retrans timer must be 0 or positive"); } | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if (($4 < 0) || ($4 > 255)) cf_error("Current hop limit must be in range 0-255"); } - | DEFAULT LIFETIME expr { RADV_IFACE->default_lifetime = $3; if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000"); } + | DEFAULT LIFETIME expr radv_sensitive { + RADV_IFACE->default_lifetime = $3; + if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000"); + if ($4 != -1) RADV_IFACE->default_lifetime_sensitive = $4; + } | PREFIX radv_prefix { add_tail(&RADV_IFACE->pref_list, NODE this_radv_prefix); } | RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_IFACE->rdnss_list, &radv_dns_list); } | DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_IFACE->dnssl_list, &radv_dns_list); } @@ -103,7 +113,7 @@ radv_iface_finish: struct radv_iface_config *ic = RADV_IFACE; if (ic->min_ra_int == (u32) -1) - ic->min_ra_int = _MAX(ic->max_ra_int / 3, 3); + ic->min_ra_int = MAX_(ic->max_ra_int / 3, 3); if (ic->default_lifetime == (u32) -1) ic->default_lifetime = 3 * ic->max_ra_int; @@ -147,14 +157,25 @@ radv_prefix_item: SKIP bool { RADV_PREFIX->skip = $2; } | ONLINK bool { RADV_PREFIX->onlink = $2; } | AUTONOMOUS bool { RADV_PREFIX->autonomous = $2; } - | VALID LIFETIME expr { RADV_PREFIX->valid_lifetime = $3; if ($3 < 0) cf_error("Valid lifetime must be 0 or positive"); } - | PREFERRED LIFETIME expr { RADV_PREFIX->preferred_lifetime = $3; if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive"); } + | VALID LIFETIME expr radv_sensitive { + RADV_PREFIX->valid_lifetime = $3; + if ($3 < 0) cf_error("Valid lifetime must be 0 or positive"); + if ($4 != -1) RADV_PREFIX->valid_lifetime_sensitive = $4; + } + | PREFERRED LIFETIME expr radv_sensitive { + RADV_PREFIX->preferred_lifetime = $3; + if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive"); + if ($4 != -1) RADV_PREFIX->preferred_lifetime_sensitive = $4; + } ; radv_prefix_finish: { if (RADV_PREFIX->preferred_lifetime > RADV_PREFIX->valid_lifetime) cf_error("Preferred lifetime must be at most Valid lifetime"); + + if (RADV_PREFIX->valid_lifetime_sensitive > RADV_PREFIX->preferred_lifetime_sensitive) + cf_error("Valid lifetime sensitive requires that Preferred lifetime is sensitive too"); }; radv_prefix_opts: @@ -268,6 +289,11 @@ radv_mult: | MULT expr { $$ = 0; radv_mult_val = $2; if (($2 < 1) || ($2 > 254)) cf_error("Multiplier must be in range 1-254"); } ; +radv_sensitive: + /* empty */ { $$ = -1; } + | SENSITIVE bool { $$ = $2; } + ; + CF_CODE CF_END diff --git a/proto/radv/packets.c b/proto/radv/packets.c index 6fdfcaa3..38abaa4c 100644 --- a/proto/radv/packets.c +++ b/proto/radv/packets.c @@ -240,6 +240,7 @@ radv_prepare_ra(struct radv_iface *ifa) { struct proto_radv *ra = ifa->ra; struct radv_config *cf = (struct radv_config *) (ra->p.cf); + struct radv_iface_config *ic = ifa->cf; char *buf = ifa->sk->tbuf; char *bufstart = buf; @@ -249,21 +250,22 @@ radv_prepare_ra(struct radv_iface *ifa) pkt->type = ICMPV6_RA; pkt->code = 0; pkt->checksum = 0; - pkt->current_hop_limit = ifa->cf->current_hop_limit; - pkt->flags = (ifa->cf->managed ? OPT_RA_MANAGED : 0) | - (ifa->cf->other_config ? OPT_RA_OTHER_CFG : 0); - pkt->router_lifetime = htons(ifa->cf->default_lifetime); - pkt->reachable_time = htonl(ifa->cf->reachable_time); - pkt->retrans_timer = htonl(ifa->cf->retrans_timer); + pkt->current_hop_limit = ic->current_hop_limit; + pkt->flags = (ic->managed ? OPT_RA_MANAGED : 0) | + (ic->other_config ? OPT_RA_OTHER_CFG : 0); + pkt->router_lifetime = (ra->active || !ic->default_lifetime_sensitive) ? + htons(ic->default_lifetime) : 0; + pkt->reachable_time = htonl(ic->reachable_time); + pkt->retrans_timer = htonl(ic->retrans_timer); buf += sizeof(*pkt); - if (ifa->cf->link_mtu) + if (ic->link_mtu) { struct radv_opt_mtu *om = (void *) buf; om->type = OPT_MTU; om->length = 1; om->reserved = 0; - om->mtu = htonl(ifa->cf->link_mtu); + om->mtu = htonl(ic->link_mtu); buf += sizeof (*om); } @@ -288,26 +290,28 @@ radv_prepare_ra(struct radv_iface *ifa) op->pxlen = addr->pxlen; op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) | (pc->autonomous ? OPT_PX_AUTONOMOUS : 0); - op->valid_lifetime = htonl(pc->valid_lifetime); - op->preferred_lifetime = htonl(pc->preferred_lifetime); + op->valid_lifetime = (ra->active || !pc->valid_lifetime_sensitive) ? + htonl(pc->valid_lifetime) : 0; + op->preferred_lifetime = (ra->active || !pc->preferred_lifetime_sensitive) ? + htonl(pc->preferred_lifetime) : 0; op->reserved = 0; op->prefix = addr->prefix; ipa_hton(op->prefix); buf += sizeof(*op); } - if (! ifa->cf->rdnss_local) + if (! ic->rdnss_local) if (radv_prepare_rdnss(ifa, &cf->rdnss_list, &buf, bufend) < 0) goto done; - if (radv_prepare_rdnss(ifa, &ifa->cf->rdnss_list, &buf, bufend) < 0) + if (radv_prepare_rdnss(ifa, &ic->rdnss_list, &buf, bufend) < 0) goto done; - if (! ifa->cf->dnssl_local) + if (! ic->dnssl_local) if (radv_prepare_dnssl(ifa, &cf->dnssl_list, &buf, bufend) < 0) goto done; - if (radv_prepare_dnssl(ifa, &ifa->cf->dnssl_list, &buf, bufend) < 0) + if (radv_prepare_dnssl(ifa, &ic->dnssl_list, &buf, bufend) < 0) goto done; done: @@ -391,7 +395,7 @@ static void radv_err_hook(sock *sk, int err) { struct radv_iface *ifa = sk->data; - log(L_ERR "%s: Socket error: %m", ifa->ra->p.name, err); + log(L_ERR "%s: Socket error on %s: %M", ifa->ra->p.name, ifa->iface->name, err); } int diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 5e7296a3..90408536 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -15,7 +15,7 @@ * The RAdv protocol is implemented in two files: |radv.c| containing * the interface with BIRD core and the protocol logic and |packets.c| * handling low level protocol stuff (RX, TX and packet formats). - * The protocol does not import or export any routes. + * The protocol does not export any routes. * * The RAdv is structured in the usual way - for each handled interface * there is a structure &radv_iface that contains a state related to @@ -30,6 +30,13 @@ * by RA_EV_* codes), and radv_timer(), which triggers sending RAs and * computes the next timeout. * + * The RAdv protocol could receive routes (through + * radv_import_control() and radv_rt_notify()), but only the + * configured trigger route is tracked (in &active var). When a radv + * protocol is reconfigured, the connected routing table is examined + * (in radv_check_active()) to have proper &active value in case of + * the specified trigger prefix was changed. + * * Supported standards: * - RFC 4861 - main RA standard * - RFC 6106 - DNS extensions (RDDNS, DNSSL) @@ -93,6 +100,16 @@ radv_iface_notify(struct radv_iface *ifa, int event) tm_start(ifa->timer, after); } +static void +radv_iface_notify_all(struct proto_radv *ra, int event) +{ + struct radv_iface *ifa; + + WALK_LIST(ifa, ra->iface_list) + radv_iface_notify(ifa, event); +} + + static struct radv_iface * radv_iface_find(struct proto_radv *ra, struct iface *what) { @@ -238,11 +255,68 @@ radv_ifa_notify(struct proto *p, unsigned flags, struct ifa *a) radv_iface_notify(ifa, RA_EV_CHANGE); } +static inline int radv_net_match_trigger(struct radv_config *cf, net *n) +{ + return cf->trigger_valid && + (n->n.pxlen == cf->trigger_pxlen) && + ipa_equal(n->n.prefix, cf->trigger_prefix); +} + +int +radv_import_control(struct proto *p, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) +{ + // struct proto_radv *ra = (struct proto_radv *) p; + struct radv_config *cf = (struct radv_config *) (p->cf); + + if (radv_net_match_trigger(cf, (*new)->net)) + return RIC_PROCESS; + + return RIC_DROP; +} + +static void +radv_rt_notify(struct proto *p, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs UNUSED) +{ + struct proto_radv *ra = (struct proto_radv *) p; + struct radv_config *cf = (struct radv_config *) (p->cf); + + if (radv_net_match_trigger(cf, n)) + { + u8 old_active = ra->active; + ra->active = !!new; + + if (ra->active == old_active) + return; + + if (ra->active) + RADV_TRACE(D_EVENTS, "Triggered"); + else + RADV_TRACE(D_EVENTS, "Suppressed"); + + radv_iface_notify_all(ra, RA_EV_CHANGE); + } +} + +static int +radv_check_active(struct proto_radv *ra) +{ + struct radv_config *cf = (struct radv_config *) (ra->p.cf); + + if (! cf->trigger_valid) + return 1; + + return rt_examine(ra->p.table, cf->trigger_prefix, cf->trigger_pxlen, + &(ra->p), ra->p.cf->out_filter); +} + static struct proto * radv_init(struct proto_config *c) { struct proto *p = proto_new(c, sizeof(struct proto_radv)); + p->accept_ra_types = RA_OPTIMAL; + p->import_control = radv_import_control; + p->rt_notify = radv_rt_notify; p->if_notify = radv_if_notify; p->ifa_notify = radv_ifa_notify; return p; @@ -252,9 +326,10 @@ static int radv_start(struct proto *p) { struct proto_radv *ra = (struct proto_radv *) p; - // struct radv_config *cf = (struct radv_config *) (p->cf); + struct radv_config *cf = (struct radv_config *) (p->cf); init_list(&(ra->iface_list)); + ra->active = !cf->trigger_valid; return PS_UP; } @@ -293,6 +368,9 @@ radv_reconfigure(struct proto *p, struct proto_config *c) * causing nodes to temporary remove their default routes. */ + p->cf = c; /* radv_check_active() requires proper p->cf */ + ra->active = radv_check_active(ra); + struct iface *iface; WALK_LIST(iface, iface_list) { @@ -335,6 +413,14 @@ radv_copy_config(struct proto_config *dest, struct proto_config *src) cfg_copy_list(&d->pref_list, &s->pref_list, sizeof(struct radv_prefix_config)); } +static void +radv_get_status(struct proto *p, byte *buf) +{ + struct proto_radv *ra = (struct proto_radv *) p; + + if (!ra->active) + strcpy(buf, "Suppressed"); +} struct protocol proto_radv = { .name = "RAdv", @@ -343,5 +429,6 @@ struct protocol proto_radv = { .start = radv_start, .shutdown = radv_shutdown, .reconfigure = radv_reconfigure, - .copy_config = radv_copy_config + .copy_config = radv_copy_config, + .get_status = radv_get_status }; diff --git a/proto/radv/radv.h b/proto/radv/radv.h index 48af8c00..f80e4530 100644 --- a/proto/radv/radv.h +++ b/proto/radv/radv.h @@ -52,6 +52,10 @@ struct radv_config list pref_list; /* Global list of prefix configs (struct radv_prefix_config) */ list rdnss_list; /* Global list of RDNSS configs (struct radv_rdnss_config) */ list dnssl_list; /* Global list of DNSSL configs (struct radv_dnssl_config) */ + + ip_addr trigger_prefix; /* Prefix of a trigger route, if defined */ + u8 trigger_pxlen; /* Pxlen of a trigger route, if defined */ + u8 trigger_valid; /* Whether a trigger route is defined */ }; struct radv_iface_config @@ -75,6 +79,7 @@ struct radv_iface_config u32 retrans_timer; u32 current_hop_limit; u32 default_lifetime; + u8 default_lifetime_sensitive; /* Whether default_lifetime depends on trigger */ }; struct radv_prefix_config @@ -88,6 +93,8 @@ struct radv_prefix_config u8 autonomous; u32 valid_lifetime; u32 preferred_lifetime; + u8 valid_lifetime_sensitive; /* Whether valid_lifetime depends on trigger */ + u8 preferred_lifetime_sensitive; /* Whether preferred_lifetime depends on trigger */ }; struct radv_rdnss_config @@ -113,6 +120,7 @@ struct proto_radv { struct proto p; list iface_list; /* List of active ifaces */ + u8 active; /* Whether radv is active w.r.t. triggers */ }; struct radv_iface diff --git a/proto/rip/config.Y b/proto/rip/config.Y index cd4f30e7..791c43a2 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -22,12 +22,18 @@ CF_DEFINES #define RIP_CFG ((struct rip_proto_config *) this_proto) #define RIP_IPATT ((struct rip_patt *) this_ipatt) +#ifdef IPV6 +#define RIP_DEFAULT_TTL_SECURITY 2 +#else +#define RIP_DEFAULT_TTL_SECURITY 0 +#endif + CF_DECLS CF_KEYWORDS(RIP, INFINITY, METRIC, PORT, PERIOD, GARBAGE, TIMEOUT, MODE, BROADCAST, MULTICAST, QUIET, NOLISTEN, VERSION1, - AUTHENTICATION, NONE, PLAINTEXT, MD5, - HONOR, NEVER, NEIGHBOR, ALWAYS, + AUTHENTICATION, NONE, PLAINTEXT, MD5, TTL, SECURITY, + HONOR, NEVER, NEIGHBOR, ALWAYS, TX, PRIORITY, ONLY, RIP_METRIC, RIP_TAG) %type <i> rip_mode rip_auth @@ -76,6 +82,10 @@ rip_mode: rip_iface_item: | METRIC expr { RIP_IPATT->metric = $2; } | MODE rip_mode { RIP_IPATT->mode |= $2; } + | TX tos { RIP_IPATT->tx_tos = $2; } + | TX PRIORITY expr { RIP_IPATT->tx_priority = $3; } + | TTL SECURITY bool { RIP_IPATT->ttl_security = $3; } + | TTL SECURITY TX ONLY { RIP_IPATT->ttl_security = 2; } ; rip_iface_opts: @@ -94,6 +104,9 @@ rip_iface_init: add_tail(&RIP_CFG->iface_list, NODE this_ipatt); init_list(&this_ipatt->ipn_list); RIP_IPATT->metric = 1; + RIP_IPATT->tx_tos = IP_PREC_INTERNET_CONTROL; + RIP_IPATT->tx_priority = sk_priority_control; + RIP_IPATT->ttl_security = RIP_DEFAULT_TTL_SECURITY; } ; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 9f4f0856..5cc40403 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -6,15 +6,14 @@ * * Can be freely distributed and used under the terms of the GNU GPL. * - FIXME: IpV6 support: packet size - FIXME: (nonurgent) IpV6 support: receive "route using" blocks - FIXME: (nonurgent) IpV6 support: generate "nexthop" blocks - next hops are only advisory, and they are pretty ugly in IpV6. + FIXME: IPv6 support: packet size + FIXME: (nonurgent) IPv6 support: receive "route using" blocks + FIXME: (nonurgent) IPv6 support: generate "nexthop" blocks + next hops are only advisory, and they are pretty ugly in IPv6. I suggest just forgetting about them. FIXME: (nonurgent): fold rip_connection into rip_interface? - FIXME: (nonurgent) allow bigger frequencies than 1 regular update in 6 seconds (?) FIXME: propagation of metric=infinity into main routing table may or may not be good idea. */ @@ -47,6 +46,7 @@ */ #undef LOCAL_DEBUG +#define LOCAL_DEBUG 1 #include "nest/bird.h" #include "nest/iface.h" @@ -59,11 +59,11 @@ #include "lib/string.h" #include "rip.h" -#include <assert.h> #define P ((struct rip_proto *) p) #define P_CF ((struct rip_proto_config *)p->cf) +#undef TRACE #define TRACE(level, msg, args...) do { if (p->debug & level) { log(L_TRACE "%s: " msg, p->name , ## args); } } while(0) static struct rip_interface *new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_patt *patt); @@ -163,7 +163,7 @@ rip_tx( sock *s ) FIB_ITERATE_START(&P->rtable, &c->iter, z) { struct rip_entry *e = (struct rip_entry *) z; - if (!rif->triggered || (!(e->updated < now-5))) { + if (!rif->triggered || (!(e->updated < now-2))) { /* FIXME: Should be probably 1 or some different algorithm */ nullupdate = 0; i = rip_tx_prepare( p, packet->block + i, e, rif, i ); if (i >= maxi) { @@ -361,26 +361,26 @@ advertise_entry( struct proto *p, struct rip_block *b, ip_addr whotoldme, struct static void process_block( struct proto *p, struct rip_block *block, ip_addr whotoldme, struct iface *iface ) { + int metric, pxlen; + #ifndef IPV6 - int metric = ntohl( block->metric ); + metric = ntohl( block->metric ); + pxlen = ipa_mklen(block->netmask); #else - int metric = block->metric; + metric = block->metric; + pxlen = block->pxlen; #endif ip_addr network = block->network; CHK_MAGIC; -#ifdef IPV6 - TRACE(D_ROUTES, "block: %I tells me: %I/%d available, metric %d... ", - whotoldme, network, block->pxlen, metric ); -#else + TRACE(D_ROUTES, "block: %I tells me: %I/%d available, metric %d... ", - whotoldme, network, ipa_mklen(block->netmask), metric ); -#endif + whotoldme, network, pxlen, metric ); if ((!metric) || (metric > P_CF->infinity)) { -#ifdef IPV6 /* Someone is sedning us nexthop and we are ignoring it */ +#ifdef IPV6 /* Someone is sending us nexthop and we are ignoring it */ if (metric == 0xff) - { DBG( "IpV6 nexthop ignored" ); return; } + { DBG( "IPv6 nexthop ignored" ); return; } #endif log( L_WARN "%s: Got metric %d from %I", p->name, metric, whotoldme ); return; @@ -483,6 +483,14 @@ rip_rx(sock *s, int size) iface = i->iface; #endif + if (i->check_ttl && (s->ttl < 255)) + { + log( L_REMOTE "%s: Discarding packet with TTL %d (< 255) from %I on %s", + p->name, s->ttl, s->faddr, i->iface->name); + return 1; + } + + CHK_MAGIC; DBG( "RIP: message came: %d bytes from %I via %s\n", size, s->faddr, i->iface ? i->iface->name : "(dummy)" ); size -= sizeof( struct rip_packet_heading ); @@ -535,13 +543,10 @@ rip_timer(timer *t) WALK_LIST_DELSAFE( e, et, P->garbage ) { rte *rte; rte = SKIP_BACK( struct rte, u.rip.garbage, e ); -#ifdef LOCAL_DEBUG - { - struct proto *p = rte->attrs->proto; - CHK_MAGIC; - } + + CHK_MAGIC; + DBG( "Garbage: (%p)", rte ); rte_dump( rte ); -#endif if (now - rte->lastmod > P_CF->timeout_time) { TRACE(D_EVENTS, "entry is too old: %I", rte->net->n.prefix ); @@ -560,17 +565,23 @@ rip_timer(timer *t) DBG( "RIP: Broadcasting routing tables\n" ); { struct rip_interface *rif; + + if ( P_CF->period > 2 ) { /* Bring some randomness into sending times */ + if (! (P->tx_count % P_CF->period)) P->rnd_count = random_u32() % 2; + } else P->rnd_count = P->tx_count % P_CF->period; + WALK_LIST( rif, P->interfaces ) { struct iface *iface = rif->iface; if (!iface) continue; if (rif->mode & IM_QUIET) continue; if (!(iface->flags & IF_UP)) continue; + rif->triggered = P->rnd_count; - rif->triggered = (P->tx_count % 6); rip_sendto( p, IPA_NONE, 0, rif ); } - P->tx_count ++; + P->tx_count++; + P->rnd_count--; } DBG( "RIP: tick tock done\n" ); @@ -585,9 +596,9 @@ rip_start(struct proto *p) struct rip_interface *rif; DBG( "RIP: starting instance...\n" ); - assert( sizeof(struct rip_packet_heading) == 4); - assert( sizeof(struct rip_block) == 20); - assert( sizeof(struct rip_block_auth) == 20); + ASSERT(sizeof(struct rip_packet_heading) == 4); + ASSERT(sizeof(struct rip_block) == 20); + ASSERT(sizeof(struct rip_block_auth) == 20); #ifdef LOCAL_DEBUG P->magic = RIP_MAGIC; @@ -598,10 +609,9 @@ rip_start(struct proto *p) init_list( &P->interfaces ); P->timer = tm_new( p->pool ); P->timer->data = p; - P->timer->randomize = 5; - P->timer->recurrent = (P_CF->period / 6)+1; + P->timer->recurrent = 1; P->timer->hook = rip_timer; - tm_start( P->timer, 5 ); + tm_start( P->timer, 2 ); rif = new_iface(p, NULL, 0, NULL); /* Initialize dummy interface */ add_head( &P->interfaces, NODE rif ); CHK_MAGIC; @@ -677,6 +687,7 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_ rif->mode = PATT->mode; rif->metric = PATT->metric; rif->multicast = (!(PATT->mode & IM_BROADCAST)) && (flags & IF_MULTICAST); + rif->check_ttl = (PATT->ttl_security == 1); } /* lookup multicasts over unnumbered links - no: rip is not defined over unnumbered links */ @@ -697,9 +708,10 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_ rif->sock->dport = P_CF->port; if (new) { - rif->sock->ttl = 1; - rif->sock->tos = IP_PREC_INTERNET_CONTROL; - rif->sock->flags = SKF_LADDR_RX; + rif->sock->tos = PATT->tx_tos; + rif->sock->priority = PATT->tx_priority; + rif->sock->ttl = PATT->ttl_security ? 255 : 1; + rif->sock->flags = SKF_LADDR_RX | (rif->check_ttl ? SKF_TTL_RX : 0); } if (new) { @@ -948,9 +960,11 @@ rip_rte_insert(net *net UNUSED, rte *rte) static void rip_rte_remove(net *net UNUSED, rte *rte) { - // struct proto *p = rte->attrs->proto; +#ifdef LOCAL_DEBUG + struct proto *p = rte->attrs->src->proto; CHK_MAGIC; DBG( "rip_rte_remove: %p\n", rte ); +#endif rem_node( &rte->u.rip.garbage ); } @@ -1000,7 +1014,9 @@ static int rip_pat_compare(struct rip_patt *a, struct rip_patt *b) { return ((a->metric == b->metric) && - (a->mode == b->mode)); + (a->mode == b->mode) && + (a->tx_tos == b->tx_tos) && + (a->tx_priority == b->tx_priority)); } static int diff --git a/proto/rip/rip.h b/proto/rip/rip.h index 6a8af379..2a327260 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -114,6 +114,7 @@ struct rip_interface { struct rip_connection *busy; int metric; /* You don't want to put struct rip_patt *patt here -- think about reconfigure */ int mode; + int check_ttl; /* Check incoming packets for TTL 255 */ int triggered; struct object_lock *lock; int multicast; @@ -128,6 +129,9 @@ struct rip_patt { #define IM_QUIET 4 #define IM_NOLISTEN 8 #define IM_VERSION1 16 + int tx_tos; + int tx_priority; + int ttl_security; /* bool + 2 for TX only (send, but do not check on RX) */ }; struct rip_proto_config { @@ -162,6 +166,7 @@ struct rip_proto { int magic; #endif int tx_count; /* Do one regular update once in a while */ + int rnd_count; /* Randomize sending time */ }; #ifdef LOCAL_DEBUG diff --git a/proto/static/config.Y b/proto/static/config.Y index f8e84f92..2d9d4b42 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -18,7 +18,7 @@ static struct static_route *this_srt, *this_srt_nh, *last_srt_nh; CF_DECLS CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK) -CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE) +CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE) CF_GRAMMAR @@ -86,9 +86,12 @@ stat_route: this_srt->dest = RTDX_RECURSIVE; this_srt->via = $3; } - | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; } - | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; } - | stat_route0 PROHIBIT { this_srt->dest = RTD_PROHIBIT; } + + | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; } + | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; } + | stat_route0 BLACKHOLE { this_srt->dest = RTD_BLACKHOLE; } + | stat_route0 UNREACHABLE { this_srt->dest = RTD_UNREACHABLE; } + | stat_route0 PROHIBIT { this_srt->dest = RTD_PROHIBIT; } ; CF_CLI(SHOW STATIC, optsym, [<name>], [[Show details of static protocol]]) diff --git a/proto/static/static.c b/proto/static/static.c index b6c8948f..9b115acd 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -461,7 +461,7 @@ static_reconfigure(struct proto *p, struct proto_config *new) WALK_LIST(r, n->iface_routes) { struct iface *ifa; - if (ifa = if_find_by_name(r->if_name)) + if ((ifa = if_find_by_name(r->if_name)) && (ifa->flags & IF_UP)) static_install(p, r, ifa); } WALK_LIST(r, n->other_routes) |