diff options
Diffstat (limited to 'proto')
52 files changed, 3123 insertions, 1963 deletions
diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 6d2a593e..d398da8e 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -14,9 +14,8 @@ /** * DOC: The Babel protocol * - * Babel (RFC6126) is a loop-avoiding distance-vector routing protocol that is - * robust and efficient both in ordinary wired networks and in wireless mesh - * networks. + * The Babel is a loop-avoiding distance-vector routing protocol that is robust + * and efficient both in ordinary wired networks and in wireless mesh networks. * * The Babel protocol keeps state for each neighbour in a &babel_neighbor * struct, tracking received Hello and I Heard You (IHU) messages. A @@ -33,10 +32,17 @@ * an entry is updated by receiving updates from the network or when modified by * internal timers. The function selects from feasible and reachable routes the * one with the lowest metric to be announced to the core. + * + * Supported standards: + * RFC 8966 - The Babel Routing Protocol + * RFC 8967 - MAC Authentication for Babel + * RFC 9079 - Source Specific Routing for Babel + * RFC 9229 - IPv4 Routes with IPv6 Next Hop for Babel */ #include <stdlib.h> #include "babel.h" +#include "lib/macro.h" #define LOG_PKT_AUTH(msg, args...) \ log_rl(&p->log_pkt_tbf, L_AUTH "%s: " msg, p->p.name, args) @@ -49,21 +55,27 @@ static inline int ge_mod64k(uint a, uint b) { return (u16)(a - b) < 0x8000; } +/* Strict inequality version of the above */ +static inline int gt_mod64k(uint a, uint b) +{ return ge_mod64k(a, b) && a != b; } + static void babel_expire_requests(struct babel_proto *p, struct babel_entry *e); static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod); static inline void babel_announce_retraction(struct babel_proto *p, struct babel_entry *e); static void babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n); -static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr); +static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr, struct babel_neighbor *n); static void babel_update_cost(struct babel_neighbor *n); static inline void babel_kick_timer(struct babel_proto *p); static inline void babel_iface_kick_timer(struct babel_iface *ifa); +static struct ea_class ea_babel_metric, ea_babel_router_id, ea_babel_seqno; + /* * Functions to maintain data structures */ static void -babel_init_entry(void *E) +babel_init_entry(struct fib *f UNUSED, void *E) { struct babel_entry *e = E; @@ -101,7 +113,8 @@ babel_find_source(struct babel_entry *e, u64 router_id) } static struct babel_source * -babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id) +babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id, + u16 initial_seqno) { struct babel_source *s = babel_find_source(e, router_id); @@ -111,7 +124,7 @@ babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id) s = sl_allocz(p->source_slab); s->router_id = router_id; s->expires = current_time() + BABEL_GARBAGE_INTERVAL; - s->seqno = 0; + s->seqno = initial_seqno; s->metric = BABEL_INFINITY; add_tail(&e->sources, NODE s); @@ -119,7 +132,7 @@ babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id) } static void -babel_expire_sources(struct babel_proto *p, struct babel_entry *e) +babel_expire_sources(struct babel_proto *p UNUSED, struct babel_entry *e) { struct babel_source *n, *nx; btime now_ = current_time(); @@ -129,7 +142,7 @@ babel_expire_sources(struct babel_proto *p, struct babel_entry *e) if (n->expires && n->expires <= now_) { rem_node(NODE n); - sl_free(p->source_slab, n); + sl_free(n); } } } @@ -174,7 +187,7 @@ babel_retract_route(struct babel_proto *p, struct babel_route *r) } static void -babel_flush_route(struct babel_proto *p, struct babel_route *r) +babel_flush_route(struct babel_proto *p UNUSED, struct babel_route *r) { DBG("Babel: Flush route %N router_id %lR neigh %I\n", r->e->n.addr, r->router_id, r->neigh->addr); @@ -185,7 +198,7 @@ babel_flush_route(struct babel_proto *p, struct babel_route *r) if (r->e->selected == r) r->e->selected = NULL; - sl_free(p->route_slab, r); + sl_free(r); } static void @@ -288,61 +301,92 @@ babel_expire_routes(struct babel_proto *p) babel_expire_routes_(p, &p->ip6_rtable); } -static inline int seqno_request_valid(struct babel_seqno_request *sr) -{ return !sr->nbr || sr->nbr->ifa; } - /* - * Add seqno request to the table of pending requests (RFC 6216 3.2.6) and send + * Add seqno request to the table of pending requests (RFC 8966 3.2.6) and send * it to network. Do nothing if it is already in the table. */ static void babel_add_seqno_request(struct babel_proto *p, struct babel_entry *e, u64 router_id, u16 seqno, u8 hop_count, - struct babel_neighbor *nbr) + struct babel_neighbor *target) { struct babel_seqno_request *sr; + btime now_ = current_time(); WALK_LIST(sr, e->requests) if (sr->router_id == router_id) { - /* Found matching or newer */ - if (ge_mod64k(sr->seqno, seqno) && seqno_request_valid(sr)) + /* + * To suppress duplicates, check if we already have a newer (higher seqno) + * outstanding request. If we do, suppress this request if the outstanding + * request is one we originated ourselves. If the outstanding request is + * forwarded, suppress only if this request is also one we're forwarding + * *and* we're within the duplicate suppression time of that request (see + * below). + */ + if (ge_mod64k(sr->seqno, seqno) && + (!sr->forwarded || (target && now_ < sr->dup_suppress_time))) return; - /* Found older */ rem_node(NODE sr); - rem_node(&sr->nbr_node); + + /* Allow upgrading from forwarded to non-forwarded */ + if (!target) + sr->forwarded = 0; goto found; } /* No entries found */ sr = sl_allocz(p->seqno_slab); + sr->forwarded = !!target; found: sr->router_id = router_id; sr->seqno = seqno; - sr->hop_count = hop_count; + sr->hop_count = hop_count ?: BABEL_INITIAL_HOP_COUNT; sr->count = 0; - sr->expires = current_time() + BABEL_SEQNO_REQUEST_EXPIRY; - if (sr->nbr = nbr) - add_tail(&nbr->requests, &sr->nbr_node); + if (sr->forwarded) + { + /* + * We want to keep the entry around for a reasonable period of time so it + * can be used to trigger an update (through babel_satisfy_seqno_request()). + * However, duplicate suppression should only trigger for a short period of + * time so it suppresses duplicates from multiple sources, but not + * retransmissions from the same source. Hence we keep two timers. + */ + sr->expires = now_ + BABEL_SEQNO_FORWARD_EXPIRY; + sr->dup_suppress_time = now_ + BABEL_SEQNO_DUP_SUPPRESS_TIME; + } + else + { + sr->expires = now_ + BABEL_SEQNO_REQUEST_EXPIRY; + } add_tail(&e->requests, NODE sr); - - babel_send_seqno_request(p, e, sr); + babel_send_seqno_request(p, e, sr, target); } static void -babel_remove_seqno_request(struct babel_proto *p, struct babel_seqno_request *sr) +babel_generate_seqno_request(struct babel_proto *p, struct babel_entry *e, + u64 router_id, u16 seqno, struct babel_neighbor *target) { - if (sr->nbr) - rem_node(&sr->nbr_node); + struct babel_seqno_request req = { + .router_id = router_id, + .seqno = seqno, + .hop_count = BABEL_INITIAL_HOP_COUNT, + }; + babel_send_seqno_request(p, e, &req, target); +} + +static void +babel_remove_seqno_request(struct babel_proto *p UNUSED, struct babel_seqno_request *sr) +{ rem_node(NODE sr); - sl_free(p->seqno_slab, sr); + sl_free(sr); } static int @@ -370,21 +414,14 @@ babel_expire_requests(struct babel_proto *p, struct babel_entry *e) WALK_LIST_DELSAFE(sr, srx, e->requests) { - /* Remove seqno requests sent to dead neighbors */ - if (!seqno_request_valid(sr)) - { - babel_remove_seqno_request(p, sr); - continue; - } - /* Handle expired requests - resend or remove */ if (sr->expires && sr->expires <= now_) { - if (sr->count < BABEL_SEQNO_REQUEST_RETRY) + if (!sr->forwarded && sr->count < BABEL_SEQNO_REQUEST_RETRY) { sr->count++; sr->expires += (BABEL_SEQNO_REQUEST_EXPIRY << sr->count); - babel_send_seqno_request(p, e, sr); + babel_send_seqno_request(p, e, sr, NULL); } else { @@ -429,7 +466,6 @@ babel_get_neighbor(struct babel_iface *ifa, ip_addr addr) nbr->cost = BABEL_INFINITY; nbr->init_expiry = current_time() + BABEL_INITIAL_NEIGHBOR_TIMEOUT; init_list(&nbr->routes); - init_list(&nbr->requests); add_tail(&ifa->neigh_list, NODE nbr); return nbr; @@ -450,13 +486,6 @@ babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr) babel_flush_route(p, r); } - struct babel_seqno_request *sr; - WALK_LIST_FIRST2(sr, nbr_node, nbr->requests) - { - sr->nbr = NULL; - rem_node(&sr->nbr_node); - } - nbr->ifa = NULL; rem_node(NODE nbr); mb_free(nbr); @@ -543,7 +572,7 @@ babel_is_feasible(struct babel_source *s, u16 seqno, u16 metric) { return !s || (metric == BABEL_INFINITY) || - (seqno > s->seqno) || + gt_mod64k(seqno, s->seqno) || ((seqno == s->seqno) && (metric < s->metric)); } @@ -640,37 +669,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) if (r) { - rta a0 = { - .source = RTS_BABEL, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .pref = c->preference, - .from = r->neigh->addr, - .nh.gw = r->next_hop, - .nh.iface = r->neigh->ifa->iface, - .eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)), - }; - - *a0.eattrs = (ea_list) { .count = 3 }; - a0.eattrs->attrs[0] = (eattr) { - .id = EA_BABEL_METRIC, - .type = EAF_TYPE_INT, - .u.data = r->metric, - }; - - struct adata *ad = alloca(sizeof(struct adata) + sizeof(u64)); - ad->length = sizeof(u64); - memcpy(ad->data, &(r->router_id), sizeof(u64)); - a0.eattrs->attrs[1] = (eattr) { - .id = EA_BABEL_ROUTER_ID, - .type = EAF_TYPE_OPAQUE, - .u.ptr = ad, - }; - - a0.eattrs->attrs[2] = (eattr) { - .id = EA_BABEL_SEQNO, - .type = EAF_TYPE_INT, - .u.data = r->seqno, + struct nexthop_adata nhad = { + .nh = { + .gw = r->next_hop, + .iface = r->neigh->ifa->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, }; /* @@ -679,10 +685,26 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) * have routing work. */ if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) - a0.nh.flags = RNF_ONLINK; + nhad.nh.flags = RNF_ONLINK; + + struct { + ea_list l; + eattr a[7]; + } eattrs = { + .l.count = ARRAY_SIZE(eattrs.a), + .a = { + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, c->preference), + EA_LITERAL_STORE_ADATA(&ea_gen_from, 0, &r->neigh->addr, sizeof(r->neigh->addr)), + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_BABEL), + EA_LITERAL_STORE_ADATA(&ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length), + EA_LITERAL_EMBEDDED(&ea_babel_metric, 0, r->metric), + EA_LITERAL_STORE_ADATA(&ea_babel_router_id, 0, &r->router_id, sizeof(r->router_id)), + EA_LITERAL_EMBEDDED(&ea_babel_seqno, 0, r->seqno), + } + }; rte e0 = { - .attrs = &a0, + .attrs = &eattrs.l, .src = p->p.main_source, }; @@ -692,15 +714,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) else if (e->valid && (e->router_id != p->router_id)) { /* Unreachable */ - rta a0 = { - .source = RTS_BABEL, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNREACHABLE, - .pref = 1, - }; + ea_list *ea = NULL; + + ea_set_attr_u32(&ea, &ea_gen_preference, 0, 1); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BABEL); + ea_set_dest(&ea, 0, RTD_UNREACHABLE); rte e0 = { - .attrs = &a0, + .attrs = ea, .src = p->p.main_source, }; @@ -862,14 +883,14 @@ babel_send_ihus(struct babel_iface *ifa) } static void -babel_send_hello(struct babel_iface *ifa) +babel_send_hello(struct babel_iface *ifa, uint interval) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; msg.type = BABEL_TLV_HELLO; msg.hello.seqno = ifa->hello_seqno++; - msg.hello.interval = ifa->cf->hello_interval; + msg.hello.interval = interval ?: ifa->cf->hello_interval; TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t", ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval); @@ -907,22 +928,23 @@ babel_send_wildcard_request(struct babel_iface *ifa) } static void -babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr) +babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, + struct babel_seqno_request *sr, struct babel_neighbor *n) { union babel_msg msg = {}; msg.type = BABEL_TLV_SEQNO_REQUEST; - msg.seqno_request.hop_count = sr->hop_count ?: BABEL_INITIAL_HOP_COUNT; + msg.seqno_request.hop_count = sr->hop_count; msg.seqno_request.seqno = sr->seqno; msg.seqno_request.router_id = sr->router_id; net_copy(&msg.seqno_request.net, e->n.addr); - if (sr->nbr) + if (n) { TRACE(D_PACKETS, "Sending seqno request for %N router-id %lR seqno %d to %I on %s", - e->n.addr, sr->router_id, sr->seqno, sr->nbr->addr, sr->nbr->ifa->ifname); + e->n.addr, sr->router_id, sr->seqno, n->addr, n->ifa->ifname); - babel_send_unicast(&msg, sr->nbr->ifa, sr->nbr->addr); + babel_send_unicast(&msg, n->ifa, n->addr); } else { @@ -985,8 +1007,18 @@ babel_send_update_(struct babel_iface *ifa, btime changed, struct fib *rtable) msg.update.router_id = e->router_id; net_copy(&msg.update.net, e->n.addr); - msg.update.next_hop = ((e->n.addr->type == NET_IP4) ? - ifa->next_hop_ip4 : ifa->next_hop_ip6); + if (e->n.addr->type == NET_IP4) + { + /* Always prefer IPv4 nexthop if set */ + if (ipa_nonzero(ifa->next_hop_ip4)) + msg.update.next_hop = ifa->next_hop_ip4; + + /* Only send IPv6 nexthop if enabled */ + else if (ifa->cf->ext_next_hop) + msg.update.next_hop = ifa->next_hop_ip6; + } + else + msg.update.next_hop = ifa->next_hop_ip6; /* Do not send route if next hop is unknown, e.g. no configured IPv4 address */ if (ipa_zero(msg.update.next_hop)) @@ -994,13 +1026,13 @@ babel_send_update_(struct babel_iface *ifa, btime changed, struct fib *rtable) babel_enqueue(&msg, ifa); - /* Update feasibility distance for redistributed routes */ + /* RFC 8966 3.7.3 - update feasibility distance for redistributed routes */ if (e->router_id != p->router_id) { - struct babel_source *s = babel_get_source(p, e, e->router_id); + struct babel_source *s = babel_get_source(p, e, e->router_id, msg.update.seqno); s->expires = current_time() + BABEL_GARBAGE_INTERVAL; - if ((msg.update.seqno > s->seqno) || + if (gt_mod64k(msg.update.seqno, s->seqno) || ((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric))) { s->seqno = msg.update.seqno; @@ -1245,6 +1277,13 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) return; } + /* Reject IPv4 via IPv6 routes if disabled */ + if ((msg->net.type == NET_IP4) && ipa_is_ip6(msg->next_hop) && !ifa->cf->ext_next_hop) + { + DBG("Babel: Ignoring disabled IPv4 via IPv6 route.\n"); + return; + } + /* Retraction */ if (msg->metric == BABEL_INFINITY) { @@ -1289,10 +1328,14 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa) metric = babel_compute_metric(nbr, msg->metric); best = e->selected; - /* RFC section 3.8.2.2 - Dealing with unfeasible updates */ - if (!feasible && (metric != BABEL_INFINITY) && + /* + * RFC 8966 3.8.2.2 - dealing with unfeasible updates. Generate a one-off + * (not retransmitted) unicast seqno request to the originator of this update. + * Note: !feasible -> s exists, check for 's' is just for clarity / safety. + */ + if (!feasible && s && (metric != BABEL_INFINITY) && (!best || (r == best) || (metric < best->metric))) - babel_add_seqno_request(p, e, s->router_id, s->seqno + 1, 0, nbr); + babel_generate_seqno_request(p, e, s->router_id, s->seqno + 1, nbr); /* Special case - ignore unfeasible update to best route */ if (r == best && !feasible && (msg->router_id == r->router_id)) @@ -1331,7 +1374,7 @@ babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa) struct babel_proto *p = ifa->proto; struct babel_msg_route_request *msg = &m->route_request; - /* RFC 6126 3.8.1.1 */ + /* RFC 8966 3.8.1.1 */ /* Wildcard request - full update on the interface */ if (msg->full) @@ -1357,13 +1400,39 @@ babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa) } } +static struct babel_neighbor * +babel_find_seqno_request_target(struct babel_entry *e, struct babel_neighbor *skip) +{ + struct babel_route *r, *best_feasible = NULL, *best_any = NULL; + + WALK_LIST(r, e->routes) + { + if (r->neigh == skip) + continue; + + if (r->feasible && (!best_feasible || r->metric < best_feasible->metric)) + best_feasible = r; + + if (!best_any || r->metric < best_any->metric) + best_any = r; + } + + if (best_feasible) + return best_feasible->neigh; + + if (best_any) + return best_any->neigh; + + return NULL; +} + void babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) { struct babel_proto *p = ifa->proto; struct babel_msg_seqno_request *msg = &m->seqno_request; - /* RFC 6126 3.8.1.2 */ + /* RFC 8966 3.8.1.2 */ TRACE(D_PACKETS, "Handling seqno request for %N router-id %lR seqno %d hop count %d", &msg->net, msg->router_id, msg->seqno, msg->hop_count); @@ -1393,26 +1462,21 @@ babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) { /* Not ours; forward if TTL allows it */ - /* Find best admissible route */ - struct babel_route *r, *best1 = NULL, *best2 = NULL; - WALK_LIST(r, e->routes) - if ((r->router_id == msg->router_id) && !ipa_equal(r->neigh->addr, msg->sender)) - { - /* Find best feasible route */ - if ((!best1 || r->metric < best1->metric) && r->feasible) - best1 = r; + struct babel_neighbor *nbr, *target; - /* Find best not necessary feasible route */ - if (!best2 || r->metric < best2->metric) - best2 = r; - } + nbr = babel_find_neighbor(ifa, msg->sender); + if (!nbr) + return; - /* If no route is found, do nothing */ - r = best1 ?: best2; - if (!r) + target = babel_find_seqno_request_target(e, nbr); + if (!target) + { + TRACE(D_PACKETS, "No neighbor to forward seqno request for %N router-id %lR seqno %d to", + e->n.addr, msg->router_id, msg->seqno); return; + } - babel_add_seqno_request(p, e, msg->router_id, msg->seqno, msg->hop_count-1, r->neigh); + babel_add_seqno_request(p, e, msg->router_id, msg->seqno, msg->hop_count-1, target); } } @@ -1506,7 +1570,8 @@ babel_auth_check_pc(struct babel_iface *ifa, struct babel_msg_auth *msg) n->auth_index_len = msg->index_len; memcpy(n->auth_index, msg->index, msg->index_len); - n->auth_pc = msg->pc; + n->auth_pc_unicast = msg->pc; + n->auth_pc_multicast = msg->pc; n->auth_passed = 1; return 1; @@ -1525,16 +1590,30 @@ babel_auth_check_pc(struct babel_iface *ifa, struct babel_msg_auth *msg) return 0; } - /* (6) Index matches; only accept if PC is greater than last */ - if (n->auth_pc >= msg->pc) + /* + * (6) Index matches; only accept if PC is greater than last. We keep separate + * counters for unicast and multicast because multicast packets can be delayed + * significantly on wireless networks (enough to be received out of order). + * Separate counters are safe because the packet destination address is part + * of the MAC pseudo-header (so unicast packets can't be replayed as multicast + * and vice versa). + */ + u32 auth_pc = msg->unicast ? n->auth_pc_unicast : n->auth_pc_multicast; + if (auth_pc >= msg->pc) { LOG_PKT_AUTH("Authentication failed for %I on %s - " - "lower packet counter (rcv %u, old %u)", - msg->sender, ifa->ifname, msg->pc, n->auth_pc); + "lower %s packet counter (rcv %u, old %u)", + msg->sender, ifa->ifname, + msg->unicast ? "unicast" : "multicast", + msg->pc, auth_pc); return 0; } - n->auth_pc = msg->pc; + if (msg->unicast) + n->auth_pc_unicast = msg->pc; + else + n->auth_pc_multicast = msg->pc; + n->auth_passed = 1; return 1; @@ -1577,7 +1656,7 @@ babel_iface_timer(timer *t) if (now_ >= ifa->next_hello) { - babel_send_hello(ifa); + babel_send_hello(ifa, 0); ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period); } @@ -1624,7 +1703,7 @@ babel_iface_start(struct babel_iface *ifa) tm_start(ifa->timer, 100 MS); ifa->up = 1; - babel_send_hello(ifa); + babel_send_hello(ifa, 0); babel_send_wildcard_retraction(ifa); babel_send_wildcard_request(ifa); babel_send_update(ifa, 0); /* Full update */ @@ -1686,7 +1765,7 @@ babel_iface_update_addr4(struct babel_iface *ifa) ip_addr addr4 = ifa->iface->addr4 ? ifa->iface->addr4->ip : IPA_NONE; ifa->next_hop_ip4 = ipa_nonzero(ifa->cf->next_hop_ip4) ? ifa->cf->next_hop_ip4 : addr4; - if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel) + if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel && !ifa->cf->ext_next_hop) log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, ifa->ifname); if (ifa->up) @@ -1725,9 +1804,9 @@ babel_find_iface(struct babel_proto *p, struct iface *what) } static void -babel_iface_locked(struct object_lock *lock) +babel_iface_locked(void *_ifa) { - struct babel_iface *ifa = lock->data; + struct babel_iface *ifa = _ifa; struct babel_proto *p = ifa->proto; if (!babel_open_socket(ifa)) @@ -1747,7 +1826,7 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con TRACE(D_EVENTS, "Adding interface %s", new->name); - pool *pool = rp_new(p->p.pool, p->p.loop, new->name); + pool *pool = rp_new(p->p.pool, new->name); ifa = mb_allocz(pool, sizeof(struct babel_iface)); ifa->proto = p; @@ -1763,8 +1842,8 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con ifa->next_hop_ip4 = ipa_nonzero(ic->next_hop_ip4) ? ic->next_hop_ip4 : addr4; ifa->next_hop_ip6 = ipa_nonzero(ic->next_hop_ip6) ? ic->next_hop_ip6 : ifa->addr; - if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel) - log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, new->name); + if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel && !ic->ext_next_hop) + log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, ifa->ifname); init_list(&ifa->neigh_list); ifa->hello_seqno = 1; @@ -1782,8 +1861,11 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con lock->addr = IP6_BABEL_ROUTERS; lock->port = ifa->cf->port; lock->iface = ifa->iface; - lock->hook = babel_iface_locked; - lock->data = ifa; + lock->event = (event) { + .hook = babel_iface_locked, + .data = ifa, + }; + lock->target = &global_event_list; olock_acquire(lock); } @@ -1799,7 +1881,7 @@ babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa) rem_node(NODE ifa); - rp_free(ifa->pool, p->p.pool); /* contains ifa itself, locks, socket, etc */ + rfree(ifa->pool); /* contains ifa itself, locks, socket, etc */ } static int @@ -1884,7 +1966,7 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b if ((new->auth_type != BABEL_AUTH_NONE) && (new->auth_type != old->auth_type)) babel_auth_reset_index(ifa); - if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel) + if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel && !new->ext_next_hop) log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, ifa->ifname); if (ifa->next_hello > (current_time() + new->hello_interval)) @@ -1905,11 +1987,11 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b static void babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) { - struct iface *iface; - - IFACE_LEGACY_ACCESS; - WALK_LIST(iface, global_iface_list) + IFACE_WALK(iface) { + if (p->p.vrf && p->p.vrf != iface->master) + continue; + if (!(iface->flags & IF_UP)) continue; @@ -1920,7 +2002,7 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) struct babel_iface *ifa = babel_find_iface(p, iface); struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); - if (ic && iface_is_valid(p, iface)) + if (ic && !iface_is_valid(p, iface)) ic = NULL; if (ifa && ic) @@ -2029,41 +2111,45 @@ babel_dump(struct proto *P) } static void -babel_get_route_info(rte *rte, byte *buf) +babel_get_route_info(const rte *rte, byte *buf) { u64 rid = 0; - eattr *e = ea_find(rte->attrs->eattrs, EA_BABEL_ROUTER_ID); + eattr *e = ea_find(rte->attrs, &ea_babel_router_id); if (e) memcpy(&rid, e->u.ptr->data, sizeof(u64)); - buf += bsprintf(buf, " (%d/%d) [%lR]", rte->attrs->pref, - ea_get_int(rte->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY), rid); + buf += bsprintf(buf, " (%d/%d) [%lR]", + rt_get_preference(rte), + ea_get_int(rte->attrs, &ea_babel_metric, BABEL_INFINITY), rid); } -static int -babel_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +babel_router_id_format(const eattr *a, byte *buf, uint len) { - switch (a->id) - { - case EA_BABEL_SEQNO: - return GA_FULL; + u64 rid = 0; + memcpy(&rid, a->u.ptr->data, sizeof(u64)); + bsnprintf(buf, len, "%lR", rid); +} - case EA_BABEL_METRIC: - bsprintf(buf, "metric: %d", a->u.data); - return GA_FULL; +static struct ea_class ea_babel_metric = { + .name = "babel_metric", + .type = T_INT, +}; - case EA_BABEL_ROUTER_ID: - { - u64 rid = 0; - memcpy(&rid, a->u.ptr->data, sizeof(u64)); - bsprintf(buf, "router_id: %lR", rid); - return GA_FULL; - } +static struct ea_class ea_babel_router_id = { + .name = "babel_router_id", + .type = T_OPAQUE, + .readonly = 1, + .format = babel_router_id_format, +}; + +static struct ea_class ea_babel_seqno = { + .name = "babel_seqno", + .type = T_INT, + .readonly = 1, + .hidden = 1, +}; - default: - return GA_UNKNOWN; - } -} void babel_show_interfaces(struct proto *P, const char *iff) @@ -2261,11 +2347,15 @@ babel_kick_timer(struct babel_proto *p) static int -babel_preexport(struct channel *c, struct rte *new) +babel_preexport(struct channel *C, struct rte *new) { - struct rta *a = new->attrs; + if (new->src->owner != &C->proto->sources) + return 0; + /* Reject our own unreachable routes */ - if ((a->dest == RTD_UNREACHABLE) && (new->src->owner == &c->proto->sources)) + eattr *ea = ea_find(new->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = (void *) ea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad)) return -1; return 0; @@ -2286,13 +2376,13 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, { /* Update */ uint rt_seqno; - uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0); + uint rt_metric = ea_get_int(new->attrs, &ea_babel_metric, 0); u64 rt_router_id = 0; if (new->src->owner == &P->sources) { - rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data; - eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID); + rt_seqno = ea_get_int(new->attrs, &ea_babel_seqno, 0); + eattr *e = ea_find(new->attrs, &ea_babel_router_id); if (e) memcpy(&rt_router_id, e->u.ptr->data, sizeof(u64)); } @@ -2341,18 +2431,18 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, } static int -babel_rte_better(struct rte *new, struct rte *old) +babel_rte_better(const rte *new, const rte *old) { - uint new_metric = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data; - uint old_metric = ea_find(old->attrs->eattrs, EA_BABEL_SEQNO)->u.data; + uint new_metric = ea_get_int(new->attrs, &ea_babel_metric, BABEL_INFINITY); + uint old_metric = ea_get_int(old->attrs, &ea_babel_metric, BABEL_INFINITY); return new_metric < old_metric; } static u32 -babel_rte_igp_metric(struct rte *rt) +babel_rte_igp_metric(const rte *rt) { - return ea_get_int(rt->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY); + return ea_get_int(rt->attrs, &ea_babel_metric, BABEL_INFINITY); } @@ -2389,7 +2479,7 @@ babel_init(struct proto_config *CF) proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel); proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel); - P->if_notify = babel_if_notify; + P->iface_sub.if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->preexport = babel_preexport; @@ -2442,6 +2532,11 @@ babel_iface_shutdown(struct babel_iface *ifa) { if (ifa->sk) { + /* + * Retract all our routes and lower the hello interval so peers' neighbour + * state expires quickly + */ + babel_send_hello(ifa, BABEL_MIN_INTERVAL); babel_send_wildcard_retraction(ifa); babel_send_queue(ifa); } @@ -2489,7 +2584,6 @@ babel_reconfigure(struct proto *P, struct proto_config *CF) struct protocol proto_babel = { .name = "Babel", .template = "babel%d", - .class = PROTOCOL_BABEL, .preference = DEF_PREF_BABEL, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct babel_proto), @@ -2500,5 +2594,16 @@ struct protocol proto_babel = { .start = babel_start, .shutdown = babel_shutdown, .reconfigure = babel_reconfigure, - .get_attr = babel_get_attr }; + +void +babel_build(void) +{ + proto_build(&proto_babel); + + EA_REGISTER_ALL( + &ea_babel_metric, + &ea_babel_router_id, + &ea_babel_seqno + ); +} diff --git a/proto/babel/babel.h b/proto/babel/babel.h index 8b6da3c8..562abac2 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -16,7 +16,7 @@ #include "nest/bird.h" #include "nest/cli.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/locks.h" #include "nest/password.h" @@ -26,10 +26,6 @@ #include "lib/string.h" #include "lib/timer.h" -#define EA_BABEL_METRIC EA_CODE(PROTOCOL_BABEL, 0) -#define EA_BABEL_ROUTER_ID EA_CODE(PROTOCOL_BABEL, 1) -#define EA_BABEL_SEQNO EA_CODE(PROTOCOL_BABEL, 2) - #define BABEL_MAGIC 42 #define BABEL_VERSION 2 #define BABEL_PORT 6696 @@ -48,6 +44,8 @@ #define BABEL_ROUTE_REFRESH_FACTOR(X) ((btime)(X)*5/2) /* 2.5 */ #define BABEL_SEQNO_REQUEST_RETRY 4 #define BABEL_SEQNO_REQUEST_EXPIRY (2 S_) +#define BABEL_SEQNO_FORWARD_EXPIRY (10 S_) +#define BABEL_SEQNO_DUP_SUPPRESS_TIME (1 S_) #define BABEL_GARBAGE_INTERVAL (300 S_) #define BABEL_RXCOST_WIRED 96 #define BABEL_RXCOST_WIRELESS 256 @@ -112,6 +110,7 @@ enum babel_ae_type { BABEL_AE_IP4 = 1, BABEL_AE_IP6 = 2, BABEL_AE_IP6_LL = 3, + BABEL_AE_IP4_VIA_IP6 = 4, BABEL_AE_MAX }; @@ -145,8 +144,9 @@ struct babel_iface_config { ip_addr next_hop_ip4; ip_addr next_hop_ip6; + u8 ext_next_hop; /* Enable IPv4 via IPv6 */ - u8 auth_type; /* Authentication type (BABEL_AUTH_*) */ + u8 auth_type; /* Authentication type (BABEL_AUTH_*) */ u8 auth_permissive; /* Don't drop packets failing auth check */ uint mac_num_keys; /* Number of configured HMAC keys */ uint mac_total_len; /* Total digest length for all configured keys */ @@ -225,7 +225,8 @@ struct babel_neighbor { u16 next_hello_seqno; uint last_hello_int; - u32 auth_pc; + u32 auth_pc_unicast; + u32 auth_pc_multicast; u8 auth_passed; u8 auth_index_len; u8 auth_index[BABEL_AUTH_INDEX_LEN]; @@ -240,7 +241,6 @@ struct babel_neighbor { btime init_expiry; list routes; /* Routes this neighbour has sent us (struct babel_route) */ - list requests; /* Seqno requests bound to this neighbor */ }; struct babel_source { @@ -270,13 +270,13 @@ struct babel_route { struct babel_seqno_request { node n; - node nbr_node; u64 router_id; u16 seqno; + u8 forwarded; u8 hop_count; u8 count; btime expires; - struct babel_neighbor *nbr; + btime dup_suppress_time; }; struct babel_entry { @@ -404,6 +404,7 @@ struct babel_msg_auth { u8 challenge_seen; u8 challenge_len; u8 challenge[BABEL_AUTH_MAX_NONCE_LEN]; + u8 unicast; }; static inline int babel_sadr_enabled(struct babel_proto *p) diff --git a/proto/babel/config.Y b/proto/babel/config.Y index 05210fa4..6d1ad7d0 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -24,8 +24,9 @@ CF_DECLS CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK, - NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS, - ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE) + NEXT, HOP, IPV4, IPV6, SHOW, INTERFACES, NEIGHBORS, + ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE, + EXTENDED) CF_GRAMMAR @@ -67,6 +68,7 @@ babel_iface_start: BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL; BABEL_IFACE->tx_priority = sk_priority_control; BABEL_IFACE->check_link = 1; + BABEL_IFACE->ext_next_hop = 1; }; @@ -143,6 +145,7 @@ babel_iface_item: | CHECK LINK bool { BABEL_IFACE->check_link = $3; } | NEXT HOP IPV4 ipa { BABEL_IFACE->next_hop_ip4 = $4; if (!ipa_is_ip4($4)) cf_error("Must be an IPv4 address"); } | NEXT HOP IPV6 ipa { BABEL_IFACE->next_hop_ip6 = $4; if (!ipa_is_ip6($4)) cf_error("Must be an IPv6 address"); } + | EXTENDED NEXT HOP bool { BABEL_IFACE->ext_next_hop = $4; } | AUTHENTICATION NONE { BABEL_IFACE->auth_type = BABEL_AUTH_NONE; } | AUTHENTICATION MAC { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 0; } | AUTHENTICATION MAC PERMISSIVE { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 1; } @@ -163,8 +166,6 @@ babel_iface_opt_list: babel_iface: babel_iface_start iface_patt_list_nopx babel_iface_opt_list babel_iface_finish; -dynamic_attr: BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_BABEL_METRIC); } ; - CF_CLI_HELP(SHOW BABEL, ..., [[Show information about Babel protocol]]); CF_CLI(SHOW BABEL INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about Babel interfaces]]) diff --git a/proto/babel/packets.c b/proto/babel/packets.c index f13410e2..d26ee5c6 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -166,10 +166,12 @@ struct babel_parse_state { ip_addr next_hop_ip6; u64 router_id; /* Router ID used in subsequent updates */ u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */ - u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */ + u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix (AE 1) in network order */ + u8 def_ip4_via_ip6_prefix[4]; /* Implicit IPv4 prefix (AE 4) in network order */ u8 router_id_seen; /* router_id field is valid */ u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */ u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */ + u8 def_ip4_via_ip6_prefix_seen; /* def_ip4_via_ip6_prefix is valid */ u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */ u8 sadr_enabled; u8 is_unicast; @@ -515,9 +517,6 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m, msg->addr = IPA_NONE; msg->sender = state->saddr; - if (msg->ae >= BABEL_AE_MAX) - return PARSE_IGNORE; - /* * We only actually read link-local IPs. In every other case, the addr field * will be 0 but validation will succeed. The handler takes care of these @@ -526,17 +525,20 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m, */ switch (msg->ae) { + case BABEL_AE_WILDCARD: + return PARSE_SUCCESS; + case BABEL_AE_IP4: if (TLV_OPT_LENGTH(tlv) < 4) return PARSE_ERROR; state->current_tlv_endpos += 4; - break; + return PARSE_SUCCESS; case BABEL_AE_IP6: if (TLV_OPT_LENGTH(tlv) < 16) return PARSE_ERROR; state->current_tlv_endpos += 16; - break; + return PARSE_SUCCESS; case BABEL_AE_IP6_LL: if (TLV_OPT_LENGTH(tlv) < 8) @@ -544,10 +546,17 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m, msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr)); state->current_tlv_endpos += 8; - break; + return PARSE_SUCCESS; + + /* RFC 9229 2.4 - IHU TLV MUST NOT carry the AE 4 (IPv4-via-IPv6) */ + case BABEL_AE_IP4_VIA_IP6: + return PARSE_ERROR; + + default: + return PARSE_IGNORE; } - return PARSE_SUCCESS; + return PARSE_IGNORE; } static uint @@ -640,6 +649,10 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED, state->current_tlv_endpos += 8; return PARSE_IGNORE; + /* RFC 9229 2.4 - Next Hop TLV MUST NOT carry the AE 4 (IPv4-via-IPv6) */ + case BABEL_AE_IP4_VIA_IP6: + return PARSE_ERROR; + default: return PARSE_IGNORE; } @@ -692,6 +705,42 @@ babel_write_next_hop(struct babel_tlv *hdr, ip_addr addr, return 0; } +/* This is called directly from babel_read_update() to handle + both BABEL_AE_IP4 and BABEL_AE_IP4_VIA_IP6 encodings */ +static int +babel_read_ip4_prefix(struct babel_tlv_update *tlv, struct babel_msg_update *msg, + u8 *def_prefix, u8 *def_prefix_seen, ip_addr next_hop, int len) +{ + if (tlv->plen > IP4_MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + /* Cannot omit data if there is no saved prefix */ + if (tlv->omitted && !*def_prefix_seen) + return PARSE_ERROR; + + /* Update must have next hop, unless it is retraction */ + if (ipa_zero(next_hop) && msg->metric != BABEL_INFINITY) + return PARSE_ERROR; + + /* Merge saved prefix and received prefix parts */ + u8 buf[4] = {}; + memcpy(buf, def_prefix, tlv->omitted); + memcpy(buf + tlv->omitted, tlv->addr, len); + + ip4_addr prefix4 = get_ip4(buf); + net_fill_ip4(&msg->net, prefix4, tlv->plen); + + if (tlv->flags & BABEL_UF_DEF_PREFIX) + { + put_ip4(def_prefix, prefix4); + *def_prefix_seen = 1; + } + + msg->next_hop = next_hop; + + return PARSE_SUCCESS; +} + static int babel_read_update(struct babel_tlv *hdr, union babel_msg *m, struct babel_parse_state *state) @@ -706,11 +755,11 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, /* Length of received prefix data without omitted part */ int len = BYTES(tlv->plen) - (int) tlv->omitted; - u8 buf[16] = {}; if ((len < 0) || ((uint) len > TLV_OPT_LENGTH(tlv))) return PARSE_ERROR; + int rc; switch (tlv->ae) { case BABEL_AE_WILDCARD: @@ -724,31 +773,20 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, break; case BABEL_AE_IP4: - if (tlv->plen > IP4_MAX_PREFIX_LENGTH) - return PARSE_ERROR; - - /* Cannot omit data if there is no saved prefix */ - if (tlv->omitted && !state->def_ip4_prefix_seen) - return PARSE_ERROR; - - /* Update must have next hop, unless it is retraction */ - if (ipa_zero(state->next_hop_ip4) && (msg->metric != BABEL_INFINITY)) - return PARSE_IGNORE; + rc = babel_read_ip4_prefix(tlv, msg, state->def_ip4_prefix, + &state->def_ip4_prefix_seen, + state->next_hop_ip4, len); + if (rc != PARSE_SUCCESS) + return rc; - /* Merge saved prefix and received prefix parts */ - memcpy(buf, state->def_ip4_prefix, tlv->omitted); - memcpy(buf + tlv->omitted, tlv->addr, len); - - ip4_addr prefix4 = get_ip4(buf); - net_fill_ip4(&msg->net, prefix4, tlv->plen); - - if (tlv->flags & BABEL_UF_DEF_PREFIX) - { - put_ip4(state->def_ip4_prefix, prefix4); - state->def_ip4_prefix_seen = 1; - } + break; - msg->next_hop = state->next_hop_ip4; + case BABEL_AE_IP4_VIA_IP6: + rc = babel_read_ip4_prefix(tlv, msg, state->def_ip4_via_ip6_prefix, + &state->def_ip4_via_ip6_prefix_seen, + state->next_hop_ip6, len); + if (rc != PARSE_SUCCESS) + return rc; break; @@ -761,6 +799,7 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m, return PARSE_ERROR; /* Merge saved prefix and received prefix parts */ + u8 buf[16] = {}; memcpy(buf, state->def_ip6_prefix, tlv->omitted); memcpy(buf + tlv->omitted, tlv->addr, len); @@ -863,7 +902,7 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m, } else if (msg->net.type == NET_IP4) { - tlv->ae = BABEL_AE_IP4; + tlv->ae = ipa_is_ip4(msg->next_hop) ? BABEL_AE_IP4 : BABEL_AE_IP4_VIA_IP6; tlv->plen = net4_pxlen(&msg->net); put_ip4_px(tlv->addr, &msg->net); } @@ -931,7 +970,12 @@ babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m, msg->full = 1; return PARSE_SUCCESS; + /* + * RFC 9229 2.3 - When receiving requests, AE 1 (IPv4) and AE 4 + * (IPv4-via-IPv6) MUST be treated in the same manner. + */ case BABEL_AE_IP4: + case BABEL_AE_IP4_VIA_IP6: if (tlv->plen > IP4_MAX_PREFIX_LENGTH) return PARSE_ERROR; @@ -1032,7 +1076,12 @@ babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m, case BABEL_AE_WILDCARD: return PARSE_ERROR; + /* + * RFC 9229 2.3 - When receiving requests, AE 1 (IPv4) and AE 4 + * (IPv4-via-IPv6) MUST be treated in the same manner. + */ case BABEL_AE_IP4: + case BABEL_AE_IP4_VIA_IP6: if (tlv->plen > IP4_MAX_PREFIX_LENGTH) return PARSE_ERROR; @@ -1318,7 +1367,6 @@ babel_send_to(struct babel_iface *ifa, ip_addr dest) static uint babel_write_queue(struct babel_iface *ifa, list *queue) { - struct babel_proto *p = ifa->proto; struct babel_write_state state = { .next_hop_ip6 = ifa->addr }; if (EMPTY_LIST(*queue)) @@ -1346,7 +1394,7 @@ babel_write_queue(struct babel_iface *ifa, list *queue) pos += len; rem_node(NODE msg); - sl_free(p->msg_slab, msg); + sl_free(msg); } pos += babel_auth_add_tlvs(ifa, (struct babel_tlv *) pos, end - pos); @@ -1507,13 +1555,13 @@ babel_process_packet(struct babel_iface *ifa, else if (res == PARSE_IGNORE) { DBG("Babel: Ignoring TLV of type %d\n", tlv->type); - sl_free(p->msg_slab, msg); + sl_free(msg); } else /* PARSE_ERROR */ { LOG_PKT("Bad TLV from %I via %s type %d pos %d - parse error", saddr, ifa->iface->name, tlv->type, (int) ((byte *)tlv - (byte *)pkt)); - sl_free(p->msg_slab, msg); + sl_free(msg); break; } } @@ -1525,7 +1573,7 @@ babel_process_packet(struct babel_iface *ifa, if (tlv_data[msg->msg.type].handle_tlv) tlv_data[msg->msg.type].handle_tlv(&msg->msg, ifa); rem_node(NODE msg); - sl_free(p->msg_slab, msg); + sl_free(msg); } } @@ -1618,7 +1666,7 @@ babel_open_socket(struct babel_iface *ifa) sk->ttl = 1; sk->flags = SKF_LADDR_RX; - if (sk_open(sk) < 0) + if (sk_open(sk, p->p.loop) < 0) goto err; if (sk_setup_multicast(sk) < 0) @@ -1656,6 +1704,7 @@ babel_read_pc(struct babel_tlv *hdr, union babel_msg *m UNUSED, state->auth.pc_seen = 1; state->auth.index_len = index_len; state->auth.index = tlv->index; + state->auth.unicast = state->is_unicast; state->current_tlv_endpos += index_len; return PARSE_SUCCESS; @@ -2011,7 +2060,7 @@ babel_auth_sign(struct babel_iface *ifa, ip_addr dest) } DBG("Added MAC signatures (%d bytes) on ifa %s for dest %I\n", - tot_len, ifa->ifname, dest); + pos - (pkt + len), ifa->ifname, dest); return pos - (pkt + len); } diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 479c1510..c88c1cb2 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -82,7 +82,7 @@ * BFD thread to the main thread. This is done in an asynchronous way, sesions * with pending notifications are linked (in the BFD thread) to @notify_list in * &bfd_proto, and then bfd_notify_hook() in the main thread is activated using - * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and + * a standard event sending code. The hook then processes scheduled sessions and * calls hooks from associated BFD requests. This @notify_list (and state fields * in structure &bfd_session) is protected by a spinlock in &bfd_proto and * functions bfd_lock_sessions() / bfd_unlock_sessions(). @@ -113,26 +113,22 @@ #define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2 #define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n) -DEFINE_DOMAIN(rtable); #define BFD_LOCK LOCK_DOMAIN(rtable, bfd_global.lock) #define BFD_UNLOCK UNLOCK_DOMAIN(rtable, bfd_global.lock) -#define BFD_ASSERT_LOCKED ASSERT_DIE(DOMAIN_IS_LOCKED(rtable, bfd_global.lock)) static struct { DOMAIN(rtable) lock; list wait_list; + list pickup_list; list proto_list; + uint pickup_reload; } bfd_global; -static struct bfd_session bfd_admin_down = { .loc = ATOMIC_VAR_INIT((struct bfd_session_state) { .state = BFD_STATE_ADMIN_DOWN }), }; - const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; static void bfd_session_set_min_tx(struct bfd_session *s, u32 val); static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface); static void bfd_free_iface(struct bfd_iface *ifa); -static void bfd_remove_session(struct bfd_proto *p, struct bfd_session *s); -static void bfd_reconfigure_session_hook(void *vsession); /* @@ -151,57 +147,37 @@ bfd_merge_options(const struct bfd_iface_config *cf, const struct bfd_options *o }; } -static int +static void bfd_session_update_state(struct bfd_session *s, uint state, uint diag) { struct bfd_proto *p = s->ifa->bfd; - uint old_state = BFD_LOC_STATE(s).state; + uint old_state = s->loc_state; + int notify; if (state == old_state) - { - if (current_time() > s->last_reqlist_check + 5 S) - { - BFD_LOCK; - if (EMPTY_LIST(s->request_list)) - { - bfd_remove_session(p, s); - BFD_UNLOCK; - return 1; - } - - s->last_reqlist_check = current_time(); - BFD_UNLOCK; - } - return 0; - } + return; TRACE(D_EVENTS, "Session to %I changed state from %s to %s", s->addr, bfd_state_names[old_state], bfd_state_names[state]); - atomic_store_explicit(&s->loc, ((struct bfd_session_state) { .state = state, .diag = diag }), memory_order_release); + bfd_lock_sessions(p); + s->loc_state = state; + s->loc_diag = diag; s->last_state_change = current_time(); + notify = !NODE_VALID(&s->n); + if (notify) + add_tail(&p->notify_list, &s->n); + bfd_unlock_sessions(p); + if (state == BFD_STATE_UP) bfd_session_set_min_tx(s, s->cf.min_tx_int); if (old_state == BFD_STATE_UP) bfd_session_set_min_tx(s, s->cf.idle_tx_int); - BFD_LOCK; - if (EMPTY_LIST(s->request_list)) - { - bfd_remove_session(p, s); - BFD_UNLOCK; - return 1; - } - - struct bfd_request *req; - node *nn; - WALK_LIST2(req, nn, s->request_list, n) - ev_send_self(&req->event); - - BFD_UNLOCK; - return 0; + if (notify) + ev_send(&global_event_list, &p->notify_event); } static void @@ -246,8 +222,8 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) if (s->rem_demand_mode && !s->poll_active && - (BFD_LOC_STATE(s).state == BFD_STATE_UP) && - (s->rem.state == BFD_STATE_UP)) + (s->loc_state == BFD_STATE_UP) && + (s->rem_state == BFD_STATE_UP)) goto stop; if (s->rem_min_rx_int == 0) @@ -317,29 +293,28 @@ bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old int next_state = 0; int diag = BFD_DIAG_NOTHING; - switch (BFD_LOC_STATE(s).state) + switch (s->loc_state) { case BFD_STATE_ADMIN_DOWN: return; case BFD_STATE_DOWN: - if (s->rem.state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT; - else if (s->rem.state == BFD_STATE_INIT) next_state = BFD_STATE_UP; + if (s->rem_state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT; + else if (s->rem_state == BFD_STATE_INIT) next_state = BFD_STATE_UP; break; case BFD_STATE_INIT: - if (s->rem.state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; - else if (s->rem.state >= BFD_STATE_INIT) next_state = BFD_STATE_UP; + if (s->rem_state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; + else if (s->rem_state >= BFD_STATE_INIT) next_state = BFD_STATE_UP; break; case BFD_STATE_UP: - if (s->rem.state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; + if (s->rem_state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN; break; } if (next_state) - if (bfd_session_update_state(s, next_state, diag)) - return; + bfd_session_update_state(s, next_state, diag); bfd_session_control_tx_timer(s, 0); @@ -354,7 +329,7 @@ bfd_session_timeout(struct bfd_session *s) TRACE(D_EVENTS, "Session to %I expired", s->addr); - s->rem.state = BFD_STATE_DOWN; + s->rem_state = BFD_STATE_DOWN; s->rem_id = 0; s->rem_min_tx_int = 0; s->rem_min_rx_int = 1; @@ -365,8 +340,7 @@ bfd_session_timeout(struct bfd_session *s) s->poll_active = 0; s->poll_scheduled = 0; - if (bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT)) - return; + bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT); bfd_session_control_tx_timer(s, 1); } @@ -382,7 +356,7 @@ bfd_session_set_min_tx(struct bfd_session *s, u32 val) s->des_min_tx_new = val; /* Postpone timer update if des_min_tx_int increases and the session is up */ - if ((BFD_LOC_STATE(s).state != BFD_STATE_UP) || (val < s->des_min_tx_int)) + if ((s->loc_state != BFD_STATE_UP) || (val < s->des_min_tx_int)) { s->des_min_tx_int = val; bfd_session_update_tx_interval(s); @@ -402,7 +376,7 @@ bfd_session_set_min_rx(struct bfd_session *s, u32 val) s->req_min_rx_new = val; /* Postpone timer update if req_min_rx_int decreases and the session is up */ - if ((BFD_LOC_STATE(s).state != BFD_STATE_UP) || (val > s->req_min_rx_int)) + if ((s->loc_state != BFD_STATE_UP) || (val > s->req_min_rx_int)) { s->req_min_rx_int = val; bfd_session_update_detection_time(s, 0); @@ -414,14 +388,12 @@ bfd_session_set_min_rx(struct bfd_session *s, u32 val) struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id) { - ASSERT_DIE(birdloop_inside(p->p.loop)); return HASH_FIND(p->session_hash_id, HASH_ID, id); } struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr, uint ifindex) { - ASSERT_DIE(birdloop_inside(p->p.loop)); return HASH_FIND(p->session_hash_ip, HASH_IP, addr, ifindex); } @@ -455,7 +427,6 @@ static struct bfd_session * bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface, struct bfd_options *opts) { ASSERT_DIE(birdloop_inside(p->p.loop)); - BFD_ASSERT_LOCKED; struct bfd_iface *ifa = bfd_get_iface(p, local, iface); @@ -469,15 +440,10 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * HASH_INSERT(p->session_hash_ip, HASH_IP, s); s->cf = bfd_merge_options(ifa->cf, opts); - s->update_event = (event) { - .hook = bfd_reconfigure_session_hook, - .data = s, - .list = birdloop_event_list(p->p.loop), - }; /* Initialization of state variables - see RFC 5880 6.8.1 */ - atomic_store_explicit(&s->loc, ((struct bfd_session_state) { .state = BFD_STATE_DOWN }), memory_order_relaxed); - s->rem.state = BFD_STATE_DOWN; + s->loc_state = BFD_STATE_DOWN; + s->rem_state = BFD_STATE_DOWN; s->des_min_tx_int = s->des_min_tx_new = s->cf.idle_tx_int; s->req_min_rx_int = s->req_min_rx_new = s->cf.min_rx_int; s->rem_min_rx_int = 1; @@ -485,8 +451,8 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * s->passive = s->cf.passive; s->tx_csn = random_u32(); - s->tx_timer = tm_new_init(p->p.pool, bfd_tx_timer_hook, s, 0, 0); - s->hold_timer = tm_new_init(p->p.pool, bfd_hold_timer_hook, s, 0, 0); + s->tx_timer = tm_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); + s->hold_timer = tm_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); bfd_session_update_tx_interval(s); bfd_session_control_tx_timer(s, 1); @@ -498,12 +464,42 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * return s; } +/* static void -bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) +bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa) { - ASSERT_DIE(birdloop_inside(p->p.loop)); - BFD_ASSERT_LOCKED; - ASSERT_DIE(EMPTY_LIST(s->request_list)); + birdloop_enter(p->p.loop); + + s->opened = 1; + + bfd_session_control_tx_timer(s); + + birdloop_leave(p->p.loop); +} + +static void +bfd_close_session(struct bfd_proto *p, struct bfd_session *s) +{ + birdloop_enter(p->p.loop); + + s->opened = 0; + + bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN); + bfd_session_control_tx_timer(s); + + birdloop_leave(p->p.loop); +} +*/ + +static void +bfd_remove_session_locked(struct bfd_proto *p, struct bfd_session *s) +{ + /* Caller should ensure that request list is empty */ + + /* Remove session from notify list if scheduled for notification */ + /* No need for bfd_lock_sessions(), we are already protected by birdloop_enter() */ + if (NODE_VALID(&s->n)) + rem_node(&s->n); bfd_free_iface(s->ifa); @@ -515,25 +511,29 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) TRACE(D_EVENTS, "Session to %I removed", s->addr); - sl_free(p->session_slab, s); + sl_free(s); +} + +static void +bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) +{ + birdloop_enter(p->p.loop); + bfd_remove_session_locked(p, s); + birdloop_leave(p->p.loop); } static void bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) { - ASSERT_DIE(birdloop_inside(p->p.loop)); - BFD_LOCK; if (EMPTY_LIST(s->request_list)) - { - bfd_remove_session(p, s); - BFD_UNLOCK; return; - } + + birdloop_enter(p->p.loop); struct bfd_request *req = SKIP_BACK(struct bfd_request, n, HEAD(s->request_list)); s->cf = bfd_merge_options(s->ifa->cf, &req->opts); - u32 tx = (BFD_LOC_STATE(s).state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int; + u32 tx = (s->loc_state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int; bfd_session_set_min_tx(s, tx); bfd_session_set_min_rx(s, s->cf.min_rx_int); s->detect_mult = s->cf.multiplier; @@ -541,15 +541,9 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) bfd_session_control_tx_timer(s, 0); - TRACE(D_EVENTS, "Session to %I reconfigured", s->addr); - BFD_UNLOCK; -} + birdloop_leave(p->p.loop); -static void -bfd_reconfigure_session_hook(void *data) -{ - struct bfd_session *s = data; - return bfd_reconfigure_session(s->ifa->bfd, s); + TRACE(D_EVENTS, "Session to %I reconfigured", s->addr); } @@ -586,7 +580,7 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) struct bfd_config *cf = (struct bfd_config *) (p->p.cf); struct bfd_iface_config *ic = bfd_find_iface_config(cf, iface); - ifa = mb_allocz(p->p.pool, sizeof(struct bfd_iface)); + ifa = mb_allocz(p->tpool, sizeof(struct bfd_iface)); ifa->local = local; ifa->iface = iface; ifa->cf = ic; @@ -595,6 +589,9 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) ifa->sk = bfd_open_tx_sk(p, local, iface); ifa->uc = 1; + if (cf->strict_bind) + ifa->rx = bfd_open_rx_sk_bound(p, local, iface); + add_tail(&p->iface_list, &ifa->n); return ifa; @@ -607,17 +604,17 @@ bfd_free_iface(struct bfd_iface *ifa) return; if (ifa->sk) - { - sk_stop(ifa->sk); rfree(ifa->sk); - } + + if (ifa->rx) + rfree(ifa->rx); rem_node(&ifa->n); mb_free(ifa); } static void -bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct bfd_config *nc) +bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_config *nc) { struct bfd_iface_config *new = bfd_find_iface_config(nc, ifa->iface); struct bfd_iface_config *old = ifa->cf; @@ -631,7 +628,9 @@ bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct (new->passive != old->passive); /* This should be probably changed to not access ifa->cf from the BFD thread */ + birdloop_enter(p->p.loop); ifa->cf = new; + birdloop_leave(p->p.loop); } @@ -640,55 +639,77 @@ bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct */ static void -bfd_request_notify(void *data) +bfd_request_notify(struct bfd_request *req, u8 state, u8 diag) { - struct bfd_request *req = data; - struct bfd_session_state old = req->old_state; - - BFD_LOCK; /* Needed to safely access req->session */ - struct bfd_session_state new = atomic_load_explicit(&req->session->loc, memory_order_acquire); - BFD_UNLOCK; + u8 old_state = req->state; - if (new.state == old.state) + if (state == old_state) return; - req->state = new.state; - req->diag = new.diag; - req->old_state = new; - req->down = (old.state == BFD_STATE_UP) && (new.state == BFD_STATE_DOWN); + req->state = state; + req->diag = diag; + req->old_state = old_state; + req->down = (old_state == BFD_STATE_UP) && (state == BFD_STATE_DOWN); if (req->hook) + { + struct birdloop *target = !birdloop_inside(req->target) ? req->target : NULL; + + if (target) + birdloop_enter(target); + req->hook(req); + + if (target) + birdloop_leave(target); + } } static int bfd_add_request(struct bfd_proto *p, struct bfd_request *req) { - BFD_ASSERT_LOCKED; - ASSERT_DIE(req->session == &bfd_admin_down); - struct bfd_config *cf = (struct bfd_config *) (p->p.cf); if (p->p.vrf && (p->p.vrf != req->vrf)) + { + TRACE(D_EVENTS, "Not accepting request to %I with different VRF", req->addr); return 0; + } if (ipa_is_ip4(req->addr) ? !cf->accept_ipv4 : !cf->accept_ipv6) + { + TRACE(D_EVENTS, "Not accepting request to %I (AF limit)", req->addr); return 0; + } if (req->iface ? !cf->accept_direct : !cf->accept_multihop) + { + TRACE(D_EVENTS, "Not accepting %s request to %I", req->iface ? "direct" : "multihop", req->addr); return 0; + } uint ifindex = req->iface ? req->iface->index : 0; struct bfd_session *s = bfd_find_session_by_addr(p, req->addr, ifindex); - if (!s) + if (s) + TRACE(D_EVENTS, "Session to %I reused", s->addr); + else s = bfd_add_session(p, req->addr, req->local, req->iface, &req->opts); rem_node(&req->n); add_tail(&s->request_list, &req->n); req->session = s; - ev_send_self(&req->event); + bfd_lock_sessions(p); + + int notify = !NODE_VALID(&s->n); + if (notify) + add_tail(&p->notify_list, &s->n); + + bfd_unlock_sessions(p); + + if (notify) + ev_send(&global_event_list, &p->notify_event); return 1; } @@ -696,35 +717,89 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req) static void bfd_pickup_requests(void *_data UNUSED) { - struct bfd_proto *p; - node *nn; - WALK_LIST2(p, nn, bfd_global.proto_list, bfd_node) - { - birdloop_enter(p->p.loop); + /* NOTE TO MY FUTURE SELF + * + * Functions bfd_take_requests() and bfd_drop_requests() need to have + * consistent &bfd_global.wait_list and this is ensured only by having these + * functions called from bfd_start() and bfd_shutdown() which are both called + * in PROTO_LOCKED_FROM_MAIN context, i.e. always from &main_birdloop. + * + * This pickup event is also called in &main_birdloop, therefore we can + * freely do BFD_LOCK/BFD_UNLOCK while processing all the requests. All BFD + * protocols capable of bfd_add_request() are either started before this code + * happens or after that. + * + * If BFD protocols could start in parallel with this routine, they might + * miss some of the waiting requests, thus if anybody tries to start + * protocols or run this pickup event outside &main_birdloop in future, they + * shall ensure that this race condition is mitigated somehow. + * + * Thank you, my future self, for understanding. Have a nice day! + */ + + DBG("BFD pickup loop starting"); + + BFD_LOCK; + do { + bfd_global.pickup_reload = 0; + BFD_UNLOCK; + + node *n; + WALK_LIST(n, bfd_global.proto_list) + { + struct bfd_proto *p = SKIP_BACK(struct bfd_proto, bfd_node, n); + birdloop_enter(p->p.loop); + BFD_LOCK; + + TRACE(D_EVENTS, "Picking up new requests (%d available)", list_length(&bfd_global.pickup_list)); + + node *rn, *rnxt; + WALK_LIST_DELSAFE(rn, rnxt, bfd_global.pickup_list) + bfd_add_request(p, SKIP_BACK(struct bfd_request, n, rn)); + + BFD_UNLOCK; + + /* Remove sessions with no requests */ + HASH_WALK_DELSAFE(p->session_hash_id, next_id, s) + { + if (EMPTY_LIST(s->request_list)) + bfd_remove_session_locked(p, s); + } + HASH_WALK_END; + + birdloop_leave(p->p.loop); + } + BFD_LOCK; + } while (bfd_global.pickup_reload); - struct bfd_request *req; - node *rn, *rnxt; - WALK_LIST2_DELSAFE(req, rn, rnxt, bfd_global.wait_list, n) - bfd_add_request(p, req); + list tmp_list; + init_list(&tmp_list); + add_tail_list(&tmp_list, &bfd_global.pickup_list); - BFD_UNLOCK; - birdloop_ping(p->p.loop); - birdloop_leave(p->p.loop); - } + init_list(&bfd_global.pickup_list); + BFD_UNLOCK; + + log(L_TRACE "No protocol for %d BFD requests", list_length(&tmp_list)); + + node *n; + WALK_LIST(n, tmp_list) + bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), BFD_STATE_ADMIN_DOWN, 0); + + BFD_LOCK; + add_tail_list(&bfd_global.wait_list, &tmp_list); + BFD_UNLOCK; } static event bfd_pickup_event = { .hook = bfd_pickup_requests }; -#define bfd_schedule_pickup() ev_send(&global_event_list, &bfd_pickup_event) static void bfd_take_requests(struct bfd_proto *p) { - struct bfd_request *req; node *n, *nn; BFD_LOCK; - WALK_LIST2_DELSAFE(req, n, nn, bfd_global.wait_list, n) - bfd_add_request(p, req); + WALK_LIST_DELSAFE(n, nn, bfd_global.wait_list) + bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n)); BFD_UNLOCK; } @@ -739,13 +814,13 @@ bfd_drop_requests(struct bfd_proto *p) { struct bfd_request *req = SKIP_BACK(struct bfd_request, n, n); rem_node(&req->n); - add_tail(&bfd_global.wait_list, &req->n); - req->session = &bfd_admin_down; - ev_send_self(&req->event); + add_tail(&bfd_global.pickup_list, &req->n); + req->session = NULL; } - bfd_schedule_pickup(); - bfd_remove_session(p, s); + ev_send(&global_event_list, &bfd_pickup_event); + + bfd_remove_session_locked(p, s); } HASH_WALK_END; BFD_UNLOCK; @@ -757,7 +832,7 @@ struct bfd_request * bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, struct iface *vrf, void (*hook)(struct bfd_request *), void *data, - struct event_list *list, + struct birdloop *target, const struct bfd_options *opts) { struct bfd_request *req = ralloc(p, &bfd_request_class); @@ -770,18 +845,18 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, if (opts) req->opts = *opts; + ASSERT_DIE(target || !hook); req->hook = hook; req->data = data; - req->event = (event) { - .hook = bfd_request_notify, - .data = req, - .list = list, - }; + req->target = target; + + req->session = NULL; BFD_LOCK; - req->session = &bfd_admin_down; - add_tail(&bfd_global.wait_list, &req->n); - bfd_schedule_pickup(); + bfd_global.pickup_reload++; + add_tail(&bfd_global.pickup_list, &req->n); + ev_send(&global_event_list, &bfd_pickup_event); + DBG("New BFD request enlisted.\n"); BFD_UNLOCK; return req; @@ -790,17 +865,15 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, void bfd_update_request(struct bfd_request *req, const struct bfd_options *opts) { + struct bfd_session *s = req->session; + if (!memcmp(opts, &req->opts, sizeof(const struct bfd_options))) return; - BFD_LOCK; req->opts = *opts; - struct bfd_session *s = req->session; - if (s != &bfd_admin_down) - ev_send_self(&s->update_event); - - BFD_UNLOCK; + if (s) + bfd_reconfigure_session(s->ifa->bfd, s); } static void @@ -812,11 +885,11 @@ bfd_request_free(resource *r) rem_node(&req->n); BFD_UNLOCK; - ev_postpone(&req->event); + ev_send(&global_event_list, &bfd_pickup_event); } static void -bfd_request_dump(resource *r) +bfd_request_dump(resource *r, unsigned indent UNUSED) { struct bfd_request *req = (struct bfd_request *) r; @@ -849,7 +922,7 @@ bfd_neigh_notify(struct neighbor *nb) if ((nb->scope > 0) && !n->req) { ip_addr local = ipa_nonzero(n->local) ? n->local : nb->ifa->ip; - n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, birdloop_event_list(p->p.loop), NULL); + n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, NULL, NULL); } if ((nb->scope <= 0) && n->req) @@ -866,7 +939,7 @@ bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) if (n->multihop) { - n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, birdloop_event_list(p->p.loop), NULL); + n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, NULL, NULL); return; } @@ -941,23 +1014,53 @@ bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new) /* - * BFD protocol glue + * BFD notify socket */ -void -bfd_init_all(void) +/* This core notify code should be replaced after main loop transition to birdloop */ + +static void +bfd_notify_hook(void *data) { - bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global"); - init_list(&bfd_global.wait_list); - init_list(&bfd_global.proto_list); + struct bfd_proto *p = data; + struct bfd_session *s; + list tmp_list; + u8 state, diag; + node *n, *nn; + + bfd_lock_sessions(p); + init_list(&tmp_list); + add_tail_list(&tmp_list, &p->notify_list); + init_list(&p->notify_list); + bfd_unlock_sessions(p); + + WALK_LIST_FIRST(s, tmp_list) + { + bfd_lock_sessions(p); + rem_node(&s->n); + state = s->loc_state; + diag = s->loc_diag; + bfd_unlock_sessions(p); + + WALK_LIST_DELSAFE(n, nn, s->request_list) + bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag); + + /* Remove the session if all requests were removed in notify hooks */ + if (EMPTY_LIST(s->request_list)) + bfd_remove_session(p, s); + } } +/* + * BFD protocol glue + */ + static struct proto * bfd_init(struct proto_config *c) { struct proto *p = proto_new(c); - p->neigh_notify = bfd_neigh_notify; + p->iface_sub.neigh_notify = bfd_neigh_notify; return p; } @@ -968,25 +1071,38 @@ bfd_start(struct proto *P) struct bfd_proto *p = (struct bfd_proto *) P; struct bfd_config *cf = (struct bfd_config *) (P->cf); + pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE); + + p->tpool = rp_new(P->pool, "BFD loop pool"); + p->session_slab = sl_new(P->pool, sizeof(struct bfd_session)); HASH_INIT(p->session_hash_id, P->pool, 8); HASH_INIT(p->session_hash_ip, P->pool, 8); init_list(&p->iface_list); + init_list(&p->notify_list); + p->notify_event = (event) { + .hook = bfd_notify_hook, + .data = p, + }; + add_tail(&bfd_global.proto_list, &p->bfd_node); - if (cf->accept_ipv4 && cf->accept_direct) - p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); + if (!cf->strict_bind) + { + if (cf->accept_ipv4 && cf->accept_direct) + p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); - if (cf->accept_ipv4 && cf->accept_multihop) - p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); + if (cf->accept_ipv4 && cf->accept_multihop) + p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); - if (cf->accept_ipv6 && cf->accept_direct) - p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_direct) + p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); - if (cf->accept_ipv6 && cf->accept_multihop) - p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_multihop) + p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + } bfd_take_requests(p); @@ -1011,11 +1127,6 @@ bfd_shutdown(struct proto *P) bfd_drop_requests(p); - if (p->rx4_1) sk_stop(p->rx4_1); - if (p->rx4_m) sk_stop(p->rx4_m); - if (p->rx6_1) sk_stop(p->rx6_1); - if (p->rx6_m) sk_stop(p->rx6_m); - return PS_DOWN; } @@ -1027,13 +1138,12 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) struct bfd_config *new = (struct bfd_config *) c; struct bfd_iface *ifa; - ASSERT_DIE(birdloop_inside(P->loop)); - /* TODO: Improve accept reconfiguration */ if ((new->accept_ipv4 != old->accept_ipv4) || (new->accept_ipv6 != old->accept_ipv6) || (new->accept_direct != old->accept_direct) || - (new->accept_multihop != old->accept_multihop)) + (new->accept_multihop != old->accept_multihop) || + (new->strict_bind != old->strict_bind)) return 0; birdloop_mask_wakeups(p->p.loop); @@ -1041,12 +1151,12 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) WALK_LIST(ifa, p->iface_list) bfd_reconfigure_iface(p, ifa, new); - HASH_WALK_DELSAFE(p->session_hash_id, next_id, s) + HASH_WALK(p->session_hash_id, next_id, s) { if (s->ifa->changed) bfd_reconfigure_session(p, s); } - HASH_WALK_DELSAFE_END; + HASH_WALK_END; bfd_reconfigure_neighbors(p, new); @@ -1071,14 +1181,13 @@ bfd_show_sessions(struct proto *P) { byte tbuf[TM_DATETIME_BUFFER_SIZE]; struct bfd_proto *p = (struct bfd_proto *) P; + uint state, diag UNUSED; btime tx_int, timeout; const char *ifname; - birdloop_enter(P->loop); if (p->p.proto_state != PS_UP) { cli_msg(-1020, "%s: is not up", p->p.name); - birdloop_leave(P->loop); return; } @@ -1086,9 +1195,12 @@ bfd_show_sessions(struct proto *P) cli_msg(-1020, "%-25s %-10s %-10s %-12s %8s %8s", "IP address", "Interface", "State", "Since", "Interval", "Timeout"); + HASH_WALK(p->session_hash_id, next_id, s) { - uint state = BFD_LOC_STATE(s).state; + /* FIXME: this is thread-unsafe, but perhaps harmless */ + state = s->loc_state; + diag = s->loc_diag; ifname = (s->ifa && s->ifa->iface) ? s->ifa->iface->name : "---"; tx_int = s->last_tx ? MAX(s->des_min_tx_int, s->rem_min_rx_int) : 0; timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult; @@ -1100,15 +1212,12 @@ bfd_show_sessions(struct proto *P) s->addr, ifname, bfd_state_names[state], tbuf, tx_int, timeout); } HASH_WALK_END; - - birdloop_leave(P->loop); } struct protocol proto_bfd = { .name = "BFD", .template = "bfd%d", - .class = PROTOCOL_BFD, .proto_size = sizeof(struct bfd_proto), .config_size = sizeof(struct bfd_config), .init = bfd_init, @@ -1117,3 +1226,14 @@ struct protocol proto_bfd = { .reconfigure = bfd_reconfigure, .copy_config = bfd_copy_config, }; + +void +bfd_build(void) +{ + proto_build(&proto_bfd); + + bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global"); + init_list(&bfd_global.wait_list); + init_list(&bfd_global.pickup_list); + init_list(&bfd_global.proto_list); +} diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index ffb1c43f..a4b7d63c 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -13,7 +13,7 @@ #include "nest/cli.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/password.h" #include "conf/conf.h" #include "lib/hash.h" @@ -47,6 +47,7 @@ struct bfd_config u8 accept_ipv6; u8 accept_direct; u8 accept_multihop; + u8 strict_bind; }; struct bfd_iface_config @@ -88,12 +89,19 @@ struct bfd_proto { struct proto p; + pthread_spinlock_t lock; + + pool *tpool; + node bfd_node; slab *session_slab; HASH(struct bfd_session) session_hash_id; HASH(struct bfd_session) session_hash_ip; + event notify_event; + list notify_list; + sock *rx4_1; sock *rx6_1; sock *rx4_m; @@ -110,12 +118,14 @@ struct bfd_iface struct bfd_proto *bfd; sock *sk; + sock *rx; u32 uc; u8 changed; }; struct bfd_session { + node n; ip_addr addr; /* Address of session */ struct bfd_iface *ifa; /* Iface associated with session */ struct bfd_session *next_id; /* Next in bfd.session_hash_id */ @@ -126,15 +136,14 @@ struct bfd_session u8 poll_active; u8 poll_scheduled; - _Atomic struct bfd_session_state loc; - struct bfd_session_state rem; -#define BFD_LOC_STATE(s) ({ struct bfd_session_state _bss = atomic_load_explicit(&(s)->loc, memory_order_relaxed); _bss; }) - + u8 loc_state; + u8 rem_state; + u8 loc_diag; + u8 rem_diag; u32 loc_id; /* Local session ID (local discriminator) */ u32 rem_id; /* Remote session ID (remote discriminator) */ - struct bfd_session_config cf; /* Static configuration parameers */ - event update_event; /* Reconfiguration requested */ + struct bfd_session_config cf; /* Static configuration parameters */ u32 des_min_tx_int; /* Desired min rx interval, local option */ u32 des_min_tx_new; /* Used for des_min_tx_int change */ @@ -156,8 +165,6 @@ struct bfd_session list request_list; /* List of client requests (struct bfd_request) */ btime last_state_change; /* Time of last state change */ - btime last_reqlist_check; /* Time of last check whether the request list is not empty */ - u8 notify_running; /* 1 if notify hooks are running */ u8 rx_csn_known; /* Received crypto sequence number is known */ u32 rx_csn; /* Last received crypto sequence number */ @@ -203,6 +210,10 @@ extern const char *bfd_state_names[]; extern const u8 bfd_auth_type_to_hash_alg[]; + +static inline void bfd_lock_sessions(struct bfd_proto *p) { pthread_spin_lock(&p->lock); } +static inline void bfd_unlock_sessions(struct bfd_proto *p) { pthread_spin_unlock(&p->lock); } + /* bfd.c */ struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id); struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr, uint ifindex); @@ -212,6 +223,7 @@ void bfd_show_sessions(struct proto *P); /* packets.c */ void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final); sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version); +sock * bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa); sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index ed5479fb..8e608bda 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -23,7 +23,8 @@ CF_DECLS CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE, INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL, AUTHENTICATION, - NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT) + NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT, + STRICT, BIND) %type <iface> bfd_neigh_iface %type <a> bfd_neigh_local @@ -37,6 +38,7 @@ bfd_proto_start: proto_start BFD { this_proto = proto_config_new(&proto_bfd, $1); this_proto->loop_order = DOMAIN_ORDER(proto); + this_proto->loop_max_latency = 10 MS_; init_list(&BFD_CFG->patt_list); init_list(&BFD_CFG->neigh_list); BFD_CFG->accept_ipv4 = BFD_CFG->accept_ipv6 = 1; @@ -49,6 +51,7 @@ bfd_proto_item: | INTERFACE bfd_iface | MULTIHOP bfd_multihop | NEIGHBOR bfd_neighbor + | STRICT BIND bool { BFD_CFG->strict_bind = $3; } ; bfd_proto_opts: diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 893d582d..a22f223b 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -290,11 +290,9 @@ bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) if (!sk) return; - struct bfd_session_state loc = BFD_LOC_STATE(s); - pkt = (struct bfd_ctl_packet *) sk->tbuf; - pkt->vdiag = bfd_pack_vdiag(1, loc.diag); - pkt->flags = bfd_pack_flags(loc.state, 0); + pkt->vdiag = bfd_pack_vdiag(1, s->loc_diag); + pkt->flags = bfd_pack_flags(s->loc_state, 0); pkt->detect_mult = s->detect_mult; pkt->length = BFD_BASE_LEN; pkt->snd_id = htonl(s->loc_id); @@ -315,7 +313,7 @@ bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) log(L_WARN "%s: Old packet overwritten in TX buffer", p->p.name); TRACE(D_PACKETS, "Sending CTL to %I [%s%s]", s->addr, - bfd_state_names[loc.state], bfd_format_flags(pkt->flags, fb)); + bfd_state_names[s->loc_state], bfd_format_flags(pkt->flags, fb)); sk_send_to(sk, pkt->length, s->addr, sk->dport); } @@ -368,12 +366,18 @@ bfd_rx_hook(sock *sk, uint len) if (ps > BFD_STATE_DOWN) DROP("invalid init state", ps); - uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? sk->lifindex : 0; + uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? + (sk->iface ? sk->iface->index : sk->lifindex) : + 0; s = bfd_find_session_by_addr(p, sk->faddr, ifindex); /* FIXME: better session matching and message */ if (!s) return 1; + + /* For active sessions we require matching remote id */ + if ((s->loc_state == BFD_STATE_UP) && (ntohl(pkt->snd_id) != s->rem_id)) + DROP("mismatched remote id", ntohl(pkt->snd_id)); } /* bfd_check_authentication() has its own error logging */ @@ -384,17 +388,16 @@ bfd_rx_hook(sock *sk, uint len) u32 old_rx_int = s->rem_min_rx_int; s->rem_id= ntohl(pkt->snd_id); - s->rem.state = bfd_pkt_get_state(pkt); - s->rem.diag = bfd_pkt_get_diag(pkt); + s->rem_state = bfd_pkt_get_state(pkt); + s->rem_diag = bfd_pkt_get_diag(pkt); s->rem_demand_mode = pkt->flags & BFD_FLAG_DEMAND; s->rem_min_tx_int = ntohl(pkt->des_min_tx_int); s->rem_min_rx_int = ntohl(pkt->req_min_rx_int); s->rem_detect_mult = pkt->detect_mult; TRACE(D_PACKETS, "CTL received from %I [%s%s]", sk->faddr, - bfd_state_names[s->rem.state], bfd_format_flags(pkt->flags, fb)); + bfd_state_names[s->rem_state], bfd_format_flags(pkt->flags, fb)); - /* This call may drop the session, must be called in tail position */ bfd_session_process_ctl(s, pkt->flags, old_tx_int, old_rx_int); return 1; @@ -427,13 +430,42 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) /* TODO: configurable ToS and priority */ sk->tos = IP_PREC_INTERNET_CONTROL; sk->priority = sk_priority_control; - sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0); - sk->loop = p->p.loop; + sk->flags = SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0); + + if (sk_open(sk, p->p.loop) < 0) + goto err; + + return sk; + + err: + sk_log_error(sk, p->p.name); + rfree(sk); + return NULL; +} + +sock * +bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa) +{ + sock *sk = sk_new(p->tpool); + sk->type = SK_UDP; + sk->saddr = local; + sk->sport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; + sk->iface = ifa; + sk->vrf = p->p.vrf; + sk->data = p; + + sk->rbsize = BFD_MAX_LEN; + sk->rx_hook = bfd_rx_hook; + sk->err_hook = bfd_err_hook; + + /* TODO: configurable ToS and priority */ + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->priority = sk_priority_control; + sk->flags = SKF_BIND | (ifa ? SKF_TTL_RX : 0); - if (sk_open(sk) < 0) + if (sk_open(sk, p->p.loop) < 0) goto err; - sk_start(sk); return sk; err: @@ -460,13 +492,11 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) sk->tos = IP_PREC_INTERNET_CONTROL; sk->priority = sk_priority_control; sk->ttl = ifa ? 255 : -1; - sk->flags = SKF_THREAD | SKF_BIND | SKF_HIGH_PORT; - sk->loop = p->p.loop; + sk->flags = SKF_BIND | SKF_HIGH_PORT; - if (sk_open(sk) < 0) + if (sk_open(sk, p->p.loop) < 0) goto err; - sk_start(sk); return sk; err: diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile index 00aaef5e..f6a38678 100644 --- a/proto/bgp/Makefile +++ b/proto/bgp/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 02b07410..8bff4c78 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -15,12 +15,13 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "lib/resource.h" #include "lib/string.h" #include "lib/unaligned.h" +#include "lib/macro.h" #include "bgp.h" @@ -45,9 +46,9 @@ * * export - Hook that validates and normalizes attribute during export phase. * Receives eattr, may modify it (e.g., sort community lists for canonical - * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if - * necessary. May assume that eattr has value valid w.r.t. its type, but may be - * invalid w.r.t. BGP constraints. Optional. + * representation), UNSET() it (e.g., skip empty lists), or REJECT() the route + * if necessary. May assume that eattr has value valid w.r.t. its type, but may + * be invalid w.r.t. BGP constraints. Optional. * * encode - Hook that converts internal representation to external one during * packet writing. Receives eattr and puts it in the buffer (including attribute @@ -64,37 +65,72 @@ * format - Optional hook that converts eattr to textual representation. */ - -struct bgp_attr_desc { - const char *name; - uint type; - uint flags; - void (*export)(struct bgp_export_state *s, eattr *a); - int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); - void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); - void (*format)(const eattr *ea, byte *buf, uint size); +union bgp_attr_desc { + struct ea_class class; + struct { + EA_CLASS_INSIDE; + uint flags; + void (*export)(struct bgp_export_state *s, eattr *a); + int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); + void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); + }; }; -static const struct bgp_attr_desc bgp_attr_table[]; +static union bgp_attr_desc bgp_attr_table[]; +static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a) +{ + const struct ea_class *class = ea_class_find(a->id); -static inline int bgp_attr_known(uint code); + if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class)) + return NULL; -eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val) + return (const union bgp_attr_desc *) class; +} + +#define BGP_EA_ID(code) (bgp_attr_table[code].id) +#define EA_BGP_ID(code) (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table) + +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val) { - ASSERT(bgp_attr_known(code)); + const union bgp_attr_desc *desc = &bgp_attr_table[code]; - return ea_set_attr( - attrs, - pool, - EA_CODE(PROTOCOL_BGP, code), - flags & ~BAF_EXT_LEN, - bgp_attr_table[code].type, - val - ); + ea_set_attr(to, EA_LITERAL_EMBEDDED( + &desc->class, + flags & ~BAF_EXT_LEN, + val + )); } +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + ad + )); +} + +void +bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + + ea_set_attr(to, EA_LITERAL_STORE_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + data, + len + )); +} + +void +bgp_unset_attr(ea_list **to, uint code) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + ea_unset_attr(to, 0, &desc->class); +} #define REPORT(msg, args...) \ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) @@ -106,7 +142,10 @@ bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintp ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) #define UNSET(a) \ - ({ a->type = EAF_TYPE_UNDEF; return; }) + ({ a->undef = 1; return; }) + +#define REJECT(msg, args...) \ + ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; }) #define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor" #define BAD_EBGP "Discarding %s attribute received from EBGP neighbor" @@ -148,7 +187,7 @@ bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+1)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1); buf[3] = a->u.data; return 3+1; @@ -160,7 +199,7 @@ bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+4)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4); put_u32(buf+3, a->u.data); return 3+4; @@ -174,7 +213,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size if (size < (4+len)) return -1; - uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len); + uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len); put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4); return hdr + len; @@ -195,7 +234,7 @@ bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint static int bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); + return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); } @@ -333,26 +372,26 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric) } int -bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) +bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad) { - eattr *ea = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP)); - if (!ea) + eattr *a = ea_find(e->attrs, BGP_EA_ID(BA_AIGP)); + if (!a) return 0; - const byte *b = bgp_aigp_get_tlv(ea->u.ptr, BGP_AIGP_METRIC); + const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC); if (!b) return 0; u64 aigp = get_u64(b + 3); - u64 step = a->igp_metric; + u64 step = rt_get_igp_metric(e); - if (!rta_resolvable(a) || (step >= IGP_METRIC_UNKNOWN)) + if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN)) step = BGP_AIGP_MAX; if (!step) step = 1; - *ad = ea->u.ptr; + *ad = a->u.ptr; *metric = aigp + step; if (*metric < aigp) *metric = BGP_AIGP_MAX; @@ -363,7 +402,7 @@ bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) static inline int bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) { - if (e->attrs->source == RTS_BGP) + if (rt_get_source_attr(e) == RTS_BGP) return 0; *metric = rt_get_igp_metric(e); @@ -372,9 +411,9 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) } u32 -bgp_rte_igp_metric(struct rte *rt) +bgp_rte_igp_metric(const rte *rt) { - u64 metric = bgp_total_aigp_metric(rt->attrs); + u64 metric = bgp_total_aigp_metric(rt); return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); } @@ -387,7 +426,7 @@ static void bgp_export_origin(struct bgp_export_state *s, eattr *a) { if (a->u.data > 2) - WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data); + REJECT(BAD_VALUE, "ORIGIN", a->u.data); } static void @@ -399,7 +438,7 @@ bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte if (data[0] > 2) WITHDRAW(BAD_VALUE, "ORIGIN", data[0]); - bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]); + bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]); } static void @@ -467,7 +506,7 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte !bgp_as_path_first_as_equal(data, len, p->remote_as)) WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS"); - bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); + bgp_set_attr_data(to, BA_AS_PATH, flags, data, len); } @@ -539,7 +578,7 @@ bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val); + bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val); } @@ -560,7 +599,7 @@ bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, b WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val); + bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val); } @@ -570,7 +609,7 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, if (len != 0) DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len); - bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0); + bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0); } static int @@ -604,7 +643,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b len = aggregator_16to32(data, src); } - bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len); } static void @@ -633,7 +672,7 @@ bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, by struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad); } @@ -654,7 +693,7 @@ bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val); + bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val); } @@ -679,7 +718,7 @@ bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad); + bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad); } static void @@ -798,7 +837,7 @@ bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad); } @@ -811,7 +850,7 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag if (len != 8) DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len); - bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len); } static void @@ -841,7 +880,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt a = as_path_strip_confed(s->pool, a); } - bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); + bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a); } @@ -865,7 +904,7 @@ bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *d if (!bgp_aigp_valid(data, len, err, sizeof(err))) DISCARD("Malformed AIGP attribute - %s", err); - bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len); + bgp_set_attr_data(to, BA_AIGP, flags, data, len); } static void @@ -897,9 +936,21 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad); +} + + +static void +bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to) +{ + if (len != 4) + WITHDRAW(BAD_LENGTH, "OTC", len); + + u32 val = get_u32(data); + bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, flags, val); } + static void bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) { @@ -909,20 +960,20 @@ bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) /* Perhaps we should just ignore it? */ if (!s->mpls) - WITHDRAW("Unexpected MPLS stack"); + REJECT("Unexpected MPLS stack"); /* Empty MPLS stack is not allowed */ if (!lnum) - WITHDRAW("Malformed MPLS stack - empty"); + REJECT("Malformed MPLS stack - empty"); /* This is ugly, but we must ensure that labels fit into NLRI field */ if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255) - WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum); + REJECT("Malformed MPLS stack - too many labels (%u)", lnum); for (uint i = 0; i < lnum; i++) { if (labels[i] > 0xfffff) - WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]); + REJECT("Malformed MPLS stack - invalid label (%u)", labels[i]); /* TODO: Check for special-purpose label values? */ } @@ -970,10 +1021,29 @@ bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size) } static inline void -bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a) { + if (!(a->flags & BAF_TRANSITIVE)) + UNSET(a); + + a->flags |= BAF_PARTIAL; +} + +static inline void +bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to) +{ + if (!(flags & BAF_OPTIONAL)) + WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); + /* Cannot use bgp_set_attr_data() as it works on known attributes only */ - ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len); + ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len); +} + +static inline void +bgp_format_unknown(const eattr *a, byte *buf, uint size) +{ + if (a->flags & BAF_TRANSITIVE) + bsnprintf(buf, size, "(transitive)"); } @@ -981,10 +1051,10 @@ bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, * Attribute table */ -static const struct bgp_attr_desc bgp_attr_table[] = { +static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_ORIGIN] = { - .name = "origin", - .type = EAF_TYPE_INT, + .name = "bgp_origin", + .type = T_ENUM_BGP_ORIGIN, .flags = BAF_TRANSITIVE, .export = bgp_export_origin, .encode = bgp_encode_u8, @@ -992,69 +1062,69 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_origin, }, [BA_AS_PATH] = { - .name = "as_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_path", + .type = T_PATH, .flags = BAF_TRANSITIVE, .encode = bgp_encode_as_path, .decode = bgp_decode_as_path, }, [BA_NEXT_HOP] = { - .name = "next_hop", - .type = EAF_TYPE_IP_ADDRESS, + .name = "bgp_next_hop", + .type = T_IP, .flags = BAF_TRANSITIVE, .encode = bgp_encode_next_hop, .decode = bgp_decode_next_hop, .format = bgp_format_next_hop, }, [BA_MULTI_EXIT_DISC] = { - .name = "med", - .type = EAF_TYPE_INT, + .name = "bgp_med", + .type = T_INT, .flags = BAF_OPTIONAL, .encode = bgp_encode_u32, .decode = bgp_decode_med, }, [BA_LOCAL_PREF] = { - .name = "local_pref", - .type = EAF_TYPE_INT, + .name = "bgp_local_pref", + .type = T_INT, .flags = BAF_TRANSITIVE, .export = bgp_export_local_pref, .encode = bgp_encode_u32, .decode = bgp_decode_local_pref, }, [BA_ATOMIC_AGGR] = { - .name = "atomic_aggr", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_atomic_aggr", + .type = T_OPAQUE, .flags = BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_atomic_aggr, }, [BA_AGGREGATOR] = { - .name = "aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aggregator", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_aggregator, .decode = bgp_decode_aggregator, .format = bgp_format_aggregator, }, [BA_COMMUNITY] = { - .name = "community", - .type = EAF_TYPE_INT_SET, + .name = "bgp_community", + .type = T_CLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_community, .encode = bgp_encode_u32s, .decode = bgp_decode_community, }, [BA_ORIGINATOR_ID] = { - .name = "originator_id", - .type = EAF_TYPE_ROUTER_ID, + .name = "bgp_originator_id", + .type = T_QUAD, .flags = BAF_OPTIONAL, .export = bgp_export_originator_id, .encode = bgp_encode_u32, .decode = bgp_decode_originator_id, }, [BA_CLUSTER_LIST] = { - .name = "cluster_list", - .type = EAF_TYPE_INT_SET, + .name = "bgp_cluster_list", + .type = T_CLIST, .flags = BAF_OPTIONAL, .export = bgp_export_cluster_list, .encode = bgp_encode_u32s, @@ -1062,43 +1132,47 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_cluster_list, }, [BA_MP_REACH_NLRI] = { - .name = "mp_reach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_reach_nlri", + .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_reach_nlri, }, [BA_MP_UNREACH_NLRI] = { - .name = "mp_unreach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_unreach_nlri", + .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_unreach_nlri, }, [BA_EXT_COMMUNITY] = { - .name = "ext_community", - .type = EAF_TYPE_EC_SET, + .name = "bgp_ext_community", + .type = T_ECLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_ext_community, .encode = bgp_encode_u32s, .decode = bgp_decode_ext_community, }, [BA_AS4_PATH] = { - .name = "as4_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_as4_path", + .type = T_PATH, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_path, }, [BA_AS4_AGGREGATOR] = { - .name = "as4_aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_as4_aggregator", + .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_aggregator, .format = bgp_format_aggregator, }, [BA_AIGP] = { - .name = "aigp", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aigp", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS, .export = bgp_export_aigp, .encode = bgp_encode_raw, @@ -1106,16 +1180,24 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_aigp, }, [BA_LARGE_COMMUNITY] = { - .name = "large_community", - .type = EAF_TYPE_LC_SET, + .name = "bgp_large_community", + .type = T_LCLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_large_community, .encode = bgp_encode_u32s, .decode = bgp_decode_large_community, }, + [BA_ONLY_TO_CUSTOMER] = { + .name = "otc", + .type = T_INT, + .flags = BAF_OPTIONAL | BAF_TRANSITIVE, + .encode = bgp_encode_u32, + .decode = bgp_decode_otc, + }, [BA_MPLS_LABEL_STACK] = { - .name = "mpls_label_stack", - .type = EAF_TYPE_INT_SET, + .name = "bgp_mpls_label_stack", + .type = T_CLIST, + .readonly = 1, .export = bgp_export_mpls_label_stack, .encode = bgp_encode_mpls_label_stack, .decode = bgp_decode_mpls_label_stack, @@ -1123,12 +1205,32 @@ static const struct bgp_attr_desc bgp_attr_table[] = { }, }; -static inline int -bgp_attr_known(uint code) +eattr * +bgp_find_attr(ea_list *attrs, uint code) { - return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name; + return ea_find(attrs, BGP_EA_ID(code)); } +void +bgp_register_attrs(void) +{ + for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++) + { + if (!bgp_attr_table[i].name) + bgp_attr_table[i] = (union bgp_attr_desc) { + .name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i), + .type = T_OPAQUE, + .flags = BAF_OPTIONAL, + .readonly = 1, + .export = bgp_export_unknown, + .encode = bgp_encode_raw, + .decode = bgp_decode_unknown, + .format = bgp_format_unknown, + }; + + ea_register_init(&bgp_attr_table[i].class); + } +} /* * Attribute export @@ -1137,38 +1239,24 @@ bgp_attr_known(uint code) static inline void bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) { - if (EA_PROTO(a->id) != PROTOCOL_BGP) + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + if (!desc) return; - uint code = EA_ID(a->id); + /* The flags might have been zero if the attr was added locally */ + a->flags = (a->flags & BAF_PARTIAL) | desc->flags; - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; - - /* The flags might have been zero if the attr was added by filters */ - a->flags = (a->flags & BAF_PARTIAL) | desc->flags; - - /* Set partial bit if new opt-trans attribute is attached to non-local route */ - if ((s->src != NULL) && (a->type & EAF_ORIGINATED) && - (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) - a->flags |= BAF_PARTIAL; - - /* Call specific hook */ - CALL(desc->export, s, a); + /* Set partial bit if new opt-trans attribute is attached to non-local route */ + if ((s->src != NULL) && (a->originated) && + (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) + a->flags |= BAF_PARTIAL; - /* Attribute might become undefined in hook */ - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) - return; - } - else - { - /* Don't re-export unknown non-transitive attributes */ - if (!(a->flags & BAF_TRANSITIVE)) - return; + /* Call specific hook */ + CALL(desc->export, s, a); - a->flags |= BAF_PARTIAL; - } + /* Attribute might become undefined in hook */ + if (a->undef) + return; /* Append updated attribute */ to->attrs[to->count++] = *a; @@ -1188,12 +1276,11 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. */ static inline ea_list * -bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) +bgp_export_attrs(struct bgp_export_state *s, ea_list *a) { /* Merge the attribute list */ - ea_list *new = lp_alloc(s->pool, ea_scan(attrs)); - ea_merge(attrs, new); - ea_sort(new); + ea_list *new = ea_normalize(a, 0); + ASSERT_DIE(new); uint i, count; count = new->count; @@ -1203,7 +1290,7 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) for (i = 0; i < count; i++) bgp_export_attr(s, &new->attrs[i], new); - if (s->err_withdraw) + if (s->err_reject) return NULL; return new; @@ -1217,14 +1304,9 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) static inline int bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP); - - uint code = EA_ID(a->id); - - if (bgp_attr_known(code)) - return bgp_attr_table[code].encode(s, a, buf, size); - else - return bgp_encode_raw(s, a, buf, size); + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + ASSERT_DIE(desc); + return desc->encode(s, a, buf, size); } /** @@ -1289,7 +1371,7 @@ bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs) } static inline void -bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to) { /* Handle duplicate attributes; RFC 7606 3 (g) */ if (BIT32_TEST(s->attrs_seen, code)) @@ -1301,24 +1383,15 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui } BIT32_SET(s->attrs_seen, code); - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + ASSERT_DIE(bgp_attr_table[code].id); + const union bgp_attr_desc *desc = &bgp_attr_table[code]; - /* Handle conflicting flags; RFC 7606 3 (c) */ - if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && - !(desc->flags & BAF_DECODE_FLAGS)) - WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags); + /* Handle conflicting flags; RFC 7606 3 (c) */ + if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && + !(desc->flags & BAF_DECODE_FLAGS)) + WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags); - desc->decode(s, code, flags, data, len, to); - } - else /* Unknown attribute */ - { - if (!(flags & BAF_OPTIONAL)) - WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); - - bgp_decode_unknown(s, code, flags, data, len, to); - } + desc->decode(s, code, flags, data, len, to); } /** @@ -1336,7 +1409,8 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) { struct bgp_proto *p = s->proto; ea_list *attrs = NULL; - uint code, flags, alen; + uint alen; + byte code, flags; byte *pos = data; /* Parse the attributes */ @@ -1401,23 +1475,23 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) /* Reject routes with our ASN in AS_PATH attribute */ if (bgp_as_path_loopy(p, attrs, p->local_as)) - goto withdraw; + goto loop; /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */ if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as)) - goto withdraw; + goto loop; /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */ if (p->is_internal && bgp_originator_id_loopy(p, attrs)) - goto withdraw; + goto loop; /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */ if (p->rr_client && bgp_cluster_list_loopy(p, attrs)) - goto withdraw; + goto loop; /* If there is no local preference, define one */ if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); return attrs; @@ -1434,16 +1508,43 @@ withdraw: s->err_withdraw = 1; return NULL; + +loop: + /* Loops are handled as withdraws, but ignored silently. Do not set err_withdraw. */ + return NULL; } void -bgp_finish_attrs(struct bgp_parse_state *s, rta *a) +bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to) { /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */ if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp) { REPORT("Discarding AIGP attribute received on non-AIGP session"); - bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP); + bgp_unset_attr(to, BA_AIGP); + } + + /* Handle OTC ingress procedure, RFC 9234 */ + if (bgp_channel_is_role_applicable(s->channel)) + { + struct bgp_proto *p = s->proto; + eattr *e = bgp_find_attr(*to, BA_ONLY_TO_CUSTOMER); + + /* Reject routes from downstream if they are leaked */ + if (e && (p->cf->local_role == BGP_ROLE_PROVIDER || + p->cf->local_role == BGP_ROLE_RS_SERVER)) + WITHDRAW("Route leak detected - OTC attribute from downstream"); + + /* Reject routes from peers if they are leaked */ + if (e && (p->cf->local_role == BGP_ROLE_PEER) && (e->u.data != p->cf->remote_as)) + WITHDRAW("Route leak detected - OTC attribute with mismatched ASN (%u)", + (uint) e->u.data); + + /* Mark routes from upstream if it did not happened before */ + if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER || + p->cf->local_role == BGP_ROLE_PEER || + p->cf->local_role == BGP_ROLE_RS_CLIENT)) + bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as); } } @@ -1458,13 +1559,13 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) #define RBH_FN(a,h) h #define RBH_REHASH bgp_rbh_rehash -#define RBH_PARAMS /8, *2, 2, 2, 8, 20 +#define RBH_PARAMS /8, *2, 2, 2, 12, 20 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) -void -bgp_init_bucket_table(struct bgp_channel *c) +static void +bgp_init_bucket_table(struct bgp_pending_tx *c) { HASH_INIT(c->bucket_hash, c->pool, 8); @@ -1472,24 +1573,8 @@ bgp_init_bucket_table(struct bgp_channel *c) c->withdraw_bucket = NULL; } -void -bgp_free_bucket_table(struct bgp_channel *c) -{ - HASH_FREE(c->bucket_hash); - - struct bgp_bucket *b; - WALK_LIST_FIRST(b, c->bucket_queue) - { - rem_node(&b->send_node); - mb_free(b); - } - - mb_free(c->withdraw_bucket); - c->withdraw_bucket = NULL; -} - static struct bgp_bucket * -bgp_get_bucket(struct bgp_channel *c, ea_list *new) +bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new) { /* Hash and lookup */ u32 hash = ea_hash(new); @@ -1498,55 +1583,27 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) if (b) return b; - uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr); - uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); - uint size = sizeof(struct bgp_bucket) + ea_size_aligned; - uint i; - byte *dest; - - /* Gather total size of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &new->attrs[i]; - - if (!(a->type & EAF_EMBEDDED)) - size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); - } + /* Scan the list for total size */ + uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new)); + uint size = sizeof(struct bgp_bucket) + ea_size; - /* Create the bucket */ + /* Allocate the bucket */ b = mb_alloc(c->pool, size); *b = (struct bgp_bucket) { }; init_list(&b->prefixes); b->hash = hash; - /* Copy list of extended attributes */ - memcpy(b->eattrs, new, ea_size); - dest = ((byte *) b->eattrs) + ea_size_aligned; - - /* Copy values of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &b->eattrs->attrs[i]; - - if (!(a->type & EAF_EMBEDDED)) - { - const struct adata *oa = a->u.ptr; - struct adata *na = (struct adata *) dest; - memcpy(na, oa, sizeof(struct adata) + oa->length); - a->u.ptr = na; - dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); - } - } + /* Copy the ea_list */ + ea_list_copy(b->eattrs, new, ea_size); - /* Insert the bucket to send queue and bucket hash */ - add_tail(&c->bucket_queue, &b->send_node); + /* Insert the bucket to bucket hash */ HASH_INSERT2(c->bucket_hash, RBH, c->pool, b); return b; } static struct bgp_bucket * -bgp_get_withdraw_bucket(struct bgp_channel *c) +bgp_get_withdraw_bucket(struct bgp_pending_tx *c) { if (!c->withdraw_bucket) { @@ -1557,25 +1614,45 @@ bgp_get_withdraw_bucket(struct bgp_channel *c) return c->withdraw_bucket; } -void -bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) +static void +bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b) { - rem_node(&b->send_node); HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); mb_free(b); } +int +bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b) +{ + struct bgp_pending_tx *c = bc->ptx; + + /* Won't free the withdraw bucket */ + if (b == c->withdraw_bucket) + return 0; + + if (EMPTY_LIST(b->prefixes)) + rem_node(&b->send_node); + + if (b->px_uc || !EMPTY_LIST(b->prefixes)) + return 0; + + bgp_free_bucket(c, b); + return 1; +} + void -bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) +bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b) { + struct bgp_pending_tx *c = bc->ptx; rem_node(&b->send_node); add_tail(&c->bucket_queue, &b->send_node); } void -bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) +bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b) { - struct bgp_proto *p = (void *) c->c.proto; + struct bgp_proto *p = (void *) bc->c.proto; + struct bgp_pending_tx *c = bc->ptx; struct bgp_bucket *wb = bgp_get_withdraw_bucket(c); log(L_ERR "%s: Attribute list too long", p->p.name); @@ -1584,8 +1661,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) struct bgp_prefix *px = HEAD(b->prefixes); log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net); - rem_node(&px->buck_node); - add_tail(&wb->prefixes, &px->buck_node); + rem_node(&px->buck_node_xx); + add_tail(&wb->prefixes, &px->buck_node_xx); } } @@ -1596,42 +1673,43 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_KEY(px) px->net, px->path_id, px->hash #define PXH_NEXT(px) px->next -#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2) +#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 24 +#define PXH_PARAMS /8, *2, 2, 2, 12, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) -void -bgp_init_prefix_table(struct bgp_channel *c) +static void +bgp_init_prefix_table(struct bgp_channel *bc) { + struct bgp_pending_tx *c = bc->ptx; HASH_INIT(c->prefix_hash, c->pool, 8); - uint alen = net_addr_length[c->c.net_type]; + uint alen = net_addr_length[bc->c.net_type]; c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL; } -void -bgp_free_prefix_table(struct bgp_channel *c) -{ - HASH_FREE(c->prefix_hash); - - rfree(c->prefix_slab); - c->prefix_slab = NULL; -} - static struct bgp_prefix * -bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) +bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx) { - u32 hash = net_hash(net) ^ u32_hash(path_id); - struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); + u32 path_id = src->global_id; + u32 path_id_hash = add_path_tx ? path_id : 0; + /* We must use a different hash function than the rtable */ + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash)); + struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash); if (px) { - rem_node(&px->buck_node); + if (!add_path_tx && (path_id != px->path_id)) + { + rt_unlock_source(rt_find_source_global(px->path_id)); + rt_lock_source(src); + px->path_id = path_id; + } + return px; } @@ -1644,34 +1722,321 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) px->hash = hash; px->path_id = path_id; net_copy(px->net, net); + rt_lock_source(src); HASH_INSERT2(c->prefix_hash, PXH, c->pool, px); return px; } -void -bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) +static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px); + +static inline int +bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b) +{ +#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket) +#define BPX_TRACE(what) do { \ + if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \ + c->c.proto->name, c->c.name, what, \ + px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0) + px->lastmod = current_time(); + + /* Already queued for the same bucket */ + if (px->cur == b) + { + BPX_TRACE("already queued"); + return 0; + } + + /* Unqueue from the old bucket */ + if (px->cur) + { + rem_node(&px->buck_node_xx); + bgp_done_bucket(c, px->cur); + } + + /* The new bucket is the same as we sent before */ + if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b)) + { + if (px->cur) + BPX_TRACE("reverted"); + else + BPX_TRACE("already sent"); + + /* Well, we haven't sent anything yet */ + if (!px->last) + bgp_free_prefix(c->ptx, px); + + px->cur = NULL; + return 0; + } + + /* Enqueue the bucket if it has been empty */ + if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes)) + add_tail(&c->ptx->bucket_queue, &b->send_node); + + /* Enqueue to the new bucket and indicate the change */ + add_tail(&b->prefixes, &px->buck_node_xx); + px->cur = b; + + BPX_TRACE("queued"); + return 1; + +#undef BPX_TRACE +} + +static void +bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px) { - rem_node(&px->buck_node); HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); + rt_unlock_source(rt_find_source_global(px->path_id)); + if (c->prefix_slab) - sl_free(c->prefix_slab, px); + sl_free(px); else mb_free(px); } +void +bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck) +{ + /* Cleanup: We're called from bucket senders. */ + ASSERT_DIE(px->cur == buck); + rem_node(&px->buck_node_xx); + + /* We may want to store the updates */ + if (c->c.out_table) + { + /* Nothing to be sent right now */ + px->cur = NULL; + + /* Unref the previous sent version */ + if (px->last) + px->last->px_uc--; + + /* Ref the current sent version */ + if (!IS_WITHDRAW_BUCKET(buck)) + { + px->last = buck; + px->last->px_uc++; + return; + } + + /* Prefixes belonging to the withdraw bucket are freed always */ + } + + bgp_free_prefix(c->ptx, px); +} + +static void +bgp_pending_tx_rfree(resource *r) +{ + struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r); + + HASH_WALK(ptx->prefix_hash, next, n) + rt_unlock_source(rt_find_source_global(n->path_id)); + HASH_WALK_END; +} + +static void bgp_pending_tx_dump(resource *r UNUSED, unsigned indent UNUSED) { debug("\n"); } + +static struct resclass bgp_pending_tx_class = { + .name = "BGP Pending TX", + .size = sizeof(struct bgp_pending_tx), + .free = bgp_pending_tx_rfree, + .dump = bgp_pending_tx_dump, +}; + +void +bgp_init_pending_tx(struct bgp_channel *c) +{ + ASSERT_DIE(!c->ptx); + + pool *p = rp_new(c->pool, "BGP Pending TX"); + c->ptx = ralloc(p, &bgp_pending_tx_class); + c->ptx->pool = p; + + bgp_init_bucket_table(c->ptx); + bgp_init_prefix_table(c); +} + +void +bgp_free_pending_tx(struct bgp_channel *c) +{ + ASSERT_DIE(c->ptx); + ASSERT_DIE(c->ptx->pool); + + rfree(c->ptx->pool); + c->ptx = NULL; +} + + +/* + * Prefix hash table exporter + */ + +struct bgp_out_export_hook { + struct rt_export_hook h; + u32 hash_iter; /* Iterator over hash */ +}; + +static void +bgp_out_table_feed(void *data) +{ + struct bgp_out_export_hook *hook = data; + struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->h.table); + struct bgp_pending_tx *c = bc->ptx; + + int max = 512; + + const net_addr *neq = (hook->h.req->addr_mode == TE_ADDR_EQUAL) ? hook->h.req->addr : NULL; + const net_addr *cand = NULL; + + do { + HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter) + { + switch (hook->h.req->addr_mode) + { + case TE_ADDR_IN: + if (!net_in_netX(n->net, hook->h.req->addr)) + continue; + /* fall through */ + case TE_ADDR_NONE: + /* Splitting only for multi-net exports */ + if (--max <= 0) + HASH_WALK_ITER_PUT; + break; + + case TE_ADDR_FOR: + if (!neq) + { + if (net_in_netX(hook->h.req->addr, n->net) && (!cand || (n->net->length > cand->length))) + cand = n->net; + continue; + } + /* fall through */ + case TE_ADDR_EQUAL: + if (!net_equal(n->net, neq)) + continue; + break; + } + + struct bgp_bucket *buck = n->cur ?: n->last; + ea_list *ea = NULL; + if (buck == c->withdraw_bucket) + ea_set_dest(&ea, 0, RTD_UNREACHABLE); + else + { + ea = buck->eattrs; + eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP); + ASSERT_DIE(eanh); + const ip_addr *nh = (const void *) eanh->u.ptr->data; + + struct nexthop_adata nhad = { + .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), }, + .nh = { .gw = nh[0], }, + }; + + ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad))); + } + + struct rte_storage es = { + .rte = { + .attrs = ea, + .net = n->net, + .src = rt_find_source_global(n->path_id), + .sender = NULL, + .lastmod = n->lastmod, + .flags = n->cur ? REF_PENDING : 0, + }, + }; + + struct rt_pending_export rpe = { + .new = &es, .new_best = &es, + }; + + if (hook->h.req->export_bulk) + { + const rte *feed = &es.rte; + hook->h.req->export_bulk(hook->h.req, n->net, &rpe, &rpe, &feed, 1); + } + else if (hook->h.req->export_one) + hook->h.req->export_one(hook->h.req, n->net, &rpe); + else + bug("No export method in export request"); + } + HASH_WALK_ITER_END; + + neq = cand; + cand = NULL; + } while (neq); + + if (hook->hash_iter) + ev_schedule_work(&hook->h.event); + else + rt_set_export_state(&hook->h, TES_READY); +} + +static void +bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +{ + req->hook = rt_alloc_export(re, sizeof(struct bgp_out_export_hook)); + req->hook->req = req; + + struct bgp_out_export_hook *hook = SKIP_BACK(struct bgp_out_export_hook, h, req->hook); + + hook->h.event.hook = bgp_out_table_feed; + rt_init_export(re, req->hook); +} + +static void +bgp_out_table_export_done(void *data) +{ + struct bgp_out_export_hook *hook = data; + struct rt_export_request *req = hook->h.req; + void (*stopped)(struct rt_export_request *) = hook->h.stopped; + + rt_export_stopped(&hook->h); + CALL(stopped, req); +} + +static const struct rt_exporter_class bgp_out_table_export_class = { + .start = bgp_out_table_export_start, + .done = bgp_out_table_export_done, +}; + +void +bgp_setup_out_table(struct bgp_channel *c) +{ + ASSERT_DIE(c->c.out_table == NULL); + + c->prefix_exporter = (struct rt_exporter) { + .class = &bgp_out_table_export_class, + .addr_type = c->c.table->addr_type, + .rp = c->c.proto->pool, + }; + + rt_exporter_init(&c->prefix_exporter); + + c->c.out_table = &c->prefix_exporter; +} + /* * BGP protocol glue */ int -bgp_preexport(struct channel *c, rte *e) +bgp_preexport(struct channel *C, rte *e) { - struct bgp_proto *p = (struct bgp_proto *) (c->proto); + struct bgp_proto *p = (struct bgp_proto *) C->proto; struct bgp_proto *src = bgp_rte_proto(e); + struct bgp_channel *c = (struct bgp_channel *) C; + + /* Ignore non-BGP channels */ + if (C->channel != &channel_bgp) + return -1; /* Reject our routes */ if (src == p) @@ -1681,6 +2046,22 @@ bgp_preexport(struct channel *c, rte *e) if (src == NULL) return 0; + /* Reject flowspec that failed validation */ + if (net_is_flow(e->net)) + switch (rt_get_flowspec_valid(e)) + { + case FLOWSPEC_VALID: + break; + case FLOWSPEC_INVALID: + return -1; + case FLOWSPEC_UNKNOWN: + ASSUME((rt_get_source_attr(e) != RTS_BGP) || + !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table); + break; + case FLOWSPEC__MAX: + bug("This never happens."); + } + /* IBGP route reflection, RFC 4456 */ if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) { @@ -1690,16 +2071,16 @@ bgp_preexport(struct channel *c, rte *e) /* Generally, this should be handled when path is received, but we check it also here as rr_cluster_id may be undefined or different in src. */ - if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs)) + if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs)) return -1; } /* Handle well-known communities, RFC 1997 */ - struct eattr *com; + struct eattr *a; if (p->cf->interpret_communities && - (com = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)))) + (a = bgp_find_attr(e->attrs, BA_COMMUNITY))) { - const struct adata *d = com->u.ptr; + const struct adata *d = a->u.ptr; /* Do not export anywhere */ if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) @@ -1718,6 +2099,16 @@ bgp_preexport(struct channel *c, rte *e) return -1; } + /* Do not export routes marked with OTC to upstream, RFC 9234 */ + if (bgp_channel_is_role_applicable(c)) + { + a = bgp_find_attr(e->attrs, BA_ONLY_TO_CUSTOMER); + if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER || + p->cf->local_role==BGP_ROLE_PEER || + p->cf->local_role==BGP_ROLE_RS_CLIENT)) + return -1; + } + return 0; } @@ -1732,7 +2123,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* ORIGIN attribute - mandatory, attach if missing */ if (! bgp_find_attr(attrs0, BA_ORIGIN)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); + bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); /* AS_PATH attribute - mandatory */ a = bgp_find_attr(attrs0, BA_AS_PATH); @@ -1747,24 +2138,24 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* IBGP or route server -> just ensure there is one */ if (!a) - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata); } else if (p->is_interior) { /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */ ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); } else /* Regular EBGP (no RS, no confederation) */ { /* Regular EBGP -> prepend ASN as regular sequence */ ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); /* MULTI_EXIT_DESC attribute - accept only if set in export filter */ a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC); - if (a && !(a->type & EAF_FRESH)) - bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); + if (a && !a->fresh && !p->cf->allow_med) + bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC); } /* NEXT_HOP attribute - delegated to AF-specific hook */ @@ -1773,16 +2164,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* LOCAL_PREF attribute - required for IBGP, attach if missing */ if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); /* AIGP attribute - accumulate local metric or originate new one */ u64 metric; if (s.local_next_hop && - (bgp_total_aigp_metric_(e->attrs, &metric, &ad) || + (bgp_total_aigp_metric_(e, &metric, &ad) || (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad)))) { ad = bgp_aigp_set_metric(pool, ad, metric); - bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad); } /* IBGP route reflection, RFC 4456 */ @@ -1790,7 +2181,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* ORIGINATOR_ID attribute - attach if not already set */ if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id); + bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id); /* CLUSTER_LIST attribute - prepend cluster ID */ a = bgp_find_attr(attrs0, BA_CLUSTER_LIST); @@ -1805,7 +2196,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at ad = int_set_prepend(pool, ad, p->rr_cluster_id); /* Should be at least one prepended cluster ID */ - bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad); + bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad); } /* AS4_* transition attributes, RFC 6793 4.2.2 */ @@ -1814,18 +2205,28 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at a = bgp_find_attr(attrs, BA_AS_PATH); if (a && as_path_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); } a = bgp_find_attr(attrs, BA_AGGREGATOR); if (a && aggregator_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr); + bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr); } } + /* Mark routes for downstream with OTC, RFC 9234 */ + if (bgp_channel_is_role_applicable(c)) + { + a = bgp_find_attr(attrs, BA_ONLY_TO_CUSTOMER); + if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER || + p->cf->local_role == BGP_ROLE_PEER || + p->cf->local_role == BGP_ROLE_RS_SERVER)) + bgp_set_attr_u32(&attrs, BA_ONLY_TO_CUSTOMER, 0, p->public_as); + } + /* * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute @@ -1842,34 +2243,39 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; struct bgp_bucket *buck; - struct bgp_prefix *px; - u32 path; + struct rte_src *path; + + /* Ignore non-BGP channels */ + if (C->channel != &channel_bgp) + return; if (new) { - struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, C->rte_update_pool); + struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs, tmp_linpool); + + /* Error during attribute processing */ + if (!attrs) + log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n); /* If attributes are invalid, we fail back to withdraw */ - buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); - path = new->src->global_id; + buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx); + path = new->src; } else { - buck = bgp_get_withdraw_bucket(c); - path = old->src->global_id; + buck = bgp_get_withdraw_bucket(c->ptx); + path = old->src; } - px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0); - add_tail(&buck->prefixes, &px->buck_node); - - bgp_schedule_packet(p->conn, c, PKT_UPDATE); + if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck)) + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } static inline u32 -bgp_get_neighbor(rte *r) +bgp_get_neighbor(const rte *r) { - eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(r->attrs, BGP_EA_ID(BA_AS_PATH)); u32 as; if (e && as_path_get_first_regular(e->u.ptr, &as)) @@ -1881,30 +2287,14 @@ bgp_get_neighbor(rte *r) } static inline int -rte_stale(rte *r) +rte_stale(const rte *r) { - if (r->pflags & BGP_REF_STALE) - return 1; - - if (r->pflags & BGP_REF_NOT_STALE) - return 0; - - /* If staleness is unknown, compute and cache it */ - eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); - if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE)) - { - r->pflags |= BGP_REF_STALE; - return 1; - } - else - { - r->pflags |= BGP_REF_NOT_STALE; - return 0; - } + eattr *a = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY)); + return a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE); } int -bgp_rte_better(rte *new, rte *old) +bgp_rte_better(const rte *new, const rte *old) { struct bgp_proto *new_bgp = bgp_rte_proto(new); struct bgp_proto *old_bgp = bgp_rte_proto(old); @@ -1920,8 +2310,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* RFC 4271 9.1.2.1. Route resolvability test */ - n = rta_resolvable(new->attrs); - o = rta_resolvable(old->attrs); + n = rte_resolvable(new); + o = rte_resolvable(old); if (n > o) return 1; if (n < o) @@ -1936,8 +2326,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* Start with local preferences */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(new->attrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(old->attrs, BGP_EA_ID(BA_LOCAL_PREF)); n = x ? x->u.data : new_bgp->cf->default_local_pref; o = y ? y->u.data : old_bgp->cf->default_local_pref; if (n > o) @@ -1946,8 +2336,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 7311 4.1 - Apply AIGP metric */ - u64 n2 = bgp_total_aigp_metric(new->attrs); - u64 o2 = bgp_total_aigp_metric(old->attrs); + u64 n2 = bgp_total_aigp_metric(new); + u64 o2 = bgp_total_aigp_metric(old); if (n2 < o2) return 1; if (n2 > o2) @@ -1956,8 +2346,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(new->attrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(old->attrs, BGP_EA_ID(BA_AS_PATH)); n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; if (n < o) @@ -1967,8 +2357,8 @@ bgp_rte_better(rte *new, rte *old) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; o = y ? y->u.data : ORIGIN_INCOMPLETE; if (n < o) @@ -1990,8 +2380,8 @@ bgp_rte_better(rte *new, rte *old) if (new_bgp->cf->med_metric || old_bgp->cf->med_metric || (bgp_get_neighbor(new) == bgp_get_neighbor(old))) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(new->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); n = x ? x->u.data : new_bgp->cf->default_med; o = y ? y->u.data : old_bgp->cf->default_med; if (n < o) @@ -2007,8 +2397,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0; - o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0; + n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0; + o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0; if (n < o) return 1; if (n > o) @@ -2016,8 +2406,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); + x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGINATOR_ID)); + y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGINATOR_ID)); n = x ? x->u.data : new_bgp->remote_id; o = y ? y->u.data : old_bgp->remote_id; @@ -2034,8 +2424,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4456 9. b) Compare cluster list lengths */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); + x = ea_find(new->attrs, BGP_EA_ID(BA_CLUSTER_LIST)); + y = ea_find(old->attrs, BGP_EA_ID(BA_CLUSTER_LIST)); n = x ? int_set_get_size(x->u.ptr) : 0; o = y ? int_set_get_size(y->u.ptr) : 0; if (n < o) @@ -2049,7 +2439,7 @@ bgp_rte_better(rte *new, rte *old) int -bgp_rte_mergable(rte *pri, rte *sec) +bgp_rte_mergable(const rte *pri, const rte *sec) { struct bgp_proto *pri_bgp = bgp_rte_proto(pri); struct bgp_proto *sec_bgp = bgp_rte_proto(sec); @@ -2057,17 +2447,20 @@ bgp_rte_mergable(rte *pri, rte *sec) u32 p, s; /* Skip suppressed routes (see bgp_rte_recalculate()) */ - /* LLGR draft - depreference stale routes */ - if (pri->pflags != sec->pflags) + if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED) return 0; /* RFC 4271 9.1.2.1. Route resolvability test */ - if (rta_resolvable(pri->attrs) != rta_resolvable(sec->attrs)) + if (rte_resolvable(pri) != rte_resolvable(sec)) + return 0; + + /* LLGR draft - depreference stale routes */ + if (rte_stale(pri) != rte_stale(sec)) return 0; /* Start with local preferences */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_LOCAL_PREF)); p = x ? x->u.data : pri_bgp->cf->default_local_pref; s = y ? y->u.data : sec_bgp->cf->default_local_pref; if (p != s) @@ -2076,8 +2469,8 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_AS_PATH)); p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; @@ -2089,8 +2482,8 @@ bgp_rte_mergable(rte *pri, rte *sec) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_ORIGIN)); p = x ? x->u.data : ORIGIN_INCOMPLETE; s = y ? y->u.data : ORIGIN_INCOMPLETE; if (p != s) @@ -2100,8 +2493,8 @@ bgp_rte_mergable(rte *pri, rte *sec) if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); p = x ? x->u.data : pri_bgp->cf->default_med; s = y ? y->u.data : sec_bgp->cf->default_med; if (p != s) @@ -2113,8 +2506,8 @@ bgp_rte_mergable(rte *pri, rte *sec) return 0; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0; - s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0; + p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0; + s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0; if (p != s) return 0; @@ -2127,7 +2520,7 @@ bgp_rte_mergable(rte *pri, rte *sec) static inline int same_group(rte *r, u32 lpref, u32 lasn) { - return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn); + return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn); } static inline int @@ -2138,10 +2531,10 @@ use_deterministic_med(struct rte_storage *r) } int -bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best) +bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best) { rte *key = new ? new : old; - u32 lpref = key->attrs->pref; + u32 lpref = rt_get_preference(key); u32 lasn = bgp_get_neighbor(key); int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0; @@ -2208,7 +2601,7 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol /* The default case - find a new best-in-group route */ rte *r = new; /* new may not be in the list */ - for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) { s->rte.pflags |= BGP_REF_SUPPRESSED; @@ -2225,7 +2618,7 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol new->pflags &= ~BGP_REF_SUPPRESSED; /* Found all existing routes mergable with best-in-group */ - for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte)) s->rte.pflags &= ~BGP_REF_SUPPRESSED; @@ -2264,43 +2657,58 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol } void -bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, + struct rt_pending_export *first, struct rt_pending_export *last, + const rte **feed, uint count) { struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct rt_import_hook *irh = c->c.in_req.hook; - do { - rte *r = feed[--count]; - if (r->sender != c->c.in_req.hook) - continue; + /* Find our routes among others */ + for (uint i=0; i<count; i++) + { + const rte *r = feed[i]; - /* A new route, do not mark as stale */ - if (r->stale_cycle == c->c.in_req.hook->stale_set) + if ( + !rte_is_valid(r) || /* Not a valid route */ + (r->sender != irh) || /* Not our route */ + (r->stale_cycle == irh->stale_set)) /* A new route, do not mark as stale */ continue; - eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY)); const struct adata *ad = ea ? ea->u.ptr : NULL; uint flags = ea ? ea->flags : BAF_PARTIAL; - rte e0 = *r; - e0.flags |= REF_USE_STALE; - + /* LLGR not allowed, withdraw the route */ if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + { rte_import(&c->c.in_req, n, NULL, r->src); + continue; + } - else if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) - rte_import(&c->c.in_req, n, &e0, r->src); + /* Route already marked as LLGR, do nothing */ + if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + continue; - else { - rta *a = e0.attrs = rta_do_cow(r->attrs, c->c.rte_update_pool); + /* Store the tmp_linpool state to aggresively save memory */ + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); - bgp_set_attr_ptr(&(a->eattrs), c->c.rte_update_pool, BA_COMMUNITY, flags, - int_set_add(c->c.rte_update_pool, ad, BGP_COMM_LLGR_STALE)); - e0.pflags |= BGP_REF_STALE; + /* Mark the route as LLGR */ + rte e0 = *r; + bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE)); - rte_import(&c->c.in_req, n, &e0, r->src); - lp_flush(c->c.rte_update_pool); - } - } while (count); + /* We need to update the route but keep it stale. */ + ASSERT_DIE(irh->stale_set == irh->stale_valid + 1); + irh->stale_set--; + rte_import(&c->c.in_req, n, &e0, r->src); + irh->stale_set++; + + /* Restore the memory state */ + lp_restore(tmp_linpool, &tmpp); + } + + rpe_mark_seen_all(req->hook, first, last, NULL); } @@ -2316,8 +2724,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR); /* First, unset AS4_* attributes */ - if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH); - if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR); + if (p4) bgp_unset_attr(attrs, BA_AS4_PATH); + if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR); /* Handle AGGREGATOR attribute */ if (a2 && a4) @@ -2350,60 +2758,37 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) } } -int -bgp_get_attr(const eattr *a, byte *buf, int buflen) -{ - uint i = EA_ID(a->id); - const struct bgp_attr_desc *d; - int len; - - if (bgp_attr_known(i)) - { - d = &bgp_attr_table[i]; - len = bsprintf(buf, "%s", d->name); - buf += len; - if (d->format) - { - *buf++ = ':'; - *buf++ = ' '; - d->format(a, buf, buflen - len - 2); - return GA_FULL; - } - return GA_NAME; - } - - bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : ""); - return GA_NAME; -} - void -bgp_get_route_info(rte *e, byte *buf) +bgp_get_route_info(const rte *e, byte *buf) { - eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + eattr *p = ea_find(e->attrs, BGP_EA_ID(BA_AS_PATH)); + eattr *o = ea_find(e->attrs, BGP_EA_ID(BA_ORIGIN)); u32 origas; - buf += bsprintf(buf, " (%d", e->attrs->pref); + buf += bsprintf(buf, " (%d", rt_get_preference(e)); - if (e->pflags & BGP_REF_SUPPRESSED) - buf += bsprintf(buf, "-"); + if (!net_is_flow(e->net)) + { + if (e->pflags & BGP_REF_SUPPRESSED) + buf += bsprintf(buf, "-"); - if (rte_stale(e)) - buf += bsprintf(buf, "s"); + if (rte_stale(e)) + buf += bsprintf(buf, "s"); - u64 metric = bgp_total_aigp_metric(e->attrs); - if (metric < BGP_AIGP_MAX) - { - buf += bsprintf(buf, "/%lu", metric); - } - else if (e->attrs->igp_metric) - { - if (!rta_resolvable(e->attrs)) - buf += bsprintf(buf, "/-"); - else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) - buf += bsprintf(buf, "/?"); - else - buf += bsprintf(buf, "/%d", e->attrs->igp_metric); + u64 metric = bgp_total_aigp_metric(e); + if (metric < BGP_AIGP_MAX) + { + buf += bsprintf(buf, "/%lu", metric); + } + else if (metric = rt_get_igp_metric(e)) + { + if (!rte_resolvable(e)) + buf += bsprintf(buf, "/-"); + else if (metric >= IGP_METRIC_UNKNOWN) + buf += bsprintf(buf, "/?"); + else + buf += bsprintf(buf, "/%d", metric); + } } buf += bsprintf(buf, ") ["); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index e2754649..cda0eb8d 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -101,7 +101,9 @@ * RFC 8203 - BGP Administrative Shutdown Communication * RFC 8212 - Default EBGP Route Propagation Behavior without Policies * RFC 8654 - Extended Message Support for BGP - * draft-ietf-idr-ext-opt-param-07 + * RFC 9072 - Extended Optional Parameters Length for BGP OPEN Message + * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications + * RFC 9234 - Route Leak Prevention and Detection Using Roles * draft-uttaro-idr-bgp-persistence-04 * draft-walton-bgp-hostname-capability-02 */ @@ -113,7 +115,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -124,8 +126,13 @@ #include "bgp.h" -/* Global list of listening sockets */ -static list STATIC_LIST_INIT(bgp_sockets); +static void bgp_listen_create(void *); + +static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ +static list STATIC_LIST_INIT(bgp_listen_pending); /* Global list of listening socket open requests */ +static event bgp_listen_event = { .hook = bgp_listen_create }; + +DOMAIN(rtable) bgp_listen_domain; static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); @@ -136,14 +143,69 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_initiate_disable(struct bgp_proto *p, int err_val); static void bgp_graceful_restart_feed(struct bgp_channel *c); -static inline void channel_refresh_end_reload(struct channel *c) + + +static inline int +bgp_setup_auth(struct bgp_proto *p, int enable) { - channel_refresh_end(c); + /* Beware. This is done from main_birdloop and protocol birdloop is NOT ENTERED. + * Anyway, we are only accessing: + * - protocol config which can be changed only from main_birdloop (reconfig) + * - protocol listen socket which is always driven by main_birdloop + * - protocol name which is set on reconfig + */ + + if (p->cf->password && p->listen.sock) + { + ip_addr prefix = p->cf->remote_ip; + int pxlen = -1; + + if (p->cf->remote_range) + { + prefix = net_prefix(p->cf->remote_range); + pxlen = net_pxlen(p->cf->remote_range); + } + + int rv = sk_set_md5_auth(p->listen.sock->sk, + p->cf->local_ip, prefix, pxlen, p->cf->iface, + enable ? p->cf->password : NULL, p->cf->setkey); + + if (rv < 0) + sk_log_error(p->listen.sock->sk, p->p.name); - if (c->in_table) - channel_request_reload(c); + return rv; + } + else + return 0; +} + +/** + * bgp_close - close a BGP instance + * @p: BGP instance + * + * This function frees and deconfigures shared BGP resources. + */ +static void +bgp_close(struct bgp_proto *p) +{ + LOCK_DOMAIN(rtable, bgp_listen_domain); + + struct bgp_listen_request *req = &p->listen; + struct bgp_socket *bs = req->sock; + + if (bs) + { + req->sock = NULL; + rem_node(&req->n); + + if (bs && EMPTY_LIST(bs->requests)) + ev_send(&global_event_list, &bgp_listen_event); + } + + UNLOCK_DOMAIN(rtable, bgp_listen_domain); } /** @@ -155,121 +217,136 @@ static inline void channel_refresh_end_reload(struct channel *c) * is acquired and neighbor is ready). When error, caller should change state to * PS_DOWN and return immediately. */ -static int +static void bgp_open(struct bgp_proto *p) { - ASSERT_DIE(birdloop_inside(&main_birdloop)); - - struct bgp_socket *bs = NULL; - struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; - ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : - (p->ipv4 ? IPA_NONE4 : IPA_NONE6); - uint port = p->cf->local_port; - uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; - uint flag_mask = SKF_FREEBIND; + LOCK_DOMAIN(rtable, bgp_listen_domain); + struct bgp_listen_request *req = &p->listen; /* We assume that cf->iface is defined iff cf->local_ip is link-local */ + req->iface = p->cf->strict_bind ? p->cf->iface : NULL; + req->vrf = p->p.vrf; + req->addr = p->cf->strict_bind ? p->cf->local_ip : + (p->ipv4 ? IPA_NONE4 : IPA_NONE6); + req->port = p->cf->local_port; + req->flags = p->cf->free_bind ? SKF_FREEBIND : 0; - WALK_LIST(bs, bgp_sockets) - if (ipa_equal(bs->sk->saddr, addr) && - (bs->sk->sport == port) && - (bs->sk->iface == ifa) && - (bs->sk->vrf == p->p.vrf) && - ((bs->sk->flags & flag_mask) == flags)) - { - bs->uc++; - p->sock = bs; - return 0; - } - - sock *sk = sk_new(proto_pool); - sk->type = SK_TCP_PASSIVE; - sk->ttl = 255; - sk->saddr = addr; - sk->sport = port; - sk->iface = ifa; - sk->vrf = p->p.vrf; - sk->flags = flags | SKF_PASSIVE_THREAD; - sk->tos = IP_PREC_INTERNET_CONTROL; - sk->rbsize = BGP_RX_BUFFER_SIZE; - sk->tbsize = BGP_TX_BUFFER_SIZE; - sk->rx_hook = bgp_incoming_connection; - sk->err_hook = bgp_listen_sock_err; - - if (sk_open(sk) < 0) - goto err; - - bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); - bs->sk = sk; - bs->uc = 1; - p->sock = bs; - sk->data = bs; + BGP_TRACE(D_EVENTS, "Requesting listen socket at %I%J port %u", req->addr, req->iface, req->port); - add_tail(&bgp_sockets, &bs->n); + add_tail(&bgp_listen_pending, &req->n); + ev_send(&global_event_list, &bgp_listen_event); - return 0; - -err: - sk_log_error(sk, p->p.name); - log(L_ERR "%s: Cannot open listening socket", p->p.name); - rfree(sk); - return -1; + UNLOCK_DOMAIN(rtable, bgp_listen_domain); } -/** - * bgp_close - close a BGP instance - * @p: BGP instance - * - * This function frees and deconfigures shared BGP resources. - */ static void -bgp_close(struct bgp_proto *p) +bgp_listen_create(void *_ UNUSED) { ASSERT_DIE(birdloop_inside(&main_birdloop)); - struct bgp_socket *bs = p->sock; - - ASSERT(bs && bs->uc); + uint flag_mask = SKF_FREEBIND; - if (--bs->uc) - return; + while (1) { + LOCK_DOMAIN(rtable, bgp_listen_domain); - rfree(bs->sk); - rem_node(&bs->n); - mb_free(bs); -} + if (EMPTY_LIST(bgp_listen_pending)) + { + UNLOCK_DOMAIN(rtable, bgp_listen_domain); + break; + } -static inline int -bgp_setup_auth(struct bgp_proto *p, int enable) -{ - if (p->cf->password) - { - ip_addr prefix = p->cf->remote_ip; - int pxlen = -1; + /* Get the first request to match */ + struct bgp_listen_request *req = HEAD(bgp_listen_pending); + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); + rem_node(&req->n); + + /* First try to find existing socket */ + struct bgp_socket *bs; + WALK_LIST(bs, bgp_sockets) + if (ipa_equal(bs->sk->saddr, req->addr) && + (bs->sk->sport == req->port) && + (bs->sk->iface == req->iface) && + (bs->sk->vrf == req->vrf) && + ((bs->sk->flags & flag_mask) == req->flags)) + break; - if (p->cf->remote_range) + /* Not found any */ + if (NODE_VALID(bs)) + BGP_TRACE(D_EVENTS, "Found a listening socket: %p", bs); + else { - prefix = net_prefix(p->cf->remote_range); - pxlen = net_pxlen(p->cf->remote_range); + /* Allocating new socket from global protocol pool. + * We can do this in main_birdloop. */ + sock *sk = sk_new(proto_pool); + sk->type = SK_TCP_PASSIVE; + sk->ttl = 255; + sk->saddr = req->addr; + sk->sport = req->port; + sk->iface = req->iface; + sk->vrf = req->vrf; + sk->flags = req->flags; + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->rbsize = BGP_RX_BUFFER_SIZE; + sk->tbsize = BGP_TX_BUFFER_SIZE; + sk->rx_hook = bgp_incoming_connection; + sk->err_hook = bgp_listen_sock_err; + + if (sk_open(sk, &main_birdloop) < 0) + { + sk_log_error(sk, p->p.name); + log(L_ERR "%s: Cannot open listening socket", p->p.name); + rfree(sk); + UNLOCK_DOMAIN(rtable, bgp_listen_domain); + + bgp_initiate_disable(p, BEM_NO_SOCKET); + continue; + } + + bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); + bs->sk = sk; + sk->data = bs; + + init_list(&bs->requests); + add_tail(&bgp_sockets, &bs->n); + + BGP_TRACE(D_EVENTS, "Created new listening socket: %p", bs); } - int rv = sk_set_md5_auth(p->sock->sk, - p->cf->local_ip, prefix, pxlen, p->cf->iface, - enable ? p->cf->password : NULL, p->cf->setkey); + req->sock = bs; + add_tail(&bs->requests, &req->n); - if (rv < 0) - sk_log_error(p->sock->sk, p->p.name); + if (bgp_setup_auth(p, 1) < 0) + { + rem_node(&req->n); + req->sock = NULL; - return rv; + UNLOCK_DOMAIN(rtable, bgp_listen_domain); + + bgp_initiate_disable(p, BEM_INVALID_MD5); + continue; + } + + UNLOCK_DOMAIN(rtable, bgp_listen_domain); } - else - return 0; + + /* Cleanup leftover listening sockets */ + LOCK_DOMAIN(rtable, bgp_listen_domain); + struct bgp_socket *bs; + node *nxt; + WALK_LIST_DELSAFE(bs, nxt, bgp_sockets) + if (EMPTY_LIST(bs->requests)) + { + rfree(bs->sk); + rem_node(&bs->n); + mb_free(bs); + } + UNLOCK_DOMAIN(rtable, bgp_listen_domain); } static inline struct bgp_channel * bgp_find_channel(struct bgp_proto *p, u32 afi) { struct bgp_channel *c; - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) if (c->afi == afi) return c; @@ -288,6 +365,8 @@ bgp_startup(struct bgp_proto *p) if (p->postponed_sk) { /* Apply postponed incoming connection */ + sk_reloop(p->postponed_sk, p->p.loop); + bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, p->postponed_sk); bgp_send_open(&p->incoming_conn); @@ -305,13 +384,7 @@ bgp_startup_timeout(timer *t) static void bgp_initiate(struct bgp_proto *p) { - int err_val; - - if (bgp_open(p) < 0) - { err_val = BEM_NO_SOCKET; goto err1; } - - if (bgp_setup_auth(p, 1) < 0) - { err_val = BEM_INVALID_MD5; goto err2; } + bgp_open(p); if (p->cf->bfd) bgp_update_bfd(p, p->cf->bfd); @@ -324,24 +397,28 @@ bgp_initiate(struct bgp_proto *p) } else bgp_startup(p); +} - return; - -err2: - bgp_close(p); -err1: - p->p.disabled = 1; - bgp_store_error(p, NULL, BE_MISC, err_val); - - p->neigh = NULL; - proto_notify_state(&p->p, PS_DOWN); - - return; +static void +bgp_initiate_disable(struct bgp_proto *p, int err_val) +{ + PROTO_LOCKED_FROM_MAIN(&p->p) + { + /* The protocol may be already down for another reason. + * Shutdown the protocol only if it isn't already shutting down. */ + switch (p->p.proto_state) + { + case PS_START: + case PS_UP: + p->p.disabled = 1; + bgp_store_error(p, NULL, BE_MISC, err_val); + bgp_stop(p, err_val, NULL, 0); + } + } } /** * bgp_start_timer - start a BGP timer - * @p: bgp_proto which the timer belongs to * @t: timer * @value: time (in seconds) to fire (0 to disable the timer) * @@ -352,8 +429,6 @@ err1: void bgp_start_timer(struct bgp_proto *p, timer *t, uint value) { - BGP_ASSERT_INSIDE(p); - if (value) { /* The randomization procedure is specified in RFC 4271 section 10 */ @@ -375,7 +450,7 @@ bgp_start_timer(struct bgp_proto *p, timer *t, uint value) void bgp_close_conn(struct bgp_conn *conn) { - BGP_ASSERT_INSIDE(conn->bgp); + // struct bgp_proto *p = conn->bgp; DBG("BGP: Closing connection\n"); conn->packets_to_send = 0; @@ -386,8 +461,10 @@ bgp_close_conn(struct bgp_conn *conn) conn->keepalive_timer = NULL; rfree(conn->hold_timer); conn->hold_timer = NULL; + rfree(conn->tx_ev); conn->tx_ev = NULL; + rfree(conn->sk); conn->sk = NULL; @@ -467,8 +544,6 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len static void bgp_down(struct bgp_proto *p) { - bgp_start_timer(p, p->startup_timer, 0); - if (p->start_state > BSS_PREPARE) { bgp_setup_auth(p, 0); @@ -482,34 +557,21 @@ bgp_down(struct bgp_proto *p) } static void -bgp_active_event(void *vp) +bgp_decision(void *vp) { struct bgp_proto *p = vp; - BGP_ASSERT_INSIDE(p); - - DBG("%s: Decision start\n", p->p.name); + DBG("BGP: Decision start\n"); if ((p->p.proto_state == PS_START) && (p->outgoing_conn.state == BS_IDLE) && (p->incoming_conn.state != BS_OPENCONFIRM) && !p->passive) bgp_active(p); -} - -static void -bgp_down_event(void *vp) -{ - struct bgp_proto *p = vp; - - BGP_ENTER(p); - DBG("%s: Down event\n", p->p.name); if ((p->p.proto_state == PS_STOP) && (p->outgoing_conn.state == BS_IDLE) && (p->incoming_conn.state == BS_IDLE)) bgp_down(p); - - BGP_LEAVE(p); } static struct bgp_proto * @@ -539,9 +601,16 @@ void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) { proto_notify_state(&p->p, PS_STOP); + p->uncork_ev->data = NULL; bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len); - ev_send_loop(&main_birdloop, p->down_event); + + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + if (c->ptx) + bgp_free_pending_tx(c); + + proto_send_event(&p->p, p->event); } static inline void @@ -588,7 +657,6 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->link_addr = p->neigh->iface->llv6->ip; conn->sk->fast_rx = 0; - conn->sk->cork = &rt_cork; p->conn = conn; p->last_error_class = 0; @@ -614,7 +682,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) /* Summary state of ADD_PATH RX for active channels */ uint summary_add_path_rx = 0; - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) { const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi); @@ -635,7 +703,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) int active = loc->ready && rem->ready; c->c.disabled = !active; - c->c.reloadable = p->route_refresh || c->cf->import_table; + c->c.reloadable = p->route_refresh || ((c->c.in_keep & RIK_PREFILTER) == RIK_PREFILTER); c->index = active ? num++ : 0; @@ -696,7 +764,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) p->channel_count = num; p->summary_add_path_rx = summary_add_path_rx; - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) { if (c->c.disabled) continue; @@ -747,8 +815,7 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn) bgp_close_conn(conn); bgp_conn_set_state(conn, BS_IDLE); - ev_send_loop(p->p.loop, p->active_event); - ev_send_loop(&main_birdloop, p->down_event); + proto_send_event(&p->p, p->event); if (os == BS_ESTABLISHED) bgp_conn_leave_established_state(p); @@ -776,7 +843,7 @@ bgp_handle_graceful_restart(struct bgp_proto *p) p->gr_active_num = 0; struct bgp_channel *c; - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) { /* FIXME: perhaps check for channel state instead of disabled flag? */ if (c->c.disabled) @@ -790,16 +857,14 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - channel_refresh_begin(&c->c); - break; + /* fall through */ case BGP_GRS_ACTIVE: - channel_refresh_end(&c->c); - channel_refresh_begin(&c->c); + rt_refresh_begin(&c->c.in_req); break; case BGP_GRS_LLGR: - channel_refresh_begin(&c->c); + rt_refresh_begin(&c->c.in_req); bgp_graceful_restart_feed(c); break; } @@ -807,15 +872,13 @@ bgp_handle_graceful_restart(struct bgp_proto *p) else { /* Just flush the routes */ - channel_refresh_begin(&c->c); - channel_refresh_end(&c->c); + rt_refresh_begin(&c->c.in_req); + rt_refresh_end(&c->c.in_req); } /* Reset bucket and prefix tables */ - bgp_free_bucket_table(c); - bgp_free_prefix_table(c); - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); + bgp_free_pending_tx(c); + bgp_init_pending_tx(c); c->packets_to_send = 0; } @@ -871,6 +934,8 @@ bgp_graceful_restart_feed(struct bgp_channel *c) } + + /** * bgp_graceful_restart_done - finish active BGP graceful restart * @c: BGP channel @@ -893,11 +958,8 @@ bgp_graceful_restart_done(struct bgp_channel *c) if (!p->gr_active_num) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); - if (c->stale_feed.hook) - rt_stop_export(&c->stale_feed, bgp_graceful_restart_feed_done); - tm_stop(c->stale_timer); - channel_refresh_end_reload(&c->c); + rt_refresh_end(&c->c.in_req); } /** @@ -919,7 +981,7 @@ bgp_graceful_restart_timeout(timer *t) if (p->llgr_ready) { struct bgp_channel *c; - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) { /* Channel is not in GR and is already flushed */ if (!c->gr_active) @@ -976,11 +1038,8 @@ bgp_refresh_begin(struct bgp_channel *c) if (c->load_state == BFS_LOADING) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } - if (c->load_state == BFS_REFRESHING) - channel_refresh_end(&c->c); - c->load_state = BFS_REFRESHING; - channel_refresh_begin(&c->c); + rt_refresh_begin(&c->c.in_req); } /** @@ -1001,7 +1060,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - channel_refresh_end_reload(&c->c); + rt_refresh_end(&c->c.in_req); } @@ -1137,12 +1196,10 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn) static void bgp_setup_sk(struct bgp_conn *conn, sock *s) { - ASSERT_DIE(s->flags & SKF_THREAD); s->data = conn; s->err_hook = bgp_sock_err; s->fast_rx = 1; conn->sk = s; - sk_start(s); } static void @@ -1151,8 +1208,6 @@ bgp_active(struct bgp_proto *p) int delay = MAX(1, p->cf->connect_delay_time); struct bgp_conn *conn = &p->outgoing_conn; - BGP_ASSERT_INSIDE(p); - BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay); bgp_setup_conn(p, conn); bgp_conn_set_state(conn, BS_ACTIVE); @@ -1173,12 +1228,9 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c struct bgp_conn *conn = &p->outgoing_conn; int hops = p->cf->multihop ? : 1; - BGP_ASSERT_INSIDE(p); - DBG("BGP: Connecting\n"); sock *s = sk_new(p->p.pool); s->type = SK_TCP_ACTIVE; - s->flags |= SKF_THREAD; s->saddr = p->local_ip; s->daddr = p->remote_ip; s->dport = p->cf->remote_port; @@ -1190,6 +1242,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c s->tos = IP_PREC_INTERNET_CONTROL; s->password = p->cf->password; s->tx_hook = bgp_connected; + s->flags = p->cf->free_bind ? SKF_FREEBIND : 0; BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL, s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL); @@ -1197,7 +1250,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c bgp_setup_sk(conn, s); bgp_conn_set_state(conn, BS_CONNECT); - if (sk_open(s) < 0) + if (sk_open(s, p->p.loop) < 0) goto err; /* Set minimal receive TTL if needed */ @@ -1227,12 +1280,17 @@ static struct bgp_proto * bgp_find_proto(sock *sk) { struct bgp_proto *best = NULL; - struct bgp_proto *p; + struct bgp_socket *bs = sk->data; + struct bgp_listen_request *req; /* sk->iface is valid only if src or dst address is link-local */ int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr); - WALK_LIST(p, proto_list) + LOCK_DOMAIN(rtable, bgp_listen_domain); + + WALK_LIST(req, bs->requests) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req); if ((p->p.proto == &proto_bgp) && (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) && (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) && @@ -1246,7 +1304,9 @@ bgp_find_proto(sock *sk) if (!bgp_is_dynamic(p)) break; } + } + UNLOCK_DOMAIN(rtable, bgp_listen_domain); return best; } @@ -1265,6 +1325,8 @@ bgp_find_proto(sock *sk) static int bgp_incoming_connection(sock *sk, uint dummy UNUSED) { + ASSERT_DIE(birdloop_inside(&main_birdloop)); + struct bgp_proto *p; int acc, hops; @@ -1278,17 +1340,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) return 0; } - if (p->p.loop == &main_birdloop) - { - /* Protocol is down for whatever reason. No need for locking. */ - BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) rejected (protocol is down)", - sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, - sk->dport); - rfree(sk); - return 0; - } - - BGP_ENTER(p); + birdloop_enter(p->p.loop); /* * BIRD should keep multiple incoming connections in OpenSent state (for @@ -1319,8 +1371,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) if (!acc) { rfree(sk); - BGP_LEAVE(p); - return 0; + goto leave; } hops = p->cf->multihop ? : 1; @@ -1345,22 +1396,24 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED) p = bgp_spawn(p, sk->daddr); p->postponed_sk = sk; rmove(sk, p->p.pool); - BGP_LEAVE(p); - return 0; + goto leave; } rmove(sk, p->p.pool); + sk_reloop(sk, p->p.loop); + bgp_setup_conn(p, &p->incoming_conn); bgp_setup_sk(&p->incoming_conn, sk); bgp_send_open(&p->incoming_conn); - BGP_LEAVE(p); - return 0; + goto leave; err: sk_log_error(sk, p->p.name); log(L_ERR "%s: Incoming connection aborted", p->p.name); rfree(sk); - BGP_LEAVE(p); + +leave: + birdloop_leave(p->p.loop); return 0; } @@ -1395,9 +1448,10 @@ bgp_neigh_notify(neighbor *n) struct bgp_proto *p = (struct bgp_proto *) n->proto; int ps = p->p.proto_state; - BGP_ASSERT_INSIDE(p); + if (n != p->neigh) + return; - if ((n != p->neigh) || (ps == PS_DOWN) || (ps == PS_STOP)) + if ((ps == PS_DOWN) || (ps == PS_STOP)) return; int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE); @@ -1470,15 +1524,22 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd) { if (bfd && p->bfd_req) + { + BGP_TRACE(D_EVENTS, "Updating existing BFD request"); bfd_update_request(p->bfd_req, bfd); + } if (bfd && !p->bfd_req && !bgp_is_dynamic(p)) + { p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip, p->cf->multihop ? NULL : p->neigh->iface, - p->p.vrf, bgp_bfd_notify, p, birdloop_event_list(p->p.loop), bfd); + p->p.vrf, bgp_bfd_notify, p, p->p.loop, bfd); + BGP_TRACE(D_EVENTS, "Requesting a new BFD session"); + } if (!bfd && p->bfd_req) { + BGP_TRACE(D_EVENTS, "Retracting the BFD request"); rfree(p->bfd_req); p->bfd_req = NULL; } @@ -1490,8 +1551,11 @@ bgp_reload_routes(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ASSERT(p->conn && (p->route_refresh)); + /* Ignore non-BGP channels */ + if (C->channel != &channel_bgp) + return; + ASSERT(p->conn && p->route_refresh); bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); } @@ -1501,6 +1565,10 @@ bgp_feed_begin(struct channel *C, int initial) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; + /* Ignore non-BGP channels */ + if (C->channel != &channel_bgp) + return; + /* This should not happen */ if (!p->conn) return; @@ -1508,6 +1576,12 @@ bgp_feed_begin(struct channel *C, int initial) if (initial && p->cf->gr_mode) c->feed_state = BFS_LOADING; + if (!initial && C->out_table) + { + c->feed_out_table = 1; + return; + } + /* It is refeed and both sides support enhanced route refresh */ if (!initial && p->enhanced_refresh) { @@ -1526,6 +1600,16 @@ bgp_feed_end(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; + /* Ignore non-BGP channels */ + if (C->channel != &channel_bgp) + return; + + if (c->feed_out_table) + { + c->feed_out_table = 0; + return; + } + /* This should not happen */ if (!p->conn) return; @@ -1548,17 +1632,14 @@ bgp_feed_end(struct channel *C) static void -bgp_start_locked(struct object_lock *lock) +bgp_start_locked(void *_p) { - struct bgp_proto *p = lock->data; + struct bgp_proto *p = _p; const struct bgp_config *cf = p->cf; - BGP_ENTER(p); - if (p->p.proto_state != PS_START) { DBG("BGP: Got lock in different state %d\n", p->p.proto_state); - BGP_LEAVE(p); return; } @@ -1568,11 +1649,10 @@ bgp_start_locked(struct object_lock *lock) { /* Multi-hop sessions do not use neighbor entries */ bgp_initiate(p); - BGP_LEAVE(p); return; } - neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY | NEF_NOTIFY_MAIN); + neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY); if (!n) { log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface); @@ -1580,7 +1660,6 @@ bgp_start_locked(struct object_lock *lock) p->p.disabled = 1; bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP); proto_notify_state(&p->p, PS_DOWN); - BGP_LEAVE(p); return; } @@ -1592,8 +1671,6 @@ bgp_start_locked(struct object_lock *lock) BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name); else bgp_start_neighbor(p); - - BGP_LEAVE(p); } static int @@ -1632,13 +1709,12 @@ bgp_start(struct proto *P) p->stats.rx_bytes = p->stats.tx_bytes = 0; p->last_rx_update = 0; - p->active_event = ev_new_init(p->p.pool, bgp_active_event, p); - p->down_event = ev_new_init(p->p.pool, bgp_down_event, p); + p->event = ev_new_init(p->p.pool, bgp_decision, p); + p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p); + p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); - p->rx_lp = lp_new_default(p->p.pool); - p->local_id = proto_get_router_id(P->cf); if (p->rr_client) p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id; @@ -1650,7 +1726,7 @@ bgp_start(struct proto *P) if (p->p.gr_recovery && p->cf->gr_mode) { struct bgp_channel *c; - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) channel_graceful_restart_lock(&c->c); } @@ -1665,8 +1741,11 @@ bgp_start(struct proto *P) lock->iface = p->cf->iface; lock->vrf = p->cf->iface ? NULL : p->p.vrf; lock->type = OBJLOCK_TCP; - lock->hook = bgp_start_locked; - lock->data = p; + lock->event = (event) { + .hook = bgp_start_locked, + .data = p, + }; + lock->target = proto_event_list(P); /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */ if (bgp_is_dynamic(p)) @@ -1782,7 +1861,7 @@ bgp_init(struct proto_config *CF) P->rt_notify = bgp_rt_notify; P->preexport = bgp_preexport; - P->neigh_notify = bgp_neigh_notify; + P->iface_sub.neigh_notify = bgp_neigh_notify; P->reload_routes = bgp_reload_routes; P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; @@ -1809,7 +1888,7 @@ bgp_init(struct proto_config *CF) /* Add all channels */ struct bgp_channel_config *cc; - WALK_LIST(cc, CF->channels) + BGP_CF_WALK_CHANNELS(cf, cc) proto_add_channel(P, &cc->c); return P; @@ -1830,6 +1909,9 @@ bgp_channel_init(struct channel *C, struct channel_config *CF) if (cf->igp_table_ip6) c->igp_table_ip6 = cf->igp_table_ip6->table; + + if (cf->base_table) + c->base_table = cf->base_table->table; } static int @@ -1840,22 +1922,23 @@ bgp_channel_start(struct channel *C) ip_addr src = p->local_ip; if (c->igp_table_ip4) - RT_LOCKED(c->igp_table_ip4, t) - rt_lock_table(t); + rt_lock_table(c->igp_table_ip4); if (c->igp_table_ip6) - RT_LOCKED(c->igp_table_ip6, t) - rt_lock_table(t); + rt_lock_table(c->igp_table_ip6); - c->pool = p->p.pool; // XXXX - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); + if (c->base_table) + { + rt_lock_table(c->base_table); + rt_flowspec_link(c->base_table, c->c.table); + } - if (c->cf->import_table) - channel_setup_in_table(C, 0); + c->pool = p->p.pool; // XXXX if (c->cf->export_table) - channel_setup_out_table(C); + bgp_setup_out_table(c); + + bgp_init_pending_tx(c); c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0); @@ -1926,12 +2009,16 @@ bgp_channel_cleanup(struct channel *C) struct bgp_channel *c = (void *) C; if (c->igp_table_ip4) - RT_LOCKED(c->igp_table_ip4, t) - rt_unlock_table(t); + rt_unlock_table(c->igp_table_ip4); if (c->igp_table_ip6) - RT_LOCKED(c->igp_table_ip6, t) - rt_unlock_table(t); + rt_unlock_table(c->igp_table_ip6); + + if (c->base_table) + { + rt_flowspec_unlink(c->base_table, c->c.table); + rt_unlock_table(c->base_table); + } c->index = 0; @@ -1944,7 +2031,7 @@ bgp_find_channel_config(struct bgp_config *cf, u32 afi) { struct bgp_channel_config *cc; - WALK_LIST(cc, cf->c.channels) + BGP_CF_WALK_CHANNELS(cf, cc) if (cc->afi == afi) return cc; @@ -1974,12 +2061,31 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 return cc2->c.table; /* Last, try default table of given type */ - if (tab = cf->c.global->def_tables[type]) + if (tab = rt_get_default_table(cf->c.global, type)) return tab; cf_error("Undefined IGP table"); } +static struct rtable_config * +bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc) +{ + /* Expected table type */ + u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6; + + /* First, try appropriate IP channel */ + u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST); + struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2); + if (cc2 && (cc2->c.table->addr_type == type)) + return cc2->c.table; + + /* Last, try default table of given type */ + struct rtable_config *tab = rt_get_default_table(cf->c.global, type); + if (tab) + return tab; + + cf_error("Undefined base table"); +} void bgp_postconfig(struct proto_config *CF) @@ -2042,6 +2148,15 @@ bgp_postconfig(struct proto_config *CF) if (internal && cf->rs_client) cf_error("Only external neighbor can be RS client"); + if (internal && (cf->local_role != BGP_ROLE_UNDEFINED)) + cf_error("Local role cannot be set on IBGP sessions"); + + if (interior && (cf->local_role != BGP_ROLE_UNDEFINED)) + log(L_WARN "BGP roles are not recommended to be used within AS confederations"); + + if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED)) + cf_error("Local role must be set if roles are required"); + if (!cf->confederation && cf->confederation_member) cf_error("Confederation ID must be set for member sessions"); @@ -2064,9 +2179,24 @@ bgp_postconfig(struct proto_config *CF) if (internal && cf->enforce_first_as) cf_error("Enforce first AS check is requires EBGP sessions"); + if (cf->keepalive_time > cf->hold_time) + cf_error("Keepalive time must be at most hold time"); + + if (cf->keepalive_time > (cf->hold_time / 2)) + log(L_WARN "Keepalive time should be at most 1/2 of hold time"); + + if (cf->min_hold_time > cf->hold_time) + cf_error("Min hold time (%u) exceeds hold time (%u)", + cf->min_hold_time, cf->hold_time); + + uint keepalive_time = cf->keepalive_time ?: cf->hold_time / 3; + if (cf->min_keepalive_time > keepalive_time) + cf_error("Min keepalive time (%u) exceeds keepalive time (%u)", + cf->min_keepalive_time, keepalive_time); + struct bgp_channel_config *cc; - WALK_LIST(cc, CF->channels) + BGP_CF_WALK_CHANNELS(cf, cc) { /* Handle undefined import filter */ if (cc->c.in_filter == FILTER_UNDEF) @@ -2094,6 +2224,10 @@ bgp_postconfig(struct proto_config *CF) if (!cc->gw_mode) cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT; + /* Different default for next_hop_prefer */ + if (!cc->next_hop_prefer) + cc->next_hop_prefer = (cc->gw_mode == GW_DIRECT) ? NHP_GLOBAL : NHP_LOCAL; + /* Defaults based on proto config */ if (cc->gr_able == 0xff) cc->gr_able = (cf->gr_mode == BGP_GR_ABLE); @@ -2124,6 +2258,14 @@ bgp_postconfig(struct proto_config *CF) cf_error("Mismatched IGP table type"); } + /* Default value of base table */ + if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table) + cc->base_table = bgp_default_base_table(cf, cc); + + if (cc->base_table && !cc->base_table->trie_used) + cf_error("Flowspec validation requires base table (%s) with trie", + cc->base_table->name); + if (cf->multihop && (cc->gw_mode == GW_DIRECT)) cf_error("Multihop BGP cannot use direct gateway mode"); @@ -2165,20 +2307,16 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) WALK_LIST(C, p->p.channels) C->stale = 1; - WALK_LIST(cc, new->c.channels) + BGP_CF_WALK_CHANNELS(new, cc) { C = (struct channel *) bgp_find_channel(p, cc->afi); same = proto_configure_channel(P, &C, &cc->c) && same; - - if (C) - C->stale = 0; } WALK_LIST_DELSAFE(C, C2, p->p.channels) if (C->stale) same = proto_configure_channel(P, &C, NULL) && same; - if (same && (p->start_state > BSS_PREPARE)) bgp_update_bfd(p, new->bfd); @@ -2192,7 +2330,7 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) return same; } -#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL ) +#define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL ) static int bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed) @@ -2203,29 +2341,33 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor struct bgp_channel_config *old = c->cf; if ((new->secondary != old->secondary) || + (new->validate != old->validate) || (new->gr_able != old->gr_able) || (new->llgr_able != old->llgr_able) || (new->llgr_time != old->llgr_time) || (new->ext_next_hop != old->ext_next_hop) || (new->add_path != old->add_path) || - (new->import_table != old->import_table) || (new->export_table != old->export_table) || - (IGP_TABLE(new, ip4) != IGP_TABLE(old, ip4)) || - (IGP_TABLE(new, ip6) != IGP_TABLE(old, ip6))) + (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) || + (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) || + (TABLE(new, base_table) != TABLE(old, base_table))) return 0; if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP)) return 0; if ((new->gw_mode != old->gw_mode) || + (new->next_hop_prefer != old->next_hop_prefer) || (new->aigp != old->aigp) || (new->cost != old->cost)) { - /* import_changed itself does not force ROUTE_REFRESH when import_table is active */ - if (c->c.in_table && (c->c.channel_state == CS_UP)) + /* If import table is active and route refresh is possible, we just ask for route refresh */ + if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP) && p->route_refresh) bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); - *import_changed = 1; + /* Otherwise we do complete reload */ + else + *import_changed = 1; } if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) || @@ -2394,6 +2536,15 @@ bgp_show_afis(int code, char *s, u32 *afis, uint count) cli_msg(code, b.start); } +const char * +bgp_format_role_name(u8 role) +{ + static const char *bgp_role_names[] = { "provider", "rs_server", "rs_client", "customer", "peer" }; + if (role == BGP_ROLE_UNDEFINED) return "undefined"; + if (role < ARRAY_SIZE(bgp_role_names)) return bgp_role_names[role]; + return "?"; +} + static void bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) { @@ -2522,6 +2673,9 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps) if (caps->hostname) cli_msg(-1006, " Hostname: %s", caps->hostname); + + if (caps->role != BGP_ROLE_UNDEFINED) + cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role)); } static void @@ -2529,9 +2683,6 @@ bgp_show_proto_info(struct proto *P) { struct bgp_proto *p = (struct bgp_proto *) P; - if (p->p.proto_state != PS_DOWN) - BGP_ASSERT_INSIDE(p); - cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p)); if (bgp_is_dynamic(p) && p->cf->remote_range) @@ -2539,6 +2690,9 @@ bgp_show_proto_info(struct proto *P) else cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT)) + cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); cli_msg(-1006, " Local AS: %u", p->cf->local_as); @@ -2581,6 +2735,9 @@ bgp_show_proto_info(struct proto *P) tm_remains(p->conn->hold_timer), p->conn->hold_time); cli_msg(-1006, " Keepalive timer: %t/%u", tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time); + cli_msg(-1006, " TX pending: %d bytes%s", + p->conn->sk->tpos - p->conn->sk->ttx, + ev_active(p->conn->tx_ev) ? " (refill scheduled)" : ""); } #if 0 @@ -2609,6 +2766,9 @@ bgp_show_proto_info(struct proto *P) { channel_show_info(&c->c); + if (c->c.channel != &channel_bgp) + continue; + if (p->gr_active_num) cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]); @@ -2628,6 +2788,25 @@ bgp_show_proto_info(struct proto *P) if (c->igp_table_ip6) cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name); + + if (c->base_table) + cli_msg(-1006, " Base table: %s", c->base_table->name); + + uint bucket_cnt = 0; + uint prefix_cnt = 0; + struct bgp_bucket *buck; + struct bgp_prefix *px; + if (c->ptx) + WALK_LIST(buck, c->ptx->bucket_queue) + { + bucket_cnt++; + WALK_LIST(px, buck->prefixes) + if (px->cur) + prefix_cnt++; + } + + cli_msg(-1006, " Pending %u attribute sets with total %u prefixes to send", + bucket_cnt, prefix_cnt); } } } @@ -2645,7 +2824,6 @@ struct channel_class channel_bgp = { struct protocol proto_bgp = { .name = "BGP", .template = "bgp%d", - .class = PROTOCOL_BGP, .preference = DEF_PREF_BGP, .channel_mask = NB_IP | NB_VPN | NB_FLOW, .proto_size = sizeof(struct bgp_proto), @@ -2657,6 +2835,12 @@ struct protocol proto_bgp = { .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, - .get_attr = bgp_get_attr, .show_proto_info = bgp_show_proto_info }; + +void bgp_build(void) +{ + proto_build(&proto_bgp); + bgp_register_attrs(); + bgp_listen_domain = DOMAIN_NEW(rtable, "BGP Listen Sockets"); +} diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 2a2fe689..6f75874f 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -14,13 +14,12 @@ #include <stdint.h> #include <setjmp.h> #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" //#include "lib/lists.h" #include "lib/hash.h" #include "lib/socket.h" -struct linpool; struct eattr; @@ -68,10 +67,10 @@ struct bgp_af_desc { u8 no_igp; const char *name; uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size); - void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); + void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a); void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to); uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size); - void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); + void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, ea_list **to); }; @@ -98,7 +97,7 @@ struct bgp_config { int capabilities; /* Enable capability handshake [RFC 5492] */ int enable_refresh; /* Enable local support for route refresh [RFC 2918] */ int enable_as4; /* Enable local support for 4B AS numbers [RFC 6793] */ - int enable_extended_messages; /* Enable local support for extended messages [draft] */ + int enable_extended_messages; /* Enable local support for extended messages [RFC 8654] */ int enable_hostname; /* Enable local support for hostname [draft] */ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ int rr_client; /* Whether neighbor is RR client of me */ @@ -109,18 +108,24 @@ struct bgp_config { int interpret_communities; /* Hardwired handling of well-known communities */ int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */ + int allow_med; /* Allow BGP_MED in EBGP sessions */ int allow_as_sets; /* Allow AS_SETs in incoming AS_PATHs */ int enforce_first_as; /* Enable check for neighbor AS as first AS in AS_PATH */ int gr_mode; /* Graceful restart mode (BGP_GR_*) */ int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */ int setkey; /* Set MD5 password to system SA/SP database */ + u8 local_role; /* Set peering role with neighbor [RFC 9234] */ + int require_roles; /* Require configured roles on both sides */ /* Times below are in seconds */ unsigned gr_time; /* Graceful restart timeout */ unsigned llgr_time; /* Long-lived graceful restart stale time */ unsigned connect_delay_time; /* Minimum delay between connect attempts */ unsigned connect_retry_time; /* Timeout for connect attempts */ - unsigned hold_time, initial_hold_time; + unsigned hold_time; + unsigned min_hold_time; /* Minimum accepted hold time */ + unsigned initial_hold_time; unsigned keepalive_time; + unsigned min_keepalive_time; /* Minimum accepted keepalive time */ unsigned error_amnesia_time; /* Errors are forgotten after */ unsigned error_delay_time_min; /* Time to wait after an error is detected */ unsigned error_delay_time_max; @@ -144,9 +149,11 @@ struct bgp_channel_config { ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */ u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */ + u8 next_hop_prefer; /* Prefer global or link-local next hop (NHP_*) */ u8 mandatory; /* Channel is mandatory in capability negotiation */ u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ + u8 validate; /* Validate Flowspec per RFC 8955 (6) */ u8 gr_able; /* Allow full graceful restart for the channel */ u8 llgr_able; /* Allow full long-lived GR for the channel */ uint llgr_time; /* Long-lived graceful restart stale time */ @@ -156,15 +163,23 @@ struct bgp_channel_config { u8 aigp_originate; /* AIGP is originated automatically */ u32 cost; /* IGP cost for direct next hops */ u8 import_table; /* Use c.in_table as Adj-RIB-In */ - u8 export_table; /* Use c.out_table as Adj-RIB-Out */ + u8 export_table; /* Keep Adj-RIB-Out and export it */ struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + struct rtable_config *base_table; /* Base table for Flowspec validation */ }; #define BGP_PT_INTERNAL 1 #define BGP_PT_EXTERNAL 2 +#define BGP_ROLE_UNDEFINED 255 +#define BGP_ROLE_PROVIDER 0 +#define BGP_ROLE_RS_SERVER 1 +#define BGP_ROLE_RS_CLIENT 2 +#define BGP_ROLE_CUSTOMER 3 +#define BGP_ROLE_PEER 4 + #define NH_NO 0 #define NH_ALL 1 #define NH_IBGP 2 @@ -177,6 +192,9 @@ struct bgp_channel_config { #define GW_DIRECT 1 #define GW_RECURSIVE 2 +#define NHP_GLOBAL 1 +#define NHP_LOCAL 2 + #define BGP_ADD_PATH_RX 1 #define BGP_ADD_PATH_TX 2 #define BGP_ADD_PATH_FULL 3 @@ -203,8 +221,6 @@ struct bgp_channel_config { /* rte->pflags */ #define BGP_REF_SUPPRESSED 0x1 /* Used for deterministic MED comparison */ -#define BGP_REF_STALE 0x2 /* Route is LLGR_STATE */ -#define BGP_REF_NOT_STALE 0x4 /* Route is NOT LLGR_STATE */ struct bgp_af_caps { u32 afi; @@ -222,9 +238,10 @@ struct bgp_caps { u32 as4_number; /* Announced ASN */ u8 as4_support; /* Four-octet AS capability, RFC 6793 */ - u8 ext_messages; /* Extended message length, RFC draft */ + u8 ext_messages; /* Extended message length, RFC 8654 */ u8 route_refresh; /* Route refresh capability, RFC 2918 */ u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */ + u8 role; /* BGP role capability, RFC 9234 */ u8 gr_aware; /* Graceful restart capability, RFC 4724 */ u8 gr_flags; /* Graceful restart flags */ @@ -248,8 +265,8 @@ struct bgp_caps { struct bgp_socket { node n; /* Node in global bgp_sockets */ + list requests; /* Listen requests */ sock *sk; /* Real listening socket */ - u32 uc; /* Use count */ }; struct bgp_stats { @@ -284,6 +301,16 @@ struct bgp_conn { uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; +struct bgp_listen_request { + node n; /* Node in bgp_socket / pending list */ + struct bgp_socket *sock; /* Assigned socket */ + ip_addr addr; + struct iface *iface; + struct iface *vrf; + uint port; + uint flags; +}; + struct bgp_proto { struct proto p; const struct bgp_config *cf; /* Shortcut to BGP configuration */ @@ -313,18 +340,17 @@ struct bgp_proto { struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ - struct linpool *rx_lp; /* Linpool for parsing received updates */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ - struct bgp_socket *sock; /* Shared listening socket */ + struct bgp_listen_request listen; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ + event *uncork_ev; /* Uncork event in case of congestion */ struct bgp_stats stats; /* BGP statistics */ btime last_established; /* Last time of enter/leave of established state */ btime last_rx_update; /* Last time of RX update */ ip_addr link_addr; /* Link-local version of local_ip */ - event *active_event; /* Event for respawning */ - event *down_event; /* Event to shut down */ + event *event; /* Event for respawning and shutting process */ timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */ timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */ int dynamic_name_counter; /* Counter for dynamic BGP names */ @@ -347,15 +373,12 @@ struct bgp_channel { rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + rtable *base_table; /* Base table for Flowspec validation */ /* Rest are zeroed when down */ pool *pool; - HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ - struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ - list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ - - HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ - slab *prefix_slab; /* Slab holding prefix nodes */ + struct bgp_pending_tx *ptx; /* Routes waiting to be sent */ + struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */ ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ ip_addr link_addr; /* Link-local version of next_hop_addr */ @@ -376,11 +399,16 @@ struct bgp_channel { u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */ u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */ + + u8 feed_out_table; /* Refeed into out_table */ }; struct bgp_prefix { - node buck_node; /* Node in per-bucket list */ + node buck_node_xx; /* Node in per-bucket list */ struct bgp_prefix *next; /* Node in prefix hash table */ + struct bgp_bucket *last; /* Last bucket sent with this prefix */ + struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */ + btime lastmod; /* Last modification of this prefix */ u32 hash; u32 path_id; net_addr net[0]; @@ -389,11 +417,24 @@ struct bgp_prefix { struct bgp_bucket { node send_node; /* Node in send queue */ struct bgp_bucket *next; /* Node in bucket hash table */ - list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */ + list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */ u32 hash; /* Hash over extended attributes */ + u32 px_uc; /* How many prefixes are linking this bucket */ ea_list eattrs[0]; /* Per-bucket extended attributes */ }; +struct bgp_pending_tx { + resource r; + pool *pool; + + HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ + struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ + list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ + + HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ + slab *prefix_slab; /* Slab holding prefix nodes */ +}; + struct bgp_export_state { struct bgp_proto *proto; struct bgp_channel *channel; @@ -404,7 +445,7 @@ struct bgp_export_state { int mpls; u32 attrs_seen[1]; - uint err_withdraw; + uint err_reject; uint local_next_hop; }; @@ -430,6 +471,7 @@ struct bgp_parse_state { int as4_session; int add_path; int mpls; + int reach_nlri_step; u32 attrs_seen[256/32]; @@ -456,13 +498,12 @@ struct bgp_parse_state { uint err_subcode; jmp_buf err_jmpbuf; - struct hostentry *hostentry; adata *mpls_labels; /* Cached state for bgp_rte_update() */ u32 last_id; struct rte_src *last_src; - rta *cached_rta; + ea_list *cached_ea; }; #define BGP_PORT 179 @@ -475,6 +516,9 @@ struct bgp_parse_state { #define BGP_RX_BUFFER_EXT_SIZE 65535 #define BGP_TX_BUFFER_EXT_SIZE 65535 +#define BGP_CF_WALK_CHANNELS(P,C) WALK_LIST(C, P->c.channels) if (C->c.channel == &channel_bgp) +#define BGP_WALK_CHANNELS(P,C) WALK_LIST(C, P->p.channels) if (C->c.channel == &channel_bgp) + static inline int bgp_channel_is_ipv4(struct bgp_channel *c) { return BGP_AFI(c->afi) == BGP_AFI_IPV4; } @@ -487,6 +531,12 @@ static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c) static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c) { return BGP_AFI(c->afi) == BGP_AFI_IPV6; } +static inline int bgp_channel_is_role_applicable(struct bgp_channel *c) +{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); } + +static inline int bgp_cc_is_role_applicable(struct bgp_channel_config *c) +{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); } + static inline uint bgp_max_packet_length(struct bgp_conn *conn) { return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; } @@ -513,14 +563,17 @@ void bgp_refresh_begin(struct bgp_channel *c); void bgp_refresh_end(struct bgp_channel *c); void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code); void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len); - -struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); -struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); +const char *bgp_format_role_name(u8 role); static inline int -rta_resolvable(rta *a) +rte_resolvable(const rte *rt) { - return a->dest == RTD_UNICAST; + eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop); + if (!nhea) + return 0; + + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } extern struct rte_owner_class bgp_rte_owner_class; @@ -539,63 +592,41 @@ extern struct rte_owner_class bgp_rte_owner_class; /* attrs.c */ -static inline eattr * -bgp_find_attr(ea_list *attrs, uint code) -{ - return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code)); -} - eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val); - -static inline void -bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } - -static inline void -bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } +bgp_find_attr(ea_list *attrs, uint code); -static inline void -bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - bmemcpy(a->data, data, len); - bgp_set_attr(to, pool, code, flags, (uintptr_t) a); -} - -static inline void -bgp_unset_attr(ea_list **to, struct linpool *pool, uint code) -{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; } +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val); +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad); +void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len); +void bgp_unset_attr(ea_list **to, uint code); int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size); int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end); ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len); -void bgp_finish_attrs(struct bgp_parse_state *s, rta *a); +void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to); + +void bgp_setup_out_table(struct bgp_channel *c); + +void bgp_init_pending_tx(struct bgp_channel *c); +void bgp_free_pending_tx(struct bgp_channel *c); -void bgp_init_bucket_table(struct bgp_channel *c); -void bgp_free_bucket_table(struct bgp_channel *c); -void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b); -void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b); void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b); +int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b); -void bgp_init_prefix_table(struct bgp_channel *c); -void bgp_free_prefix_table(struct bgp_channel *c); -void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); +void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck); -int bgp_rte_better(struct rte *, struct rte *); -int bgp_rte_mergable(rte *pri, rte *sec); -int bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best); -void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint); -u32 bgp_rte_igp_metric(struct rte *); +int bgp_rte_better(const rte *, const rte *); +int bgp_rte_mergable(const rte *pri, const rte *sec); +int bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best); +void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count); +u32 bgp_rte_igp_metric(const rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); int bgp_preexport(struct channel *, struct rte *); -int bgp_get_attr(const struct eattr *e, byte *buf, int buflen); -void bgp_get_route_info(struct rte *, byte *); -int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad); +void bgp_get_route_info(const rte *, byte *); +int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad); -static inline struct bgp_proto *bgp_rte_proto(struct rte *rte) +static inline struct bgp_proto *bgp_rte_proto(const rte *rte) { return (rte->src->owner->class == &bgp_rte_owner_class) ? SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL; @@ -605,15 +636,17 @@ static inline struct bgp_proto *bgp_rte_proto(struct rte *rte) #define BGP_AIGP_MAX U64(0xffffffffffffffff) static inline u64 -bgp_total_aigp_metric(rta *a) +bgp_total_aigp_metric(const rte *e) { u64 metric = BGP_AIGP_MAX; const struct adata *ad; - bgp_total_aigp_metric_(a, &metric, &ad); + bgp_total_aigp_metric_(e, &metric, &ad); return metric; } +void bgp_register_attrs(void); + /* packets.c */ @@ -625,6 +658,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, uint size); +void bgp_uncork(void *vp); const char * bgp_error_dsc(unsigned code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); @@ -650,26 +684,32 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */ -#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */ -#define BA_AS_PATH 0x02 /* WM */ -#define BA_NEXT_HOP 0x03 /* WM */ -#define BA_MULTI_EXIT_DISC 0x04 /* ON */ -#define BA_LOCAL_PREF 0x05 /* WD */ -#define BA_ATOMIC_AGGR 0x06 /* WD */ -#define BA_AGGREGATOR 0x07 /* OT */ -#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */ -#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */ -#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */ -#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */ -#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */ -#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */ -#define BA_AS4_PATH 0x11 /* RFC 6793 */ -#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ -#define BA_AIGP 0x1a /* RFC 7311 */ -#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ +enum bgp_attr_id { + BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */ + BA_AS_PATH = 0x02, /* WM */ + BA_NEXT_HOP = 0x03, /* WM */ + BA_MULTI_EXIT_DISC = 0x04, /* ON */ + BA_LOCAL_PREF = 0x05, /* WD */ + BA_ATOMIC_AGGR = 0x06, /* WD */ + BA_AGGREGATOR = 0x07, /* OT */ + BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */ + BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */ + BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */ + BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */ + BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */ + BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */ + BA_AS4_PATH = 0x11, /* RFC 6793 */ + BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */ + BA_AIGP = 0x1a, /* RFC 7311 */ + BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */ +#define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */ /* Bird's private internal BGP attributes */ -#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ + BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */ + +/* Maximum */ + BGP_ATTR_MAX, +}; /* BGP connection states */ @@ -762,10 +802,5 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define ORIGIN_EGP 1 #define ORIGIN_INCOMPLETE 2 -/* Loop */ - -#define BGP_ENTER(bgp) birdloop_enter(bgp->p.loop) -#define BGP_LEAVE(bgp) birdloop_leave(bgp->p.loop) -#define BGP_ASSERT_INSIDE(bgp) ASSERT_DIE((bgp->p.loop != &main_birdloop) && birdloop_inside(bgp->p.loop)) #endif diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 7c0d8d65..a37bb27a 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -19,19 +19,19 @@ CF_DECLS CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, - START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, - BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, - BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY, + START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, + BGP_LOCAL_PREF, BGP_MED, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, - DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, - BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, + DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, + IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS, - DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE, - FIRST, FREE) + DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE, + FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER, + RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC, GLOBAL) %type <i> bgp_nh %type <i32> bgp_afi @@ -40,10 +40,14 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER, CONFIGURATION, CHANGE, DECONFIGURED, CONNECTION, REJECTED, COLLISION, OUT, OF, RESOURCES) -%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag +%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag bgp_role_name CF_GRAMMAR +/* Workaround for collisions between keywords and symbols */ +toksym: ROLE | PEER | PROVIDER | CUSTOMER | RS_SERVER | RS_CLIENT ; +toksym: BGP_MED | BGP_LOCAL_PREF | SOURCE ; + proto: bgp_proto '}' ; bgp_proto_start: proto_start BGP { @@ -73,6 +77,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->llgr_mode = -1; BGP_CFG->llgr_time = 3600; BGP_CFG->setkey = 1; + BGP_CFG->local_role = BGP_ROLE_UNDEFINED; BGP_CFG->dynamic_name = "dynbgp"; BGP_CFG->check_link = -1; } @@ -115,6 +120,14 @@ bgp_cease_flag: | OUT OF RESOURCES { $$ = 1 << 8; } ; +bgp_role_name: + PEER { $$ = BGP_ROLE_PEER; } + | PROVIDER { $$ = BGP_ROLE_PROVIDER; } + | CUSTOMER { $$ = BGP_ROLE_CUSTOMER; } + | RS_SERVER { $$ = BGP_ROLE_RS_SERVER; } + | RS_CLIENT { $$ = BGP_ROLE_RS_CLIENT; } + ; + bgp_proto: bgp_proto_start proto_name '{' | bgp_proto proto_item ';' @@ -144,7 +157,8 @@ bgp_proto: | bgp_proto RS CLIENT bool ';' { BGP_CFG->rs_client = $4; } | bgp_proto CONFEDERATION expr ';' { BGP_CFG->confederation = $3; } | bgp_proto CONFEDERATION MEMBER bool ';' { BGP_CFG->confederation_member = $4; } - | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; } + | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; if (($4 && $4<3) || ($4>65535)) cf_error("Hold time must be in range 3-65535 or zero"); } + | bgp_proto MIN HOLD TIME expr ';' { BGP_CFG->min_hold_time = $5; } | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } @@ -168,7 +182,8 @@ bgp_proto: | bgp_proto START DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; log(L_WARN "%s: Start delay time option is deprecated, use connect delay time", this_proto->name); } | bgp_proto CONNECT DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } - | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; } + | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; if (($4<1) || ($4>65535)) cf_error("Keepalive time must be in range 1-65535"); } + | bgp_proto MIN KEEPALIVE TIME expr ';' { BGP_CFG->min_keepalive_time = $5; } | bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; } | bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; } | bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; } @@ -185,6 +200,7 @@ bgp_proto: | bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; } | bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; } | bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; } + | bgp_proto ALLOW BGP_MED bool ';' { BGP_CFG->allow_med = $4; } | bgp_proto ALLOW AS SETS bool ';' { BGP_CFG->allow_as_sets = $5; } | bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; } | bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; } @@ -198,6 +214,8 @@ bgp_proto: | bgp_proto BFD GRACEFUL ';' { init_bfd_opts(&BGP_CFG->bfd); BGP_CFG->bfd->mode = BGP_BFD_GRACEFUL; } | bgp_proto BFD { open_bfd_opts(&BGP_CFG->bfd); } bfd_opts { close_bfd_opts(); } ';' | bgp_proto ENFORCE FIRST AS bool ';' { BGP_CFG->enforce_first_as = $5; } + | bgp_proto LOCAL ROLE bgp_role_name ';' { BGP_CFG->local_role = $4; } + | bgp_proto REQUIRE ROLES bool ';' { BGP_CFG->require_roles = $4; } ; bgp_afi: @@ -252,11 +270,17 @@ bgp_channel_item: | NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; } | NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; } | NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; } + | NEXT HOP PREFER GLOBAL { BGP_CC->next_hop_prefer = NHP_GLOBAL; } | MANDATORY bool { BGP_CC->mandatory = $2; } | MISSING LLADDR bgp_lladdr { log(L_WARN "%s.%s: Missing lladdr option is deprecated and ignored, remove it", this_proto->name, this_channel->name); } | GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; } | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; } | SECONDARY bool { BGP_CC->secondary = $2; } + | VALIDATE bool { + BGP_CC->validate = $2; + if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW) + cf_error("Validate option limited to flowspec channels"); + } | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; } | LONG LIVED GRACEFUL RESTART bool { BGP_CC->llgr_able = $5; } | LONG LIVED STALE TIME expr { BGP_CC->llgr_time = $5; } @@ -280,6 +304,16 @@ bgp_channel_item: else cf_error("Mismatched IGP table type"); } + | BASE TABLE rtable { + if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW) + cf_error("Base table option limited to flowspec channels"); + + if (((BGP_CC->afi == BGP_AF_FLOW4) && ($3->addr_type == NET_IP4)) || + ((BGP_CC->afi == BGP_AF_FLOW6) && ($3->addr_type == NET_IP6))) + BGP_CC->base_table = $3; + else + cf_error("Mismatched base table type"); + } ; bgp_channel_opts: @@ -297,41 +331,14 @@ bgp_channel_end: if (!this_channel->table) cf_error("Routing table not specified"); + if (BGP_CC->import_table) + this_channel->in_keep |= RIK_PREFILTER; + this_channel = NULL; }; bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end; - -dynamic_attr: BGP_ORIGIN - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); } ; -dynamic_attr: BGP_PATH - { $$ = f_new_dynamic_attr(EAF_TYPE_AS_PATH, T_PATH, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); } ; -dynamic_attr: BGP_NEXT_HOP - { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)); } ; -dynamic_attr: BGP_MED - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); } ; -dynamic_attr: BGP_LOCAL_PREF - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); } ; -dynamic_attr: BGP_ATOMIC_AGGR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_ATOMIC_AGGR)); } ; -dynamic_attr: BGP_AGGREGATOR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AGGREGATOR)); } ; -dynamic_attr: BGP_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); } ; -dynamic_attr: BGP_ORIGINATOR_ID - { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); } ; -dynamic_attr: BGP_CLUSTER_LIST - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ; -dynamic_attr: BGP_EXT_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ; -dynamic_attr: BGP_AIGP - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ; -dynamic_attr: BGP_LARGE_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ; - - - CF_ENUM(T_ENUM_BGP_ORIGIN, ORIGIN_, IGP, EGP, INCOMPLETE) CF_CODE diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 88b66040..978a1891 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -15,8 +15,8 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "proto/mrt/mrt.h" #include "conf/conf.h" #include "lib/unaligned.h" @@ -238,6 +238,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn) caps->ext_messages = p->cf->enable_extended_messages; caps->route_refresh = p->cf->enable_refresh; caps->enhanced_refresh = p->cf->enable_refresh; + caps->role = p->cf->local_role; if (caps->as4_support) caps->as4_number = p->public_as; @@ -261,7 +262,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn) } /* Allocate and fill per-AF fields */ - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) { ac = &caps->af_data[caps->af_count++]; ac->afi = c->afi; @@ -350,6 +351,13 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf) *buf++ = 0; /* Capability data length */ } + if (caps->role != BGP_ROLE_UNDEFINED) + { + *buf++ = 9; /* Capability 9: Announce chosen BGP role */ + *buf++ = 1; /* Capability data length */ + *buf++ = caps->role; + } + if (caps->gr_aware) { *buf++ = 64; /* Capability 64: Support for graceful restart */ @@ -449,11 +457,15 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len) struct bgp_proto *p = conn->bgp; struct bgp_caps *caps; struct bgp_af_caps *ac; + uint err_subcode = 0; int i, cl; u32 af; if (!conn->remote_caps) + { caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps)); + caps->role = BGP_ROLE_UNDEFINED; + } else { caps = conn->remote_caps; @@ -506,13 +518,28 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len) } break; - case 6: /* Extended message length capability, RFC draft */ + case 6: /* Extended message length capability, RFC 8654 */ if (cl != 0) goto err; caps->ext_messages = 1; break; + case 9: /* BGP role capability, RFC 9234 */ + if (cl != 1) + goto err; + + /* Reserved value */ + if (pos[2] == BGP_ROLE_UNDEFINED) + { err_subcode = 11; goto err; } + + /* Multiple inconsistent values */ + if ((caps->role != BGP_ROLE_UNDEFINED) && (caps->role != pos[2])) + { err_subcode = 11; goto err; } + + caps->role = pos[2]; + break; + case 64: /* Graceful restart capability, RFC 4724 */ if (cl % 4 != 2) goto err; @@ -638,7 +665,7 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len) err: mb_free(caps); - bgp_error(conn, 2, 0, NULL, 0); + bgp_error(conn, 2, err_subcode, NULL, 0); return -1; } @@ -654,7 +681,7 @@ bgp_check_capabilities(struct bgp_conn *conn) /* This is partially overlapping with bgp_conn_enter_established_state(), but we need to run this just after we receive OPEN message */ - WALK_LIST(c, p->p.channels) + BGP_WALK_CHANNELS(p, c) { const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi); const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi); @@ -684,7 +711,7 @@ bgp_read_options(struct bgp_conn *conn, byte *pos, uint len, uint rest) struct bgp_proto *p = conn->bgp; int ext = 0; - /* Handle extended length (draft-ietf-idr-ext-opt-param-07) */ + /* Handle extended length, RFC 9072 */ if ((len > 0) && (rest > 0) && (pos[0] == 255)) { if (rest < 3) @@ -769,7 +796,7 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) buf[10] = 2; /* Option 2: Capability list */ buf[11] = len; /* Option data length */ } - else /* draft-ietf-idr-ext-opt-param-07 */ + else /* Extended length, RFC 9072 */ { /* Move capabilities 4 B forward */ memmove(buf + 16, pos, len); @@ -820,9 +847,25 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) if (bgp_read_options(conn, pkt+29, pkt[28], len-29) < 0) return; + /* RFC 4271 4.2 - hold time must be either 0 or at least 3 */ if (hold > 0 && hold < 3) { bgp_error(conn, 2, 6, pkt+22, 2); return; } + /* Compute effective hold and keepalive times */ + uint hold_time = MIN(hold, p->cf->hold_time); + uint keepalive_time = p->cf->keepalive_time ? + (p->cf->keepalive_time * hold_time / p->cf->hold_time) : + hold_time / 3; + + /* Keepalive time might be rounded down to zero */ + if (hold_time && !keepalive_time) + keepalive_time = 1; + + /* Check effective values against configured minimums */ + if ((hold_time < p->cf->min_hold_time) || + (keepalive_time < p->cf->min_keepalive_time)) + { bgp_error(conn, 2, 6, pkt+22, 2); return; } + /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */ if (!id || (p->is_internal && id == p->local_id)) { bgp_error(conn, 2, 3, pkt+24, -4); return; } @@ -854,6 +897,22 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) conn->received_as = asn; } + /* RFC 9234 4.2 - check role agreement */ + u8 local_role = p->cf->local_role; + u8 neigh_role = caps->role; + + if ((local_role != BGP_ROLE_UNDEFINED) && + (neigh_role != BGP_ROLE_UNDEFINED) && + !((local_role == BGP_ROLE_PEER && neigh_role == BGP_ROLE_PEER) || + (local_role == BGP_ROLE_CUSTOMER && neigh_role == BGP_ROLE_PROVIDER) || + (local_role == BGP_ROLE_PROVIDER && neigh_role == BGP_ROLE_CUSTOMER) || + (local_role == BGP_ROLE_RS_CLIENT && neigh_role == BGP_ROLE_RS_SERVER) || + (local_role == BGP_ROLE_RS_SERVER && neigh_role == BGP_ROLE_RS_CLIENT))) + { bgp_error(conn, 2, 11, &neigh_role, -1); return; } + + if ((p->cf->require_roles) && (neigh_role == BGP_ROLE_UNDEFINED)) + { bgp_error(conn, 2, 11, &neigh_role, -1); return; } + /* Check the other connection */ other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn; switch (other->state) @@ -904,8 +963,8 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) } /* Update our local variables */ - conn->hold_time = MIN(hold, p->cf->hold_time); - conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3; + conn->hold_time = hold_time; + conn->keepalive_time = keepalive_time; conn->as4_session = conn->local_caps->as4_support && caps->as4_support; conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages; p->remote_id = id; @@ -932,14 +991,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) #define WITHDRAW(msg, args...) \ ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) +#define REJECT(msg, args...) \ + ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; }) + #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" #define NO_NEXT_HOP "Missing NEXT_HOP attribute" #define NO_LABEL_STACK "Missing MPLS stack" +#define MISMATCHED_AF " - mismatched address family (%I for %s)" static void -bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) +bgp_apply_next_hop(struct bgp_parse_state *s, ea_list **to, ip_addr gw, ip_addr ll) { struct bgp_proto *p = s->proto; struct bgp_channel *c = s->channel; @@ -949,67 +1012,88 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) neighbor *nbr = NULL; /* GW_DIRECT -> single_hop -> p->neigh != NULL */ - if (ipa_nonzero(gw)) + if (ipa_nonzero2(gw)) nbr = neigh_find(&p->p, gw, NULL, 0); else if (ipa_nonzero(ll)) nbr = neigh_find(&p->p, ll, p->neigh->iface, 0); + else + WITHDRAW(BAD_NEXT_HOP " - zero address"); - if (!nbr || (nbr->scope == SCOPE_HOST)) - WITHDRAW(BAD_NEXT_HOP); + if (!nbr) + WITHDRAW(BAD_NEXT_HOP " - address %I not directly reachable", ipa_nonzero(gw) ? gw : ll); - a->dest = RTD_UNICAST; - a->nh.gw = nbr->addr; - a->nh.iface = nbr->iface; - a->igp_metric = c->cf->cost; + if (nbr->scope == SCOPE_HOST) + WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr); + + ea_set_attr_u32(to, &ea_gen_igp_metric, 0, c->cf->cost); + + struct nexthop_adata nhad = { + .nh = { + .gw = nbr->addr, + .iface = nbr->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, + }; + ea_set_attr_data(to, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); } else /* GW_RECURSIVE */ { - if (ipa_zero(gw)) - WITHDRAW(BAD_NEXT_HOP); + if (ipa_zero2(gw)) + WITHDRAW(BAD_NEXT_HOP " - zero address"); rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6; - s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); - - if (!s->mpls) - rta_apply_hostentry(a, s->hostentry, NULL, s->pool); + ip_addr lla = (c->cf->next_hop_prefer == NHP_LOCAL) ? ll : IPA_NONE; - /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ + if (s->mpls) + { + u32 labels[BGP_MPLS_MAX]; + ea_set_hostentry(to, c->c.table, tab, gw, lla, BGP_MPLS_MAX, labels); + } + else + ea_set_hostentry(to, c->c.table, tab, gw, lla, 0, NULL); } } static void -bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) +bgp_apply_mpls_labels(struct bgp_parse_state *s, ea_list **to, u32 lnum, u32 labels[lnum]) { if (lnum > MPLS_MAX_LABEL_STACK) { REPORT("Too many MPLS labels ($u)", lnum); - a->dest = RTD_UNREACHABLE; - a->hostentry = NULL; - a->nh = (struct nexthop) { }; + ea_set_dest(to, 0, RTD_UNREACHABLE); return; } /* Handle implicit NULL as empty MPLS stack */ if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) - lnum = 0; + lnum = s->mpls_labels->length = 0; if (s->channel->cf->gw_mode == GW_DIRECT) { - a->nh.labels = lnum; - memcpy(a->nh.label, labels, 4*lnum); + eattr *e = ea_find(*to, &ea_gen_nexthop); + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nh; + + memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length); + nh.nhad.nh.labels = lnum; + memcpy(nh.labels, labels, lnum * sizeof(u32)); + nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32); } else /* GW_RECURSIVE */ { - mpls_label_stack ms; - - ms.len = lnum; - memcpy(ms.stack, labels, 4*lnum); - rta_apply_hostentry(a, s->hostentry, &ms, s->pool); + eattr *e = ea_find(*to, &ea_gen_hostentry); + ASSERT_DIE(e); + struct hostentry_adata *head = (void *) e->u.ptr; + memcpy(&head->labels, labels, lnum * sizeof(u32)); + head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data; } } - static int bgp_match_src(struct bgp_export_state *s, int mode) { @@ -1039,13 +1123,17 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return 1; /* Keep it when explicitly set in export filter */ - if (a->type & EAF_FRESH) + if (a->fresh) return 1; /* Check for non-matching AF */ if ((ipa_is_ip4(*nh) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) return 0; + /* Do not pass NEXT_HOP between different VRFs */ + if (p->p.vrf && s->src && s->src->p.vrf && (p->p.vrf != s->src->p.vrf)) + return 0; + /* Keep it when exported to internal peers */ if (p->is_interior && ipa_nonzero(*nh)) return 1; @@ -1056,31 +1144,45 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return p->neigh && (p->neigh->iface == ifa); } -static inline int +static inline struct nexthop * bgp_use_gateway(struct bgp_export_state *s) { struct bgp_proto *p = s->proto; struct bgp_channel *c = s->channel; - rta *ra = s->route->attrs; + ea_list *ra = s->route->attrs; /* Handle next hop self option - also applies to gateway */ if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self)) - return 0; + return NULL; + + eattr *nhea = ea_find(ra, &ea_gen_nexthop); + if (!nhea) + return NULL; /* We need one valid global gateway */ - if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) - return 0; + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad) || + !NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) || + ipa_is_link_local(nhad->nh.gw)) + return NULL; /* Check for non-matching AF */ - if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) + if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) + return NULL; + + /* Do not use gateway from different VRF */ + if (p->p.vrf && nhad->nh.iface && (p->p.vrf != nhad->nh.iface->master)) return 0; /* Use it when exported to internal peers */ if (p->is_interior) - return 1; + return &nhad->nh; /* Use it when forwarded to single-hop BGP peer on on the same iface */ - return p->neigh && (p->neigh->iface == ra->nh.iface); + if (p->neigh && (p->neigh->iface == nhad->nh.iface)) + return &nhad->nh; + + return NULL; } static void @@ -1088,60 +1190,64 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (!a || !bgp_use_next_hop(s, a)) { - if (bgp_use_gateway(s)) + struct nexthop *nhloc; + if (nhloc = bgp_use_gateway(s)) { - rta *ra = s->route->attrs; - ip_addr nh[1] = { ra->nh.gw }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + ip_addr nh[1] = { nhloc->gw }; + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16); if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; - uint lnum = ra->nh.labels ? ra->nh.labels : 1; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + u32 *labels = nhloc->labels ? nhloc->label : &implicit_null; + uint lnum = nhloc->labels ? nhloc->labels : 1; + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); } + else + bgp_unset_attr(to, BA_MPLS_LABEL_STACK); } else { ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); s->local_next_hop = 1; /* TODO: Use local MPLS assigned label */ if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); } + else + bgp_unset_attr(to, BA_MPLS_LABEL_STACK); } } /* Check if next hop is valid */ a = bgp_find_attr(*to, BA_NEXT_HOP); if (!a) - WITHDRAW(NO_NEXT_HOP); + REJECT(NO_NEXT_HOP); ip_addr *nh = (void *) a->u.ptr->data; ip_addr peer = s->proto->remote_ip; uint len = a->u.ptr->length; /* Forbid zero next hop */ - if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) - WITHDRAW(BAD_NEXT_HOP); + if (ipa_zero2(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) + REJECT(BAD_NEXT_HOP " - zero address"); /* Forbid next hop equal to neighbor IP */ if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) - WITHDRAW(BAD_NEXT_HOP); + REJECT(BAD_NEXT_HOP " - neighbor address %I", peer); /* Forbid next hop with non-matching AF */ if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) && !s->channel->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + REJECT(BAD_NEXT_HOP MISMATCHED_AF, nh[0], s->channel->desc->name); /* Just check if MPLS stack */ if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK)) - WITHDRAW(NO_LABEL_STACK); + REJECT(NO_LABEL_STACK); } static uint @@ -1175,7 +1281,7 @@ bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size } static void -bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) +bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, ea_list **to) { struct bgp_channel *c = s->channel; struct adata *ad = lp_alloc_adata(s->pool, 32); @@ -1212,12 +1318,12 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) ad->length = 16; if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name); // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); - bgp_apply_next_hop(s, a, nh[0], nh[1]); + bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, to, nh[0], nh[1]); } static uint @@ -1255,7 +1361,7 @@ bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint siz } static void -bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) +bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, ea_list **to) { struct bgp_channel *c = s->channel; struct adata *ad = lp_alloc_adata(s->pool, 32); @@ -1293,12 +1399,12 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) bgp_parse_error(s, 9); if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name); // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); - bgp_apply_next_hop(s, a, nh[0], nh[1]); + bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, to, nh[0], nh[1]); } @@ -1310,7 +1416,7 @@ bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte } static void -bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED) +bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED) { /* * Although we expect no next hop and RFC 7606 7.11 states that attribute @@ -1322,11 +1428,11 @@ bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, ui } static void -bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) +bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a, ea_list **to) { /* NEXT_HOP shall not pass */ if (a) - bgp_unset_attr(to, s->pool, BA_NEXT_HOP); + bgp_unset_attr(to, BA_NEXT_HOP); } @@ -1335,7 +1441,7 @@ bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) */ static void -bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) +bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_list *a0) { if (path_id != s->last_id) { @@ -1344,28 +1450,27 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) s->last_src = rt_get_source(&s->proto->p, path_id); s->last_id = path_id; - rta_free(s->cached_rta); - s->cached_rta = NULL; + ea_free(s->cached_ea); + s->cached_ea = NULL; } if (!a0) { + /* Route update was changed to withdraw */ + if (s->err_withdraw && s->reach_nlri_step) + REPORT("Invalid route %N withdrawn", n); + /* Route withdraw */ rte_update(&s->channel->c, n, NULL, s->last_src); return; } /* Prepare cached route attributes */ - if (s->cached_rta == NULL) - { - /* Workaround for rta_lookup() breaking eattrs */ - ea_list *ea = a0->eattrs; - s->cached_rta = rta_lookup(a0); - a0->eattrs = ea; - } + if (s->cached_ea == NULL) + s->cached_ea = ea_lookup(a0, 0); rte e0 = { - .attrs = s->cached_rta, + .attrs = s->cached_ea, .src = s->last_src, }; @@ -1392,9 +1497,10 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte } static void -bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) +bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, ea_list **to) { - u32 labels[BGP_MPLS_MAX], label; + u32 labels[BGP_MPLS_MAX]; + u32 label; uint lnum = 0; do { @@ -1408,31 +1514,20 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but fixed-size 24-bit Compatibility field, which MUST be ignored */ - if (!a && !s->err_withdraw) + if (!s->reach_nlri_step) return; } while (!(label & BGP_MPLS_BOS)); - if (!a) + if (!*to) return; - /* Attach MPLS attribute unless we already have one */ - if (!s->mpls_labels) - { - s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); - } - - /* Overwrite data in the attribute */ - s->mpls_labels->length = 4*lnum; - memcpy(s->mpls_labels->data, labels, 4*lnum); - /* Update next hop entry in rta */ - bgp_apply_mpls_labels(s, a, labels, lnum); + bgp_apply_mpls_labels(s, to, lnum, labels); /* Attributes were changed, invalidate cached entry */ - rta_free(s->cached_rta); - s->cached_rta = NULL; + rta_free(s->cached_ea); + s->cached_ea = NULL; return; } @@ -1468,14 +1563,14 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1501,7 +1596,7 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); if (l > IP4_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); @@ -1553,14 +1648,14 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1586,7 +1681,7 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); if (l > IP6_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); @@ -1641,14 +1736,14 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1674,7 +1769,7 @@ bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); /* Decode route distinguisher */ if (l < 64) @@ -1738,14 +1833,14 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1771,7 +1866,7 @@ bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); /* Decode route distinguisher */ if (l < 64) @@ -1825,14 +1920,14 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1913,14 +2008,14 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -2147,6 +2242,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu * var IPv4 Network Layer Reachability Information */ + ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck); + int lr, la; la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH); @@ -2168,6 +2265,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu static byte * bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { + ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck); + /* * 2 B IPv4 Withdrawn Routes Length (zero) * --- IPv4 Withdrawn Routes NLRI (unused) @@ -2291,11 +2390,14 @@ bgp_create_update(struct bgp_channel *c, byte *buf) again: ; + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize write state */ struct bgp_write_state s = { .proto = p, .channel = c, - .pool = c->c.rte_update_pool, + .pool = tmp_linpool, .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop, .as4_session = p->as4_session, .add_path = c->add_path_tx, @@ -2303,7 +2405,7 @@ again: ; }; /* Try unreachable bucket */ - if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? bgp_create_ip_unreach(&s, buck, buf, end): @@ -2313,14 +2415,14 @@ again: ; } /* Try reachable buckets */ - if (!EMPTY_LIST(c->bucket_queue)) + if (!EMPTY_LIST(c->ptx->bucket_queue)) { - buck = HEAD(c->bucket_queue); + buck = HEAD(c->ptx->bucket_queue); /* Cleanup empty buckets */ - if (EMPTY_LIST(buck->prefixes)) + if (bgp_done_bucket(c, buck)) { - bgp_free_bucket(c, buck); + lp_restore(tmp_linpool, &tmpp); goto again; } @@ -2328,13 +2430,13 @@ again: ; bgp_create_ip_reach(&s, buck, buf, end): bgp_create_mp_reach(&s, buck, buf, end); - if (EMPTY_LIST(buck->prefixes)) - bgp_free_bucket(c, buck); - else - bgp_defer_bucket(c, buck); + bgp_done_bucket(c, buck); if (!res) + { + lp_restore(tmp_linpool, &tmpp); goto again; + } goto done; } @@ -2345,7 +2447,7 @@ again: ; done: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); p->stats.tx_updates++; - lp_flush(s.pool); + lp_restore(tmp_linpool, &tmpp); return res; } @@ -2412,7 +2514,6 @@ static inline void bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len) { struct bgp_channel *c = bgp_get_channel(s->proto, afi); - rta *a = NULL; if (!c) DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); @@ -2434,26 +2535,22 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis if (ea) { - a = allocz(RTA_MAX_SIZE); - - a->source = RTS_BGP; - a->scope = SCOPE_UNIVERSE; - a->from = s->proto->remote_ip; - a->eattrs = ea; - a->pref = c->c.preference; + ea_set_attr_data(&ea, &ea_gen_from, 0, &s->proto->remote_ip, sizeof(ip_addr)); + ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->c.preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BGP); - c->desc->decode_next_hop(s, nh, nh_len, a); - bgp_finish_attrs(s, a); + c->desc->decode_next_hop(s, nh, nh_len, &ea); + bgp_finish_attrs(s, &ea); /* Handle withdraw during next hop decoding */ if (s->err_withdraw) - a = NULL; + ea = NULL; } - c->desc->decode_nlri(s, nlri, len, a); + c->desc->decode_nlri(s, nlri, len, ea); - rta_free(s->cached_rta); - s->cached_rta = NULL; + rta_free(s->cached_ea); + s->cached_ea = NULL; rt_unlock_source(s->last_src); } @@ -2477,10 +2574,13 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) bgp_start_timer(p, conn->hold_timer, conn->hold_time); + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize parse state */ struct bgp_parse_state s = { .proto = p, - .pool = p->rx_lp, + .pool = tmp_linpool, .as4_session = p->as4_session, }; @@ -2546,6 +2646,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) if (s.mp_unreach_len) bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0); + s.reach_nlri_step = 1; + if (s.ip_reach_len) bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len, ea, s.ip_next_hop_data, s.ip_next_hop_len); @@ -2555,8 +2657,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) ea, s.mp_next_hop_data, s.mp_next_hop_len); done: - rta_free(s.cached_rta); - lp_flush(s.pool); + rta_free(s.cached_ea); + lp_restore(tmp_linpool, &tmpp); return; } @@ -2879,7 +2981,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) { ASSERT(conn->sk); - DBG("BGP: Scheduling packet type %d\n", type); + struct bgp_proto *p = conn->bgp; + if (c) + BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name); + else + BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type); if (c) { @@ -2896,7 +3002,7 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) conn->packets_to_send |= 1 << type; if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev)) - ev_send_loop(conn->bgp->p.loop, conn->tx_ev); + proto_send_event(&p->p, conn->tx_ev); } void bgp_kick_tx(void *vconn) @@ -2909,7 +3015,7 @@ bgp_kick_tx(void *vconn) ; if (!max && !ev_active(conn->tx_ev)) - ev_send_loop(conn->bgp->p.loop, conn->tx_ev); + proto_send_event(&conn->bgp->p, conn->tx_ev); } void @@ -2917,13 +3023,14 @@ bgp_tx(sock *sk) { struct bgp_conn *conn = sk->data; + ASSERT_DIE(birdloop_inside(conn->bgp->p.loop)); DBG("BGP: TX hook\n"); uint max = 1024; while (--max && (bgp_fire_tx(conn) > 0)) ; if (!max && !ev_active(conn->tx_ev)) - ev_send_loop(conn->bgp->p.loop, conn->tx_ev); + proto_send_event(&conn->bgp->p, conn->tx_ev); } @@ -2944,6 +3051,7 @@ static struct { { 2, 6, "Unacceptable hold time" }, { 2, 7, "Required capability missing" }, /* [RFC5492] */ { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */ + { 2,11, "Role mismatch" }, /* From Open Policy, RFC 9234 */ { 3, 0, "Invalid UPDATE message" }, { 3, 1, "Malformed attribute list" }, { 3, 2, "Unrecognized well-known attribute" }, @@ -3038,13 +3146,19 @@ bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, if (len) { - /* Bad peer AS - we would like to print the AS */ - if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4))) + /* Bad peer AS / unacceptable hold time - print the value as decimal number */ + if ((code == 2) && ((subcode == 2) || (subcode == 6)) && ((len == 2) || (len == 4))) { t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data)); goto done; } + if ((code == 2) && (subcode == 11) && (len == 1)) + { + t += bsprintf(t, " (%s)", bgp_format_role_name(get_u8(data))); + goto done; + } + /* RFC 8203 - shutdown communication */ if (((code == 6) && ((subcode == 2) || (subcode == 4)))) if (bgp_handle_message(p, data, len, &t)) @@ -3147,6 +3261,30 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len) } } +void +bgp_uncork(void *vp) +{ + /* The uncork event is run from &main_birdloop and there is no useful way how + * to assign the target loop to it, thus we have to lock it ourselves. */ + + struct bgp_proto *p = vp; + if (!p) + return; + + birdloop_enter(p->p.loop); + + if (p && p->conn && (p->conn->state == BS_ESTABLISHED) && !p->conn->sk->rx_hook) + { + struct birdsock *sk = p->conn->sk; + ASSERT_DIE(sk->rpos > sk->rbuf); + sk_resume_rx(p->p.loop, sk, bgp_rx); + bgp_rx(sk, sk->rpos - sk->rbuf); + BGP_TRACE(D_PACKETS, "Uncorked"); + } + + birdloop_leave(p->p.loop); +} + /** * bgp_rx - handle received data * @sk: socket @@ -3161,6 +3299,7 @@ int bgp_rx(sock *sk, uint size) { struct bgp_conn *conn = sk->data; + struct bgp_proto *p = conn->bgp; byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; uint i, len; @@ -3170,6 +3309,12 @@ bgp_rx(sock *sk, uint size) { if ((conn->state == BS_CLOSE) || (conn->sk != sk)) return 0; + if ((conn->state == BS_ESTABLISHED) && rt_cork_check(conn->bgp->uncork_ev)) + { + sk_pause_rx(p->p.loop, sk); + BGP_TRACE(D_PACKETS, "Corked"); + return 0; + } for(i=0; i<16; i++) if (pkt_start[i] != 0xff) { diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile index 925fb102..8cd44ac1 100644 --- a/proto/mrt/Makefile +++ b/proto/mrt/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index e12f7743..82fd426a 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b) } #define MRT_DEFINE_TYPE(S, T) \ - UNUSED static inline void mrt_put_##S##_(buffer *b, T x) \ + static inline void UNUSED mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ - UNUSED static inline void mrt_put_##S(buffer *b, T x) \ + static inline void UNUSED mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ @@ -243,21 +243,15 @@ mrt_next_table(struct mrt_table_dump_state *s) rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr); if (s->table) - { - RT_LOCK(s->table); - rt_unlock_table(RT_PRIV(s->table)); - RT_UNLOCK(s->table); - } + RT_LOCKED(s->table, tab) + rt_unlock_table(tab); s->table = tab; s->ipv4 = tab ? (tab->addr_type == NET_IP4) : 0; if (s->table) - { - RT_LOCK(s->table); - rt_lock_table(RT_PRIV(s->table)); - RT_UNLOCK(s->table); - } + RT_LOCKED(s->table, tab) + rt_lock_table(tab); return s->table; } @@ -431,7 +425,7 @@ mrt_rib_table_header(struct mrt_table_dump_state *s, net_addr *n) static void mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) { - struct ea_list *eattrs = r->attrs->eattrs; + struct ea_list *eattrs = r->attrs; buffer *b = &s->buf; if (!eattrs) @@ -439,7 +433,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) /* Attribute list must be normalized for bgp_encode_attrs() */ if (!rta_is_cached(r->attrs)) - ea_normalize(eattrs); + eattrs = ea_normalize(eattrs, 0); mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); byte *pos = b->pos; @@ -533,7 +527,7 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) } rte e = rt->rte; - if (f_run(s->filter, &e, s->linpool, 0) <= F_ACCEPT) + if (f_run(s->filter, &e, 0) <= F_ACCEPT) mrt_rib_table_entry(s, &e); lp_flush(s->linpool); @@ -561,13 +555,12 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) static struct mrt_table_dump_state * mrt_table_dump_init(pool *pp) { - pool *pool = rp_new(pp, &main_birdloop, "MRT Table Dump"); + pool *pool = rp_new(pp, "MRT Table Dump"); struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); s->pool = pool; - s->parent = pp; - s->linpool = lp_new(pool, 4080); - s->peer_lp = lp_new(pool, 4080); + s->linpool = lp_new(pool); + s->peer_lp = lp_new(pool); mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); /* We lock the current config as we may reference it indirectly by filter */ @@ -583,26 +576,21 @@ static void mrt_table_dump_free(struct mrt_table_dump_state *s) { if (s->table) - { - RT_LOCK(s->table); - - if (s->table_open) - FIB_ITERATE_UNLINK(&s->fit, &RT_PRIV(s->table)->fib); + RT_LOCKED(s->table, tab) + { + if (s->table_open) + FIB_ITERATE_UNLINK(&s->fit, &tab->fib); - rt_unlock_table(RT_PRIV(s->table)); - RT_UNLOCK(s->table); - } + rt_unlock_table(tab); + } if (s->table_ptr) - { - RT_LOCK(s->table_ptr); - rt_unlock_table(RT_PRIV(s->table_ptr)); - RT_UNLOCK(s->table_ptr); - } + RT_LOCKED(s->table_ptr, tab) + rt_unlock_table(tab); config_del_obstacle(s->config); - rp_free(s->pool, s->parent); + rfree(s->pool); } @@ -614,14 +602,8 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) s->max = 2048; s->bws = &bws; - rtable_private *tab; - if (s->table_open) - { - RT_LOCK(s->table); - tab = RT_PRIV(s->table); goto step; - } while (mrt_next_table(s)) { @@ -630,8 +612,9 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) mrt_peer_table_dump(s); - RT_LOCK(s->table); - tab = RT_PRIV(s->table); + RT_LOCKED(s->table, tab) + { + FIB_ITERATE_INIT(&s->fit, &tab->fib); s->table_open = 1; @@ -641,8 +624,7 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) if (s->max < 0) { FIB_ITERATE_PUT(&s->fit); - RT_UNLOCK(s->table); - return 0; + RT_RETURN(tab, 0); } /* With Always ADD_PATH option, we jump directly to second phase */ @@ -657,11 +639,12 @@ mrt_table_dump_step(struct mrt_table_dump_state *s) FIB_ITERATE_END; s->table_open = 0; + } + mrt_close_file(s); mrt_peer_table_flush(s); } - RT_UNLOCK(s->table); return 1; } @@ -689,11 +672,8 @@ mrt_timer(timer *t) s->always_add_path = cf->always_add_path; if (s->table_ptr) - { - RT_LOCK(s->table_ptr); - rt_lock_table(RT_PRIV(s->table_ptr)); - RT_UNLOCK(s->table_ptr); - } + RT_LOCKED(s->table_ptr, tab) + rt_lock_table(tab); p->table_dump = s; ev_schedule(p->event); @@ -735,14 +715,17 @@ mrt_dump_cont(struct cli *c) cli_printf(c, 0, ""); mrt_table_dump_free(c->rover); - c->cont = c->cleanup = c->rover = NULL; + c->cont = NULL; + c->cleanup = NULL; + c->rover = NULL; } -static void +static int mrt_dump_cleanup(struct cli *c) { mrt_table_dump_free(c->rover); c->rover = NULL; + return 0; } void @@ -766,11 +749,8 @@ mrt_dump_cmd(struct mrt_dump_data *d) s->filename = d->filename; if (s->table_ptr) - { - RT_LOCK(s->table_ptr); - rt_lock_table(RT_PRIV(s->table_ptr)); - RT_UNLOCK(s->table_ptr); - } + RT_LOCKED(s->table_ptr, tab) + rt_lock_table(tab); this_cli->cont = mrt_dump_cont; this_cli->cleanup = mrt_dump_cleanup; @@ -940,7 +920,6 @@ mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSE struct protocol proto_mrt = { .name = "MRT", .template = "mrt%d", - .class = PROTOCOL_MRT, .proto_size = sizeof(struct mrt_proto), .config_size = sizeof(struct mrt_config), .init = mrt_init, @@ -949,3 +928,9 @@ struct protocol proto_mrt = { .reconfigure = mrt_reconfigure, .copy_config = mrt_copy_config, }; + +void +mrt_build(void) +{ + proto_build(&proto_mrt); +} diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h index 2e616f6f..f535a391 100644 --- a/proto/mrt/mrt.h +++ b/proto/mrt/mrt.h @@ -13,7 +13,7 @@ #include "nest/bird.h" #include "nest/protocol.h" #include "lib/lists.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/event.h" #include "lib/hash.h" @@ -67,7 +67,6 @@ struct mrt_table_dump_state { /* Allocated by mrt_table_dump_init() */ pool *pool; /* Pool for table dump */ - pool *parent; /* Parent pool for cleanup */ linpool *linpool; /* Temporary linear pool */ linpool *peer_lp; /* Linear pool for peer entries in peer_hash */ buffer buf; /* Buffer for MRT messages */ diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile index 39e74f71..015f394a 100644 --- a/proto/ospf/Makefile +++ b/proto/ospf/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 4b7d5a36..bc3df8db 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -190,7 +190,7 @@ ospf_check_auth(void) CF_DECLS -CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) +CF_KEYWORDS(OSPF, V2, V3) CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT) CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP) @@ -505,11 +505,6 @@ ospf_iface: ospf_iface_start ospf_iface_patt_list ospf_iface_opt_list { ospf_iface_finish(); } ; -dynamic_attr: OSPF_METRIC1 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC1); } ; -dynamic_attr: OSPF_METRIC2 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC2); } ; -dynamic_attr: OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_TAG); } ; -dynamic_attr: OSPF_ROUTER_ID { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_OSPF_ROUTER_ID); } ; - CF_CLI_HELP(SHOW OSPF, ..., [[Show information about OSPF protocol]]); CF_CLI(SHOW OSPF, optproto, [<name>], [[Show information about OSPF protocol]]) { PROTO_WALK_CMD($3, &proto_ospf, p) ospf_sh(p); }; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 049030ac..0aa7fa00 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -136,7 +136,7 @@ ospf_sk_open(struct ospf_iface *ifa) sk->flags = SKF_LADDR_RX | (ifa->check_ttl ? SKF_TTL_RX : 0); sk->ttl = ifa->cf->ttl_security ? 255 : 1; - if (sk_open(sk) < 0) + if (sk_open(sk, p->p.loop) < 0) goto err; /* 12 is an offset of the checksum in an OSPFv3 packet */ @@ -220,7 +220,7 @@ ospf_open_vlink_sk(struct ospf_proto *p) sk->data = (void *) p; sk->flags = 0; - if (sk_open(sk) < 0) + if (sk_open(sk, p->p.loop) < 0) goto err; /* 12 is an offset of the checksum in an OSPFv3 packet */ @@ -311,7 +311,7 @@ ospf_iface_remove(struct ospf_iface *ifa) ospf_iface_sm(ifa, ISM_DOWN); rem_node(NODE ifa); - rp_free(ifa->pool, p->p.pool); + rfree(ifa->pool); } void @@ -484,9 +484,9 @@ ospf_iface_find(struct ospf_proto *p, struct iface *what) } static void -ospf_iface_add(struct object_lock *lock) +ospf_iface_add(void *_ifa) { - struct ospf_iface *ifa = lock->data; + struct ospf_iface *ifa = _ifa; struct ospf_proto *p = ifa->oa->po; /* Open socket if interface is not stub */ @@ -567,7 +567,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i OSPF_TRACE(D_EVENTS, "Adding interface %s (%N) to area %R", iface->name, &addr->prefix, oa->areaid); - pool = rp_new(p->p.pool, p->p.loop, "OSPF Interface"); + pool = rp_new(p->p.pool, "OSPF Interface"); ifa = mb_allocz(pool, sizeof(struct ospf_iface)); ifa->iface = iface; ifa->addr = addr; @@ -668,8 +668,11 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i lock->port = OSPF_PROTO; lock->inst = ifa->instance_id; lock->iface = iface; - lock->data = ifa; - lock->hook = ospf_iface_add; + lock->event = (event) { + .hook = ospf_iface_add, + .data = ifa, + }; + lock->target = &global_event_list; olock_acquire(lock); } @@ -687,7 +690,7 @@ ospf_iface_new_vlink(struct ospf_proto *p, struct ospf_iface_patt *ip) /* Vlink ifname is stored just after the ospf_iface structure */ - pool = rp_new(p->p.pool, p->p.loop, "OSPF Vlink"); + pool = rp_new(p->p.pool, "OSPF Vlink"); ifa = mb_allocz(pool, sizeof(struct ospf_iface) + 16); ifa->oa = p->backbone; ifa->cf = ip; @@ -1222,12 +1225,13 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a) static void ospf_reconfigure_ifaces2(struct ospf_proto *p) { - struct iface *iface; struct ifa *a; - IFACE_LEGACY_ACCESS; - WALK_LIST(iface, global_iface_list) + IFACE_WALK(iface) { + if (p->p.vrf && p->p.vrf != iface->master) + continue; + if (! (iface->flags & IF_UP)) continue; @@ -1269,12 +1273,13 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p) static void ospf_reconfigure_ifaces3(struct ospf_proto *p) { - struct iface *iface; struct ifa *a; - IFACE_LEGACY_ACCESS; - WALK_LIST(iface, global_iface_list) + IFACE_WALK(iface) { + if (p->p.vrf && p->p.vrf != iface->master) + continue; + if (! (iface->flags & IF_UP)) continue; diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index 4ae0d3fa..b0fdc42f 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -80,7 +80,7 @@ struct ospf_neighbor * ospf_neighbor_new(struct ospf_iface *ifa) { struct ospf_proto *p = ifa->oa->po; - struct pool *pool = rp_new(p->p.pool, p->p.loop, "OSPF Neighbor"); + struct pool *pool = rp_new(p->p.pool, "OSPF Neighbor"); struct ospf_neighbor *n = mb_allocz(pool, sizeof(struct ospf_neighbor)); n->pool = pool; @@ -120,7 +120,7 @@ ospf_neigh_down(struct ospf_neighbor *n) s_get(&(n->dbsi)); release_lsrtl(p, n); rem_node(NODE n); - rp_free(n->pool, p->p.pool); + rfree(n->pool); OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", rid, ifa->ifname); } @@ -777,7 +777,7 @@ ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd) if (use_bfd && !n->bfd_req) n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip, n->ifa->iface, p->p.vrf, - ospf_neigh_bfd_hook, n, birdloop_event_list(p->p.loop), NULL); + ospf_neigh_bfd_hook, n, p->p.loop, NULL); if (!use_bfd && n->bfd_req) { diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 16774df6..896bf5a3 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -106,11 +106,12 @@ #include <stdlib.h> #include "ospf.h" +#include "lib/macro.h" static int ospf_preexport(struct channel *C, rte *new); static void ospf_reload_routes(struct channel *C); -static int ospf_rte_better(struct rte *new, struct rte *old); -static u32 ospf_rte_igp_metric(struct rte *rt); +static int ospf_rte_better(const rte *new, const rte *old); +static u32 ospf_rte_igp_metric(const rte *rt); static void ospf_disp(timer *timer); @@ -299,7 +300,7 @@ ospf_start(struct proto *P) p->lsab_size = 256; p->lsab_used = 0; p->lsab = mb_alloc(P->pool, p->lsab_size); - p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop)); + p->nhpool = lp_new(P->pool); init_list(&(p->iface_list)); init_list(&(p->area_list)); fib_init(&p->rtf, P->pool, ospf_get_af(p), sizeof(ort), OFFSETOF(ort, fn), 0, NULL); @@ -370,8 +371,8 @@ ospf_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); P->rt_notify = ospf_rt_notify; - P->if_notify = ospf_if_notify; - P->ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; + P->iface_sub.if_notify = ospf_if_notify; + P->iface_sub.ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3; P->preexport = ospf_preexport; P->reload_routes = ospf_reload_routes; P->feed_begin = ospf_feed_begin; @@ -384,25 +385,28 @@ ospf_init(struct proto_config *CF) /* If new is better return 1 */ static int -ospf_rte_better(struct rte *new, struct rte *old) +ospf_rte_better(const rte *new, const rte *old) { - u32 new_metric1 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + u32 new_metric1 = ea_get_int(new->attrs, &ea_ospf_metric1, LSINFINITY); if (new_metric1 == LSINFINITY) return 0; - if(new->attrs->source < old->attrs->source) return 1; - if(new->attrs->source > old->attrs->source) return 0; + u32 ns = rt_get_source_attr(new); + u32 os = rt_get_source_attr(old); - if(new->attrs->source == RTS_OSPF_EXT2) + if (ns < os) return 1; + if (ns > os) return 0; + + if (ns == RTS_OSPF_EXT2) { - u32 old_metric2 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY); - u32 new_metric2 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY); - if(new_metric2 < old_metric2) return 1; - if(new_metric2 > old_metric2) return 0; + u32 old_metric2 = ea_get_int(old->attrs, &ea_ospf_metric2, LSINFINITY); + u32 new_metric2 = ea_get_int(new->attrs, &ea_ospf_metric2, LSINFINITY); + if (new_metric2 < old_metric2) return 1; + if (new_metric2 > old_metric2) return 0; } - u32 old_metric1 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + u32 old_metric1 = ea_get_int(old->attrs, &ea_ospf_metric1, LSINFINITY); if (new_metric1 < old_metric1) return 1; @@ -410,12 +414,12 @@ ospf_rte_better(struct rte *new, struct rte *old) } static u32 -ospf_rte_igp_metric(struct rte *rt) +ospf_rte_igp_metric(const rte *rt) { - if (rt->attrs->source == RTS_OSPF_EXT2) + if (rt_get_source_attr(rt) == RTS_OSPF_EXT2) return IGP_METRIC_UNKNOWN; - return ea_get_int(rt->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY); + return ea_get_int(rt->attrs, &ea_ospf_metric1, LSINFINITY); } void @@ -482,13 +486,13 @@ ospf_disp(timer * timer) * import to the filters. */ static int -ospf_preexport(struct channel *c, rte *e) +ospf_preexport(struct channel *C, rte *e) { - struct ospf_proto *p = (struct ospf_proto *) c->proto; + struct ospf_proto *p = (struct ospf_proto *) C->proto; struct ospf_area *oa = ospf_main_area(p); /* Reject our own routes */ - if (e->sender == c->in_req.hook) + if (e->sender == C->in_req.hook) return -1; /* Do not export routes to stub areas */ @@ -531,7 +535,7 @@ ospf_shutdown(struct proto *P) /* Cleanup locked rta entries */ FIB_WALK(&p->rtf, ort, nf) { - rta_free(nf->old_rta); + ea_free(nf->old_ea); } FIB_WALK_END; @@ -566,11 +570,12 @@ ospf_get_status(struct proto *P, byte * buf) } static void -ospf_get_route_info(rte * rte, byte * buf) +ospf_get_route_info(const rte * rte, byte * buf) { char *type = "<bug>"; - switch (rte->attrs->source) + uint source = rt_get_source_attr(rte); + switch (source) { case RTS_OSPF: type = "I"; @@ -587,42 +592,26 @@ ospf_get_route_info(rte * rte, byte * buf) } buf += bsprintf(buf, " %s", type); - buf += bsprintf(buf, " (%d/%d", rte->attrs->pref, ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY)); - if (rte->attrs->source == RTS_OSPF_EXT2) - buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY)); + buf += bsprintf(buf, " (%d/%d", rt_get_preference(rte), ea_get_int(rte->attrs, &ea_ospf_metric1, LSINFINITY)); + if (source == RTS_OSPF_EXT2) + buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs, &ea_ospf_metric2, LSINFINITY)); buf += bsprintf(buf, ")"); - if (rte->attrs->source == RTS_OSPF_EXT1 || rte->attrs->source == RTS_OSPF_EXT2) + if (source == RTS_OSPF_EXT1 || source == RTS_OSPF_EXT2) { - eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_TAG); + eattr *ea = ea_find(rte->attrs, &ea_ospf_tag); if (ea && (ea->u.data > 0)) buf += bsprintf(buf, " [%x]", ea->u.data); } - eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_ROUTER_ID); + eattr *ea = ea_find(rte->attrs, &ea_ospf_router_id); if (ea) buf += bsprintf(buf, " [%R]", ea->u.data); } -static int -ospf_get_attr(const eattr * a, byte * buf, int buflen UNUSED) +static void +ospf_tag_format(const eattr * a, byte * buf, uint buflen) { - switch (a->id) - { - case EA_OSPF_METRIC1: - bsprintf(buf, "metric1"); - return GA_NAME; - case EA_OSPF_METRIC2: - bsprintf(buf, "metric2"); - return GA_NAME; - case EA_OSPF_TAG: - bsprintf(buf, "tag: 0x%08x", a->u.data); - return GA_FULL; - case EA_OSPF_ROUTER_ID: - bsprintf(buf, "router_id"); - return GA_NAME; - default: - return GA_UNKNOWN; - } + bsnprintf(buf, buflen, "0x%08x", a->u.data); } static void @@ -1526,7 +1515,6 @@ struct rte_owner_class ospf_rte_owner_class = { struct protocol proto_ospf = { .name = "OSPF", .template = "ospf%d", - .class = PROTOCOL_OSPF, .preference = DEF_PREF_OSPF, .channel_mask = NB_IP, .proto_size = sizeof(struct ospf_proto), @@ -1537,5 +1525,38 @@ struct protocol proto_ospf = { .shutdown = ospf_shutdown, .reconfigure = ospf_reconfigure, .get_status = ospf_get_status, - .get_attr = ospf_get_attr, }; + +struct ea_class ea_ospf_metric1 = { + .name = "ospf_metric1", + .type = T_INT, +}; + +struct ea_class ea_ospf_metric2 = { + .name = "ospf_metric2", + .type = T_INT, +}; + +struct ea_class ea_ospf_tag = { + .name = "ospf_tag", + .type = T_INT, + .format = ospf_tag_format, +}; + +struct ea_class ea_ospf_router_id = { + .name = "ospf_router_id", + .type = T_QUAD, +}; + +void +ospf_build(void) +{ + proto_build(&proto_ospf); + + EA_REGISTER_ALL( + &ea_ospf_metric1, + &ea_ospf_metric2, + &ea_ospf_tag, + &ea_ospf_router_id + ); +} diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index a5f83e79..3477ba5a 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -22,7 +22,7 @@ #include "lib/resource.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "nest/bfd.h" @@ -939,12 +939,7 @@ struct lsadb_show_data { u32 router; /* Advertising router, 0 -> all */ }; - -#define EA_OSPF_METRIC1 EA_CODE(PROTOCOL_OSPF, 0) -#define EA_OSPF_METRIC2 EA_CODE(PROTOCOL_OSPF, 1) -#define EA_OSPF_TAG EA_CODE(PROTOCOL_OSPF, 2) -#define EA_OSPF_ROUTER_ID EA_CODE(PROTOCOL_OSPF, 3) - +extern struct ea_class ea_ospf_metric1, ea_ospf_metric2, ea_ospf_tag, ea_ospf_router_id; /* * For regular networks, neighbor address must match network prefix. diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 3e208023..69c2907d 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -28,24 +28,30 @@ nh_is_vlink(struct nexthop *nhs) static inline int unresolved_vlink(ort *ort) { - return ort->n.nhs && nh_is_vlink(ort->n.nhs); + return ort->n.nhs && nh_is_vlink(&ort->n.nhs->nh); } -static inline struct nexthop * +static inline struct nexthop_adata * new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) { - struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop)); - nh->gw = gw; - nh->iface = iface; - nh->weight = weight; - return nh; + struct nexthop_adata *nhad = lp_alloc(p->nhpool, sizeof(struct nexthop_adata)); + *nhad = (struct nexthop_adata) { + .ad = { .length = sizeof *nhad - sizeof nhad->ad, }, + .nh = { + .gw = gw, + .iface = iface, + .weight = weight, + }, + }; + + return nhad; } /* Returns true if there are device nexthops in n */ static inline int -has_device_nexthops(const struct nexthop *n) +has_device_nexthops(struct nexthop_adata *nhad) { - for (; n; n = n->next) + NEXTHOP_WALK(n, nhad) if (ipa_zero(n->gw)) return 1; @@ -53,38 +59,22 @@ has_device_nexthops(const struct nexthop *n) } /* Replace device nexthops with nexthops to gw */ -static struct nexthop * -fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw) +static struct nexthop_adata * +fix_device_nexthops(struct ospf_proto *p, struct nexthop_adata *old, ip_addr gw) { - struct nexthop *root1 = NULL; - struct nexthop *root2 = NULL; - struct nexthop **nn1 = &root1; - struct nexthop **nn2 = &root2; - if (!p->ecmp) - return new_nexthop(p, gw, n->iface, n->weight); - - /* This is a bit tricky. We cannot just copy the list and update n->gw, - because the list should stay sorted, so we create two lists, one with new - gateways and one with old ones, and then merge them. */ - - for (; n; n = n->next) { - struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); + struct nexthop_adata *new = (struct nexthop_adata *) lp_store_adata(p->nhpool, old->ad.data, old->ad.length); + new->nh.gw = gw; + return new; + } + struct nexthop_adata *tmp = (struct nexthop_adata *) tmp_copy_adata(&old->ad); + NEXTHOP_WALK(n, tmp) if (ipa_zero(n->gw)) - { - *nn1 = nn; - nn1 = &(nn->next); - } - else - { - *nn2 = nn; - nn2 = &(nn->next); - } - } + n->gw = gw; - return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); + return nexthop_sort(tmp, p->nhpool); } @@ -169,9 +159,9 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old) return -1; if (!new->nhs) return 1; - if (nh_is_vlink(new->nhs)) + if (nh_is_vlink(&new->nhs->nh)) return -1; - if (nh_is_vlink(old->nhs)) + if (nh_is_vlink(&old->nhs->nh)) return 1; @@ -279,11 +269,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->rid < new->rid) old->rid = new->rid; @@ -295,11 +281,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->tag != new->tag) old->tag = 0; @@ -1165,7 +1147,7 @@ ospf_check_vlinks(struct ospf_proto *p) if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs) { - struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->iface); + struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->nh.iface); if ((ifa->state != OSPF_IS_PTP) || (ifa->vifa != nhi) @@ -1579,10 +1561,7 @@ ospf_ext_spf(struct ospf_proto *p) /* Replace device nexthops with nexthops to forwarding address from LSA */ if (has_device_nexthops(nfa.nhs)) - { nfa.nhs = fix_device_nexthops(p, nfa.nhs, rt.fwaddr); - nfa.nhs_reuse = 1; - } } if (rt.ebit) @@ -1726,10 +1705,10 @@ ospf_rt_spf(struct ospf_proto *p) static inline int -inherit_nexthops(struct nexthop *pn) +inherit_nexthops(struct nexthop_adata *pn) { /* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */ - return pn && (ipa_nonzero(pn->gw) || !pn->iface); + return pn && (ipa_nonzero(pn->nh.gw) || !pn->nh.iface); } static inline ip_addr @@ -1744,12 +1723,12 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll); } -static struct nexthop * +static struct nexthop_adata * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; - struct nexthop *pn = par->nhs; + struct nexthop_adata *pn = par->nhs; struct top_hash_entry *link = NULL; struct ospf_iface *ifa = NULL; ip_addr nh = IPA_NONE; @@ -1827,10 +1806,10 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, return NULL; } - struct nexthop *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); + struct nexthop_adata *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); if (ifa->addr->flags & IA_HOST) - nhs->flags = RNF_ONLINK; + nhs->nh.flags = RNF_ONLINK; return nhs; } @@ -1851,7 +1830,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(en->lb)) goto bad; - return new_nexthop(p, en->lb, pn->iface, pn->weight); + return new_nexthop(p, en->lb, pn->nh.iface, pn->nh.weight); } else /* OSPFv3 */ { @@ -1859,7 +1838,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, * Next-hop is taken from lladdr field of Link-LSA, en->lb_id * is computed in link_back(). */ - link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); + link = ospf_hash_find(p->gr, pn->nh.iface->index, en->lb_id, rid, LSA_T_LINK); if (!link) return NULL; @@ -1867,7 +1846,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(nh)) return NULL; - return new_nexthop(p, nh, pn->iface, pn->weight); + return new_nexthop(p, nh, pn->nh.iface, pn->nh.weight); } } @@ -1914,7 +1893,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry if (!link_back(oa, en, par, lif, nif)) return; - struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); + struct nexthop_adata *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -1923,7 +1902,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry } /* If en->dist > 0, we know that en->color == CANDIDATE and en->nhs is defined. */ - if ((dist == en->dist) && !nh_is_vlink(en->nhs)) + if ((dist == en->dist) && !nh_is_vlink(&en->nhs->nh)) { /* * For multipath, we should merge nexthops. We merge regular nexthops only. @@ -1947,13 +1926,11 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry */ /* Keep old ones */ - if (!p->ecmp || nh_is_vlink(nhs) || (nhs == en->nhs)) + if (!p->ecmp || nh_is_vlink(&nhs->nh) || (nhs == en->nhs)) return; /* Merge old and new */ - int new_reuse = (par->nhs != nhs); - en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); - en->nhs_reuse = 1; + en->nhs = nexthop_merge(en->nhs, nhs, p->ecmp, p->nhpool); return; } @@ -1967,7 +1944,6 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry en->nhs = nhs; en->dist = dist; en->color = CANDIDATE; - en->nhs_reuse = (par->nhs != nhs); prev = NULL; @@ -2001,14 +1977,34 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry } static inline int -ort_changed(ort *nf, rta *nr) +ort_changed(ort *nf, ea_list *nr) { - rta *or = nf->old_rta; - return !or || + ea_list *or = nf->old_ea; + + if (!or || (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) || - (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) || - (nr->source != or->source) || (nr->dest != or->dest) || - !nexthop_same(&(nr->nh), &(or->nh)); + (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid)) + return 1; + + eattr *nhea_n = ea_find(nr, &ea_gen_nexthop); + eattr *nhea_o = ea_find(or, &ea_gen_nexthop); + if (!nhea_n != !nhea_o) + return 1; + + if (nhea_n && nhea_o) + { + struct nexthop_adata *nhad_n = (struct nexthop_adata *) nhea_n->u.ptr; + struct nexthop_adata *nhad_o = (struct nexthop_adata *) nhea_o->u.ptr; + + if (!nexthop_same(nhad_n, nhad_o)) + return 1; + } + + if ( ea_get_int(nr, &ea_gen_source, 0) + != ea_get_int(or, &ea_gen_source, 0)) + return 1; + + return 0; } static void @@ -2030,10 +2026,9 @@ again1: FIB_ITERATE_START(fib, &fit, ort, nf) { /* Sanity check of next-hop addresses, failure should not happen */ - if (nf->n.type) + if (nf->n.type && nf->n.nhs) { - struct nexthop *nh; - for (nh = nf->n.nhs; nh; nh = nh->next) + NEXTHOP_WALK(nh, nf->n.nhs) if (ipa_nonzero(nh->gw)) { neighbor *nbr = neigh_find(&p->p, nh->gw, nh->iface, @@ -2052,69 +2047,67 @@ again1: if (nf->n.type) /* Add the route */ { - rta a0 = { - .source = nf->n.type, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh = *(nf->n.nhs), - .pref = p->p.main_channel->preference, - }; - - if (reload || ort_changed(nf, &a0)) - { - a0.eattrs = alloca(sizeof(ea_list) + 4 * sizeof(eattr)); - memset(a0.eattrs, 0, sizeof(ea_list)); + struct { + ea_list l; + eattr a[7]; + } eattrs; + eattrs.l = (ea_list) {}; + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, nf->n.type); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nf->n.nhs->ad); + + if (reload || ort_changed(nf, &eattrs.l)) + { nf->old_metric1 = nf->n.metric1; nf->old_metric2 = nf->n.metric2; nf->old_tag = nf->n.tag; nf->old_rid = nf->n.rid; - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_METRIC1, - .type = EAF_TYPE_INT, - .u.data = nf->n.metric1, - }; + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_metric1, 0, nf->n.metric1); if (nf->n.type == RTS_OSPF_EXT2) - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_METRIC2, - .type = EAF_TYPE_INT, - .u.data = nf->n.metric2, - }; + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_metric2, 0, nf->n.metric2); if ((nf->n.type == RTS_OSPF_EXT1) || (nf->n.type == RTS_OSPF_EXT2)) - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_TAG, - .type = EAF_TYPE_INT, - .u.data = nf->n.tag, - }; - - a0.eattrs->attrs[a0.eattrs->count++] = (eattr) { - .id = EA_OSPF_ROUTER_ID, - .type = EAF_TYPE_ROUTER_ID, - .u.data = nf->n.rid, - }; + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_tag, 0, nf->n.tag); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_router_id, 0, nf->n.rid); - rta_free(nf->old_rta); - nf->old_rta = rta_lookup(&a0); + ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count); + + ea_list *eal = ea_lookup(&eattrs.l, 0); + ea_free(nf->old_ea); + nf->old_ea = eal; rte e0 = { - .attrs = nf->old_rta, + .attrs = eal, .src = p->p.main_source, }; + /* DBG("Mod rte type %d - %N via %I on iface %s, met %d\n", a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); + */ rte_update(p->p.main_channel, nf->fn.addr, &e0, p->p.main_source); } } - else if (nf->old_rta) + else if (nf->old_ea) { /* Remove the route */ - rta_free(nf->old_rta); - nf->old_rta = NULL; + rta_free(nf->old_ea); + nf->old_ea = NULL; rte_update(p->p.main_channel, nf->fn.addr, NULL, p->p.main_source); } diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 094e125b..88eefef9 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -18,8 +18,6 @@ typedef struct orta { u8 type; /* RTS_OSPF_* */ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ u32 options; /* * For ORT_ROUTER routes, options field are router-LSA style @@ -53,7 +51,7 @@ typedef struct orta struct ospf_area *oa; struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(), NULL otherwise */ - struct nexthop *nhs; /* Next hops computed during SPF */ + struct nexthop_adata *nhs; /* Next hops computed during SPF */ struct top_hash_entry *en; /* LSA responsible for this orta */ } orta; @@ -80,7 +78,7 @@ typedef struct ort */ orta n; u32 old_metric1, old_metric2, old_tag, old_rid; - rta *old_rta; + ea_list *old_ea; u32 lsa_id; u8 external_rte; u8 area_net; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index bb88d20a..85bce03d 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -1337,9 +1337,9 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt ASSERT(p->asbr); /* Get route attributes */ - rta *a = new->attrs; - eattr *m1a = ea_find(a->eattrs, EA_OSPF_METRIC1); - eattr *m2a = ea_find(a->eattrs, EA_OSPF_METRIC2); + ea_list *a = new->attrs; + eattr *m1a = ea_find(a, &ea_ospf_metric1); + eattr *m2a = ea_find(a, &ea_ospf_metric2); uint m1 = m1a ? m1a->u.data : 0; uint m2 = m2a ? m2a->u.data : 10000; @@ -1363,11 +1363,14 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt uint ebit = m2a || !m1a; uint metric = ebit ? m2 : m1; - uint tag = ea_get_int(a->eattrs, EA_OSPF_TAG, 0); + uint tag = ea_get_int(a, &ea_ospf_tag, 0); ip_addr fwd = IPA_NONE; - if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface)) - fwd = a->nh.gw; + eattr *nhea = ea_find(a, &ea_gen_nexthop); + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) + if (use_gw_for_fwaddr(p, nhad->nh.gw, nhad->nh.iface)) + fwd = nhad->nh.gw; /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) @@ -2135,7 +2138,7 @@ ospf_hash_delete(struct top_graph *f, struct top_hash_entry *e) if (*ee == e) { *ee = e->next; - sl_free(f->hash_slab, e); + sl_free(e); if (f->hash_entries-- < f->hash_entries_min) ospf_top_rehash(f, -HASH_LO_STEP); return; diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index c36d0b50..3c92b431 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -28,7 +28,7 @@ struct top_hash_entry u16 next_lsa_opts; /* For postponed LSA origination */ btime inst_time; /* Time of installation into DB */ struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */ - struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ + struct nexthop_adata *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ @@ -39,8 +39,6 @@ struct top_hash_entry #define CANDIDATE 1 #define INSPF 2 u8 mode; /* LSA generated during RT calculation (LSA_RTCALC or LSA_STALE)*/ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ }; diff --git a/proto/perf/perf.c b/proto/perf/perf.c index aa688d88..dc5bbf2f 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -85,7 +85,7 @@ random_net_ip4(void) } struct perf_random_routes { - struct rta *a; + ea_list *a; net_addr net; }; @@ -142,17 +142,21 @@ perf_loop(void *data) *((net_addr_ip4 *) &(p->data[i].net)) = random_net_ip4(); if (!p->attrs_per_rte || !(i % p->attrs_per_rte)) { - struct rta a0 = { - .source = RTS_PERF, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .pref = p->p.main_channel->preference, + ea_list *ea = NULL; + + ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_PERF); + + struct nexthop_adata nhad = { .nh.iface = p->ifa->iface, .nh.gw = gw, .nh.weight = 1, }; - p->data[i].a = rta_lookup(&a0); + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); + + p->data[i].a = rta_lookup(ea, 0); } else p->data[i].a = rta_clone(p->data[i-1].a); @@ -198,9 +202,9 @@ perf_loop(void *data) p->exp++; } - RT_LOCK(P->main_channel->table); - rt_schedule_prune(RT_PRIV(P->main_channel->table)); - RT_UNLOCK(P->main_channel->table); + RT_LOCKED(P->main_channel->table, tab) + rt_schedule_prune(tab); + ev_schedule(p->loop); } @@ -268,7 +272,7 @@ perf_init(struct proto_config *CF) switch (p->mode) { case PERF_MODE_IMPORT: - P->ifa_notify = perf_ifa_notify; + P->iface_sub.ifa_notify = perf_ifa_notify; break; case PERF_MODE_EXPORT: P->rt_notify = perf_rt_notify; @@ -307,7 +311,6 @@ perf_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS struct protocol proto_perf = { .name = "Perf", .template = "perf%d", - .class = PROTOCOL_PERF, .channel_mask = NB_IP, .proto_size = sizeof(struct perf_proto), .config_size = sizeof(struct perf_config), @@ -316,3 +319,9 @@ struct protocol proto_perf = { .reconfigure = perf_reconfigure, .copy_config = perf_copy_config, }; + +void +perf_build(void) +{ + proto_build(&proto_perf); +} diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile index 5093da98..0d68db4c 100644 --- a/proto/pipe/Makefile +++ b/proto/pipe/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index fc08445f..444de127 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -42,6 +42,12 @@ pipe_proto: pipe_proto_start '{' | pipe_proto proto_item ';' | pipe_proto channel_item_ ';' + | pipe_proto IMPORT IN net_any imexport ';' { + if (this_channel->net_type && ($4->type != this_channel->net_type)) + cf_error("Incompatible export prefilter type"); + PIPE_CFG->in_subprefix = $4; + this_channel->in_filter = $5; + } | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } | pipe_proto MAX GENERATION expr ';' { if (($4 < 1) || ($4 > 254)) cf_error("Max generation must be in range 1..254, got %u", $4); diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 270f7b92..b2083010 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -35,7 +35,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -43,32 +43,29 @@ #include "pipe.h" -#ifdef CONFIG_BGP -#include "proto/bgp/bgp.h" -#endif - static void pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte *new, const rte *old) { struct pipe_proto *p = (void *) P; struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri; + uint *flags = (src_ch == p->pri) ? &p->sec_flags : &p->pri_flags; if (!new && !old) return; + /* Start the route refresh if requested to */ + if (*flags & PIPE_FL_RR_BEGIN_PENDING) + { + *flags &= ~PIPE_FL_RR_BEGIN_PENDING; + rt_refresh_begin(&dst->in_req); + } + if (new) { - rta *a = alloca(rta_size(new->attrs)); - memcpy(a, new->attrs, rta_size(new->attrs)); + rte e0 = rte_init_from(new); - a->cached = 0; - a->hostentry = NULL; - - rte e0 = { - .attrs = a, - .src = new->src, - .generation = new->generation + 1, - }; + e0.generation = new->generation + 1; + ea_unset_attr(&e0.attrs, 0, &ea_gen_hostentry); rte_update(dst, n, &e0, new->src); } @@ -77,12 +74,12 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte * } static int -pipe_preexport(struct channel *c, rte *e) +pipe_preexport(struct channel *C, rte *e) { - struct pipe_proto *p = (void *) c->proto; + struct pipe_proto *p = (void *) C->proto; /* Avoid direct loopbacks */ - if (e->sender == c->in_req.hook) + if (e->sender == C->in_req.hook) return -1; /* Indirection check */ @@ -90,8 +87,8 @@ pipe_preexport(struct channel *c, rte *e) if (e->generation >= max_generation) { log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u", - e->generation, max_generation, c->proto->name, - c->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id); + e->generation, max_generation, C->proto->name, + C->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id); return -1; } @@ -109,12 +106,12 @@ pipe_reload_routes(struct channel *C) } static void -pipe_feed_begin(struct channel *C, int refeeding UNUSED) +pipe_feed_begin(struct channel *C, int initial UNUSED) { struct pipe_proto *p = (void *) C->proto; - struct channel *dst = (C == p->pri) ? p->sec : p->pri; + uint *flags = (C == p->pri) ? &p->sec_flags : &p->pri_flags; - channel_refresh_begin(dst); + *flags |= PIPE_FL_RR_BEGIN_PENDING; } static void @@ -122,8 +119,17 @@ pipe_feed_end(struct channel *C) { struct pipe_proto *p = (void *) C->proto; struct channel *dst = (C == p->pri) ? p->sec : p->pri; + uint *flags = (C == p->pri) ? &p->sec_flags : &p->pri_flags; + + /* If not even started, start the RR now */ + if (*flags & PIPE_FL_RR_BEGIN_PENDING) + { + *flags &= ~PIPE_FL_RR_BEGIN_PENDING; + rt_refresh_begin(&dst->in_req); + } - channel_refresh_end(dst); + /* Finish RR always */ + rt_refresh_end(&dst->in_req); } static void @@ -144,10 +150,16 @@ pipe_postconfig(struct proto_config *CF) if (cc->table->addr_type != cf->peer->addr_type) cf_error("Primary table and peer table must have the same type"); + if (cc->out_subprefix && (cc->table->addr_type != cc->out_subprefix->type)) + cf_error("Export subprefix must match table type"); + + if (cf->in_subprefix && (cc->table->addr_type != cf->in_subprefix->type)) + cf_error("Import subprefix must match table type"); + if (cc->rx_limit.action) cf_error("Pipe protocol does not support receive limits"); - if (cc->in_keep_filtered) + if (cc->in_keep) cf_error("Pipe protocol prohibits keeping filtered routes"); cc->debug = cf->c.debug; @@ -163,6 +175,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf) .channel = cc->channel, .table = cc->table, .out_filter = cc->out_filter, + .out_subprefix = cc->out_subprefix, .in_limit = cc->in_limit, .ra_mode = RA_ANY, .debug = cc->debug, @@ -174,6 +187,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf) .channel = cc->channel, .table = cf->peer, .out_filter = cc->in_filter, + .out_subprefix = cf->in_subprefix, .in_limit = cc->out_limit, .ra_mode = RA_ANY, .debug = cc->debug, @@ -318,7 +332,6 @@ pipe_update_debug(struct proto *P) struct protocol proto_pipe = { .name = "Pipe", .template = "pipe%d", - .class = PROTOCOL_PIPE, .proto_size = sizeof(struct pipe_proto), .config_size = sizeof(struct pipe_config), .postconfig = pipe_postconfig, @@ -328,3 +341,9 @@ struct protocol proto_pipe = { .get_status = pipe_get_status, .show_proto_info = pipe_show_proto_info }; + +void +pipe_build(void) +{ + proto_build(&proto_pipe); +} diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 60c857eb..501b8565 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -12,6 +12,7 @@ struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ + const net_addr *in_subprefix; u8 max_generation; }; @@ -19,7 +20,11 @@ struct pipe_proto { struct proto p; struct channel *pri; struct channel *sec; + uint pri_flags; + uint sec_flags; struct tbf rl_gen; }; +#define PIPE_FL_RR_BEGIN_PENDING 1 /* Route refresh should start with the first route notified */ + #endif diff --git a/proto/radv/Makefile b/proto/radv/Makefile index 05317eff..5c56fbf3 100644 --- a/proto/radv/Makefile +++ b/proto/radv/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/radv/config.Y b/proto/radv/config.Y index 8d4a3ab9..fb68d2e5 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -33,7 +33,7 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, SOLICITED, RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT, LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, LOCAL, TRIGGER, SENSITIVE, PREFERENCE, LOW, MEDIUM, HIGH, PROPAGATE, ROUTE, - ROUTES, RA_PREFERENCE, RA_LIFETIME) + ROUTES) CF_ENUM(T_ENUM_RA_PREFERENCE, RA_PREF_, LOW, MEDIUM, HIGH) @@ -336,9 +336,6 @@ radv_sensitive: | SENSITIVE bool { $$ = $2; } ; -dynamic_attr: RA_PREFERENCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_RA_PREFERENCE, EA_RA_PREFERENCE); } ; -dynamic_attr: RA_LIFETIME { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RA_LIFETIME); } ; - CF_CODE CF_END diff --git a/proto/radv/packets.c b/proto/radv/packets.c index 5cd8b2de..c6b565d2 100644 --- a/proto/radv/packets.c +++ b/proto/radv/packets.c @@ -493,7 +493,7 @@ radv_sk_open(struct radv_iface *ifa) sk->data = ifa; sk->flags = SKF_LADDR_RX; - if (sk_open(sk) < 0) + if (sk_open(sk, ifa->ra->p.loop) < 0) goto err; /* We want listen just to ICMPv6 messages of type RS and RA */ diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 15673555..434155dc 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include "radv.h" +#include "lib/macro.h" /** * DOC: Router Advertisements @@ -42,6 +43,8 @@ * RFC 6106 - DNS extensions (RDDNS, DNSSL) */ +static struct ea_class ea_radv_preference, ea_radv_lifetime; + static void radv_prune_prefixes(struct radv_iface *ifa); static void radv_prune_routes(struct radv_proto *p); @@ -263,9 +266,9 @@ radv_iface_find(struct radv_proto *p, struct iface *what) } static void -radv_iface_add(struct object_lock *lock) +radv_iface_add(void *_ifa) { - struct radv_iface *ifa = lock->data; + struct radv_iface *ifa = _ifa; struct radv_proto *p = ifa->ra; if (! radv_sk_open(ifa)) @@ -284,7 +287,7 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf RADV_TRACE(D_EVENTS, "Adding interface %s", iface->name); - pool *pool = rp_new(p->p.pool, p->p.loop, iface->name); + pool *pool = rp_new(p->p.pool, iface->name); ifa = mb_allocz(pool, sizeof(struct radv_iface)); ifa->pool = pool; ifa->ra = p; @@ -302,8 +305,11 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf lock->type = OBJLOCK_IP; lock->port = ICMPV6_PROTO; lock->iface = iface; - lock->data = ifa; - lock->hook = radv_iface_add; + lock->event = (event) { + .hook = radv_iface_add, + .data = ifa, + }; + lock->target = &global_event_list; ifa->lock = lock; olock_acquire(lock); @@ -317,7 +323,7 @@ radv_iface_remove(struct radv_iface *ifa) rem_node(NODE ifa); - rp_free(ifa->pool, p->p.pool); + rfree(ifa->pool); } static void @@ -391,10 +397,10 @@ radv_net_match_trigger(struct radv_config *cf, const net_addr *n) } int -radv_preexport(struct channel *c, rte *new) +radv_preexport(struct channel *C, rte *new) { // struct radv_proto *p = (struct radv_proto *) P; - struct radv_config *cf = (struct radv_config *) (c->proto->cf); + struct radv_config *cf = (struct radv_config *) (C->proto->cf); if (radv_net_match_trigger(cf, new->net)) return RIC_PROCESS; @@ -444,11 +450,11 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt { /* Update */ - ea = ea_find(new->attrs->eattrs, EA_RA_PREFERENCE); + ea = ea_find(new->attrs, &ea_radv_preference); uint preference = ea ? ea->u.data : RA_PREF_MEDIUM; uint preference_set = !!ea; - ea = ea_find(new->attrs->eattrs, EA_RA_LIFETIME); + ea = ea_find(new->attrs, &ea_radv_lifetime); uint lifetime = ea ? ea->u.data : 0; uint lifetime_set = !!ea; @@ -555,10 +561,7 @@ radv_check_active(struct radv_proto *p) return 1; struct channel *c = p->p.main_channel; - RT_LOCK(c->table); - int active = rt_examine(RT_PRIV(c->table), &cf->trigger, c, c->out_filter); - RT_UNLOCK(c->table); - return active; + return rt_examine(c->table, &cf->trigger, c, c->out_filter); } static void @@ -580,8 +583,8 @@ radv_init(struct proto_config *CF) P->preexport = radv_preexport; P->rt_notify = radv_rt_notify; - P->if_notify = radv_if_notify; - P->ifa_notify = radv_ifa_notify; + P->iface_sub.if_notify = radv_if_notify; + P->iface_sub.ifa_notify = radv_ifa_notify; return P; } @@ -663,10 +666,11 @@ radv_reconfigure(struct proto *P, struct proto_config *CF) if (!old->propagate_routes && new->propagate_routes) channel_request_feeding(p->p.main_channel); - IFACE_LEGACY_ACCESS; - struct iface *iface; - WALK_LIST(iface, global_iface_list) + IFACE_WALK(iface) { + if (p->p.vrf && p->p.vrf != iface->master) + continue; + if (!(iface->flags & IF_UP)) continue; @@ -742,27 +746,26 @@ radv_pref_str(u32 pref) } } -/* The buffer has some minimal size */ -static int -radv_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +radv_preference_format(const eattr *a, byte *buf, uint buflen) { - switch (a->id) - { - case EA_RA_PREFERENCE: - bsprintf(buf, "preference: %s", radv_pref_str(a->u.data)); - return GA_FULL; - case EA_RA_LIFETIME: - bsprintf(buf, "lifetime"); - return GA_NAME; - default: - return GA_UNKNOWN; - } + bsnprintf(buf, buflen, "%s", radv_pref_str(a->u.data)); } +static struct ea_class ea_radv_preference = { + .name = "radv_preference", + .type = T_ENUM_RA_PREFERENCE, + .format = radv_preference_format, +}; + +static struct ea_class ea_radv_lifetime = { + .name = "radv_lifetime", + .type = T_INT, +}; + struct protocol proto_radv = { .name = "RAdv", .template = "radv%d", - .class = PROTOCOL_RADV, .channel_mask = NB_IP6, .proto_size = sizeof(struct radv_proto), .config_size = sizeof(struct radv_config), @@ -773,5 +776,15 @@ struct protocol proto_radv = { .reconfigure = radv_reconfigure, .copy_config = radv_copy_config, .get_status = radv_get_status, - .get_attr = radv_get_attr }; + +void +radv_build(void) +{ + proto_build(&proto_radv); + + EA_REGISTER_ALL( + &ea_radv_preference, + &ea_radv_lifetime + ); +} diff --git a/proto/radv/radv.h b/proto/radv/radv.h index 14d40f8a..c9219bda 100644 --- a/proto/radv/radv.h +++ b/proto/radv/radv.h @@ -19,7 +19,7 @@ #include "lib/resource.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -195,10 +195,6 @@ struct radv_iface #define RA_PREF_HIGH 0x08 #define RA_PREF_MASK 0x18 -/* Attributes */ -#define EA_RA_PREFERENCE EA_CODE(PROTOCOL_RADV, 0) -#define EA_RA_LIFETIME EA_CODE(PROTOCOL_RADV, 1) - #ifdef LOCAL_DEBUG #define RADV_FORCE_DEBUG 1 #else diff --git a/proto/rip/Makefile b/proto/rip/Makefile index 7feabcd8..f4a6fa72 100644 --- a/proto/rip/Makefile +++ b/proto/rip/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rip/config.Y b/proto/rip/config.Y index 28ee9609..3c0973b1 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -37,7 +37,7 @@ CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT, PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO, TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5, TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK, - DEMAND, CIRCUIT, RIP_METRIC, RIP_TAG) + DEMAND, CIRCUIT) %type <i> rip_variant rip_auth @@ -190,9 +190,6 @@ rip_iface: rip_iface_start iface_patt_list_nopx rip_iface_opt_list rip_iface_finish; -dynamic_attr: RIP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_METRIC); } ; -dynamic_attr: RIP_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_TAG); } ; - CF_CLI_HELP(SHOW RIP, ..., [[Show information about RIP protocol]]); CF_CLI(SHOW RIP INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about RIP interfaces]]) diff --git a/proto/rip/packets.c b/proto/rip/packets.c index 9c3bd7a3..fecdf896 100644 --- a/proto/rip/packets.c +++ b/proto/rip/packets.c @@ -1012,7 +1012,7 @@ rip_open_socket(struct rip_iface *ifa) /* sk->rbsize and sk->tbsize are handled in rip_iface_update_buffers() */ - if (sk_open(sk) < 0) + if (sk_open(sk, p->p.loop) < 0) goto err; if (ifa->cf->mode == RIP_IM_MULTICAST) diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 0f8b10ad..d15177da 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -78,6 +78,7 @@ #include <stdlib.h> #include "rip.h" +#include "lib/macro.h" static inline void rip_lock_neighbor(struct rip_neighbor *n); @@ -88,6 +89,7 @@ static inline void rip_iface_kick_timer(struct rip_iface *ifa); static void rip_iface_timer(timer *timer); static void rip_trigger_update(struct rip_proto *p); +static struct ea_class ea_rip_metric, ea_rip_tag, ea_rip_from; /* * RIP routes @@ -108,14 +110,14 @@ rip_add_rte(struct rip_proto *p, struct rip_rte **rp, struct rip_rte *src) } static inline void -rip_remove_rte(struct rip_proto *p, struct rip_rte **rp) +rip_remove_rte(struct rip_proto *p UNUSED, struct rip_rte **rp) { struct rip_rte *rt = *rp; rip_unlock_neighbor(rt->from); *rp = rt->next; - sl_free(p->rte_slab, rt); + sl_free(rt); } static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b) @@ -124,6 +126,11 @@ static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b) static inline int rip_valid_rte(struct rip_rte *rt) { return rt->from->ifa != NULL; } +struct rip_iface_adata { + struct adata ad; + struct iface *iface; +}; + /** * rip_announce_rte - announce route from RIP routing table to the core * @p: RIP instance @@ -144,71 +151,87 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (rt) { /* Update */ - rta a0 = { - .pref = p->p.main_channel->preference, - .source = RTS_RIP, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, + struct { + ea_list l; + eattr a[3]; + } ea_block = { + .l.count = ARRAY_SIZE(ea_block.a), + .a = { + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference), + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_RIP), + EA_LITERAL_EMBEDDED(&ea_rip_metric, 0, rt->metric), + }, }; - u8 rt_metric = rt->metric; + ea_list *ea = &ea_block.l; + u16 rt_tag = rt->tag; + struct iface *rt_from = NULL; if (p->ecmp) { /* ECMP route */ - struct nexthop *nhs = NULL; int num = 0; for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) + if (rip_valid_rte(rt)) + num++; + + struct nexthop_adata *nhad = (struct nexthop_adata *) tmp_alloc_adata((num+1) * sizeof(struct nexthop)); + struct nexthop *nh = &nhad->nh; + + for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) { if (!rip_valid_rte(rt)) - continue; + continue; - struct nexthop *nh = allocz(sizeof(struct nexthop)); + *nh = (struct nexthop) { + .gw = rt->next_hop, + .iface = rt->from->ifa->iface, + .weight = rt->from->ifa->cf->ecmp_weight, + }; - nh->gw = rt->next_hop; - nh->iface = rt->from->ifa->iface; - nh->weight = rt->from->ifa->cf->ecmp_weight; + if (!rt_from) + rt_from = rt->from->ifa->iface; - nexthop_insert(&nhs, nh); - num++; + nh = NEXTHOP_NEXT(nh); if (rt->tag != rt_tag) rt_tag = 0; } - a0.nh = *nhs; + nhad->ad.length = ((void *) nh - (void *) nhad->ad.data); + + ea_set_attr(&ea, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_sort(nhad, tmp_linpool)->ad))); } else { /* Unipath route */ - a0.from = rt->from->nbr->addr; - a0.nh.gw = rt->next_hop; - a0.nh.iface = rt->from->ifa->iface; + rt_from = rt->from->ifa->iface; + + struct nexthop_adata nhad = { + .nh.gw = rt->next_hop, + .nh.iface = rt->from->ifa->iface, + }; + + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); + ea_set_attr_data(&ea, &ea_gen_from, 0, &rt->from->nbr->addr, sizeof(ip_addr)); } - a0.eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)); - memset(a0.eattrs, 0, sizeof(ea_list)); /* Zero-ing only the ea_list header */ - a0.eattrs->count = 3; - a0.eattrs->attrs[0] = (eattr) { - .id = EA_RIP_METRIC, - .type = EAF_TYPE_INT, - .u.data = rt_metric, - }; - a0.eattrs->attrs[1] = (eattr) { - .id = EA_RIP_TAG, - .type = EAF_TYPE_INT, - .u.data = rt_tag, - }; - a0.eattrs->attrs[2] = (eattr) { - .id = EA_RIP_FROM, - .type = EAF_TYPE_PTR, - .u.data = (uintptr_t) a0.nh.iface, + ea_set_attr_u32(&ea, &ea_rip_tag, 0, rt_tag); + + struct rip_iface_adata riad = { + .ad = { .length = sizeof(struct rip_iface_adata) - sizeof(struct adata) }, + .iface = rt_from, }; + ea_set_attr(&ea, + EA_LITERAL_DIRECT_ADATA(&ea_rip_from, 0, &riad.ad)); rte e0 = { - .attrs = &a0, + .attrs = ea, .src = p->p.main_source, }; @@ -320,9 +343,10 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s if (new) { /* Update */ - u32 rt_tag = ea_get_int(new->attrs->eattrs, EA_RIP_TAG, 0); - u32 rt_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, 1); - struct iface *rt_from = (struct iface *) ea_get_int(new->attrs->eattrs, EA_RIP_FROM, 0); + u32 rt_tag = ea_get_int(new->attrs, &ea_rip_tag, 0); + u32 rt_metric = ea_get_int(new->attrs, &ea_rip_metric, 1); + const eattr *rie = ea_find(new->attrs, &ea_rip_from); + struct iface *rt_from = rie ? ((struct rip_iface_adata *) rie->u.ptr)->iface : NULL; if (rt_metric > p->infinity) { @@ -354,8 +378,14 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s en->metric = rt_metric; en->tag = rt_tag; en->from = (new->src->owner == &P->sources) ? rt_from : NULL; - en->iface = new->attrs->nh.iface; - en->next_hop = new->attrs->nh.gw; + + eattr *nhea = ea_find(new->attrs, &ea_gen_nexthop); + if (nhea) + { + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + en->iface = nhad->nh.iface; + en->next_hop = nhad->nh.gw; + } } else { @@ -513,7 +543,7 @@ rip_update_bfd(struct rip_proto *p, struct rip_neighbor *n) ip_addr saddr = rip_is_v2(p) ? n->ifa->sk->saddr : n->nbr->ifa->ip; n->bfd_req = bfd_request_session(p->p.pool, n->nbr->addr, saddr, n->nbr->iface, p->p.vrf, - rip_bfd_notify, n, birdloop_event_list(p->p.loop), NULL); + rip_bfd_notify, n, p->p.loop, NULL); } if (!use_bfd && n->bfd_req) @@ -637,9 +667,9 @@ rip_iface_update_bfd(struct rip_iface *ifa) static void -rip_iface_locked(struct object_lock *lock) +rip_iface_locked(void *_ifa) { - struct rip_iface *ifa = lock->data; + struct rip_iface *ifa = _ifa; struct rip_proto *p = ifa->rip; if (!rip_open_socket(ifa)) @@ -701,8 +731,11 @@ rip_add_iface(struct rip_proto *p, struct iface *iface, struct rip_iface_config lock->type = OBJLOCK_UDP; lock->port = ic->port; lock->iface = iface; - lock->data = ifa; - lock->hook = rip_iface_locked; + lock->event = (event) { + .hook = rip_iface_locked, + .data = ifa, + }; + lock->target = &global_event_list; ifa->lock = lock; olock_acquire(lock); @@ -774,11 +807,11 @@ rip_reconfigure_iface(struct rip_proto *p, struct rip_iface *ifa, struct rip_ifa static void rip_reconfigure_ifaces(struct rip_proto *p, struct rip_config *cf) { - struct iface *iface; - - IFACE_LEGACY_ACCESS; - WALK_LIST(iface, global_iface_list) + IFACE_WALK(iface) { + if (p->p.vrf && p->p.vrf != iface->master) + continue; + if (!(iface->flags & IF_UP)) continue; @@ -1086,28 +1119,22 @@ rip_reload_routes(struct channel *C) static struct rte_owner_class rip_rte_owner_class; static inline struct rip_proto * -rip_rte_proto(struct rte *rte) +rip_rte_proto(const rte *rte) { return (rte->src->owner->class == &rip_rte_owner_class) ? SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL; } -static int -rip_rte_better(struct rte *new, struct rte *old) +static u32 +rip_rte_igp_metric(const rte *rt) { - ASSERT_DIE(new->src == old->src); - struct rip_proto *p = rip_rte_proto(new); - - u32 new_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, p->infinity); - u32 old_metric = ea_get_int(old->attrs->eattrs, EA_RIP_METRIC, p->infinity); - - return new_metric < old_metric; + return ea_get_int(rt->attrs, &ea_rip_metric, IGP_METRIC_UNKNOWN); } -static u32 -rip_rte_igp_metric(struct rte *rt) +static int +rip_rte_better(const rte *new, const rte *old) { - return ea_get_int(rt->attrs->eattrs, EA_RIP_METRIC, IGP_METRIC_UNKNOWN); + return rip_rte_igp_metric(new) < rip_rte_igp_metric(old); } static void @@ -1127,9 +1154,9 @@ rip_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->if_notify = rip_if_notify; + P->iface_sub.if_notify = rip_if_notify; P->rt_notify = rip_rt_notify; - P->neigh_notify = rip_neigh_notify; + P->iface_sub.neigh_notify = rip_neigh_notify; P->reload_routes = rip_reload_routes; P->sources.class = &rip_rte_owner_class; @@ -1204,35 +1231,41 @@ rip_reconfigure(struct proto *P, struct proto_config *CF) } static void -rip_get_route_info(rte *rte, byte *buf) +rip_get_route_info(const rte *rte, byte *buf) { struct rip_proto *p = rip_rte_proto(rte); - u32 rt_metric = ea_get_int(rte->attrs->eattrs, EA_RIP_METRIC, p->infinity); - u32 rt_tag = ea_get_int(rte->attrs->eattrs, EA_RIP_TAG, 0); + u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity); + u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0); - buf += bsprintf(buf, " (%d/%d)", rte->attrs->pref, rt_metric); + buf += bsprintf(buf, " (%d/%d)", rt_get_preference(rte), rt_metric); if (rt_tag) bsprintf(buf, " [%04x]", rt_tag); } -static int -rip_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +rip_tag_format(const eattr *a, byte *buf, uint buflen) { - switch (a->id) - { - case EA_RIP_METRIC: - bsprintf(buf, "metric: %d", a->u.data); - return GA_FULL; + bsnprintf(buf, buflen, "%04x", a->u.data); +} + +static struct ea_class ea_rip_metric = { + .name = "rip_metric", + .type = T_INT, +}; - case EA_RIP_TAG: - bsprintf(buf, "tag: %04x", a->u.data); - return GA_FULL; +static struct ea_class ea_rip_tag = { + .name = "rip_tag", + .type = T_INT, + .format = rip_tag_format, +}; - default: - return GA_UNKNOWN; - } -} +static struct ea_class ea_rip_from = { + .name = "rip_from", + .type = T_IFACE, + .readonly = 1, + .hidden = 1, +}; void rip_show_interfaces(struct proto *P, const char *iff) @@ -1342,7 +1375,6 @@ static struct rte_owner_class rip_rte_owner_class = { struct protocol proto_rip = { .name = "RIP", .template = "rip%d", - .class = PROTOCOL_RIP, .preference = DEF_PREF_RIP, .channel_mask = NB_IP, .proto_size = sizeof(struct rip_proto), @@ -1353,5 +1385,16 @@ struct protocol proto_rip = { .start = rip_start, .shutdown = rip_shutdown, .reconfigure = rip_reconfigure, - .get_attr = rip_get_attr }; + +void +rip_build(void) +{ + proto_build(&proto_rip); + + EA_REGISTER_ALL( + &ea_rip_metric, + &ea_rip_tag, + &ea_rip_from + ); +} diff --git a/proto/rip/rip.h b/proto/rip/rip.h index f8713c4a..a01f8d3b 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -16,7 +16,7 @@ #include "nest/cli.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/password.h" #include "nest/locks.h" #include "nest/bfd.h" @@ -195,10 +195,6 @@ struct rip_rte #define RIP_ENTRY_VALID 1 /* Valid outgoing route */ #define RIP_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */ -#define EA_RIP_METRIC EA_CODE(PROTOCOL_RIP, 0) -#define EA_RIP_TAG EA_CODE(PROTOCOL_RIP, 1) -#define EA_RIP_FROM EA_CODE(PROTOCOL_RIP, 2) - static inline int rip_is_v2(struct rip_proto *p) { return p->rip2; } diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile index eb09b7df..0f60b2a0 100644 --- a/proto/rpki/Makefile +++ b/proto/rpki/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index afba2216..e5638aff 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -121,14 +121,11 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_ { struct rpki_proto *p = cache->p; - rta a0 = { - .pref = channel->preference, - .source = RTS_RPKI, - .scope = SCOPE_UNIVERSE, - .dest = RTD_NONE, - }; + ea_list *ea = NULL; + ea_set_attr_u32(&ea, &ea_gen_preference, 0, channel->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_RPKI); - rte e0 = { .attrs = &a0, .src = p->p.main_source, }; + rte e0 = { .attrs = ea, .src = p->p.main_source, }; rte_update(channel, &pfxr->n, &e0, p->p.main_source); } @@ -302,12 +299,13 @@ rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state ne case RPKI_CS_NO_INCR_UPDATE_AVAIL: /* Server was unable to answer the last Serial Query and sent Cache Reset. */ - rpki_cache_change_state(cache, RPKI_CS_RESET); - break; - case RPKI_CS_ERROR_NO_DATA_AVAIL: /* No validation records are available on the cache server. */ - rpki_cache_change_state(cache, RPKI_CS_RESET); + + if (old_state == RPKI_CS_ESTABLISHED) + rpki_cache_change_state(cache, RPKI_CS_RESET); + else + rpki_schedule_next_retry(cache); break; case RPKI_CS_ERROR_FATAL: @@ -491,6 +489,11 @@ rpki_retry_hook(timer *tm) } break; + case RPKI_CS_NO_INCR_UPDATE_AVAIL: + case RPKI_CS_ERROR_NO_DATA_AVAIL: + rpki_cache_change_state(cache, RPKI_CS_RESET); + break; + default: rpki_cache_change_state(cache, RPKI_CS_CONNECTING); break; @@ -596,7 +599,7 @@ rpki_check_expire_interval(uint seconds) static struct rpki_cache * rpki_init_cache(struct rpki_proto *p, struct rpki_config *cf) { - pool *pool = rp_new(p->p.pool, p->p.loop, cf->hostname); + pool *pool = rp_new(p->p.pool, cf->hostname); struct rpki_cache *cache = mb_allocz(pool, sizeof(struct rpki_cache)); @@ -870,16 +873,27 @@ rpki_show_proto_info(struct proto *P) if (cache) { const char *transport_name = "---"; + uint default_port = 0; switch (cf->tr_config.type) { #if HAVE_LIBSSH - case RPKI_TR_SSH: transport_name = "SSHv2"; break; + case RPKI_TR_SSH: + transport_name = "SSHv2"; + default_port = RPKI_SSH_PORT; + break; #endif - case RPKI_TR_TCP: transport_name = "Unprotected over TCP"; break; + case RPKI_TR_TCP: + transport_name = "Unprotected over TCP"; + default_port = RPKI_TCP_PORT; + break; }; cli_msg(-1006, " Cache server: %s", cf->hostname); + + if (cf->port != default_port) + cli_msg(-1006, " Cache port: %u", cf->port); + cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state)); cli_msg(-1006, " Transport: %s", transport_name); cli_msg(-1006, " Protocol version: %u", cache->version); @@ -977,7 +991,6 @@ rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS struct protocol proto_rpki = { .name = "RPKI", .template = "rpki%d", - .class = PROTOCOL_RPKI, .preference = DEF_PREF_RPKI, .proto_size = sizeof(struct rpki_proto), .config_size = sizeof(struct rpki_config), @@ -991,3 +1004,9 @@ struct protocol proto_rpki = { .reconfigure = rpki_reconfigure, .get_status = rpki_get_status, }; + +void +rpki_build(void) +{ + proto_build(&proto_rpki); +} diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h index a70a2027..20253844 100644 --- a/proto/rpki/rpki.h +++ b/proto/rpki/rpki.h @@ -13,7 +13,7 @@ #define _BIRD_RPKI_H_ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "lib/socket.h" #include "lib/ip.h" diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c index 223afa80..425ad460 100644 --- a/proto/rpki/ssh_transport.c +++ b/proto/rpki/ssh_transport.c @@ -35,11 +35,9 @@ rpki_tr_ssh_open(struct rpki_tr_sock *tr) sk->ssh->subsystem = "rpki-rtr"; sk->ssh->state = SK_SSH_CONNECT; - if (sk_open(sk) != 0) + if (sk_open(sk, cache->p->p.loop) != 0) return RPKI_TR_ERROR; - sk_start(sk); - return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c index 4e850c44..ebb8030f 100644 --- a/proto/rpki/tcp_transport.c +++ b/proto/rpki/tcp_transport.c @@ -28,11 +28,9 @@ rpki_tr_tcp_open(struct rpki_tr_sock *tr) sk->type = SK_TCP_ACTIVE; - if (sk_open(sk) != 0) + if (sk_open(sk, tr->cache->p->p.loop) != 0) return RPKI_TR_ERROR; - sk_start(sk); - return RPKI_TR_SUCCESS; } diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c index 26609764..81bd6dd8 100644 --- a/proto/rpki/transport.c +++ b/proto/rpki/transport.c @@ -85,8 +85,7 @@ rpki_tr_open(struct rpki_tr_sock *tr) sk->rbsize = RPKI_RX_BUFFER_SIZE; sk->tbsize = RPKI_TX_BUFFER_SIZE; sk->tos = IP_PREC_INTERNET_CONTROL; - sk->flags |= SKF_THREAD; - sk->loop = cache->p->p.loop; + sk->vrf = cache->p->p.vrf; if (ipa_zero(sk->daddr) && sk->host) { @@ -121,7 +120,6 @@ rpki_tr_close(struct rpki_tr_sock *tr) if (tr->sk) { - sk_stop(tr->sk); rfree(tr->sk); tr->sk = NULL; } diff --git a/proto/static/Makefile b/proto/static/Makefile index e38f9b74..de6e819b 100644 --- a/proto/static/Makefile +++ b/proto/static/Makefile @@ -3,4 +3,4 @@ obj := $(src-o-files) $(all-daemon) $(cf-local) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/static/config.Y b/proto/static/config.Y index 41e10dbf..9d26ee82 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -40,7 +40,7 @@ static_route_finish(void) if (net_type_match(this_srt->net, NB_DEST) == !this_srt->dest) cf_error("Unexpected or missing nexthop/type"); - this_srt->cmds = f_linearize(this_srt_cmds); + this_srt->cmds = f_linearize(this_srt_cmds, 0); } CF_DECLS diff --git a/proto/static/static.c b/proto/static/static.c index d89ca8b0..6bae827b 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -38,7 +38,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -47,8 +47,6 @@ #include "static.h" -static linpool *static_lp; - static inline struct rte_src * static_get_source(struct static_proto *p, uint i) { return i ? rt_get_source(&p->p, i) : p->p.main_source; } @@ -59,68 +57,77 @@ static void static_announce_rte(struct static_proto *p, struct static_route *r) { struct rte_src *src; - rta *a = allocz(RTA_MAX_SIZE); - a->source = RTS_STATIC; - a->scope = SCOPE_UNIVERSE; - a->dest = r->dest; - a->pref = p->p.main_channel->preference; + ea_list *ea = NULL; + ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_STATIC); if (r->dest == RTD_UNICAST) { - struct static_route *r2; - struct nexthop *nhs = NULL; + uint sz = 0; + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) + if (r2->active) + sz += NEXTHOP_SIZE_CNT(r2->mls ? r2->mls->length / sizeof(u32) : 0); - for (r2 = r; r2; r2 = r2->mp_next) + if (!sz) + goto withdraw; + + struct nexthop_adata *nhad = allocz(sz + sizeof *nhad); + struct nexthop *nh = &nhad->nh; + + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) { if (!r2->active) continue; - struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE); - nh->gw = r2->via; - nh->iface = r2->neigh->iface; - nh->flags = r2->onlink ? RNF_ONLINK : 0; - nh->weight = r2->weight; + *nh = (struct nexthop) { + .gw = r2->via, + .iface = r2->neigh->iface, + .flags = r2->onlink ? RNF_ONLINK : 0, + .weight = r2->weight, + }; + if (r2->mls) { - nh->labels = r2->mls->len; - memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32)); + nh->labels = r2->mls->length / sizeof(u32); + memcpy(nh->label, r2->mls->data, r2->mls->length); } - nexthop_insert(&nhs, nh); + nh = NEXTHOP_NEXT(nh); } - if (!nhs) - goto withdraw; - - nexthop_link(a, nhs); + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, + nhad->ad.data, (void *) nh - (void *) nhad->ad.data); } - if (r->dest == RTDX_RECURSIVE) + else if (r->dest == RTDX_RECURSIVE) { rtable *tab = ipa_is_ip4(r->via) ? p->igp_table_ip4 : p->igp_table_ip6; - rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls, static_lp); + u32 *labels = r->mls ? (void *) r->mls->data : NULL; + u32 lnum = r->mls ? r->mls->length / sizeof(u32) : 0; + + ea_set_hostentry(&ea, p->p.main_channel->table, tab, + r->via, IPA_NONE, lnum, labels); } + else if (r->dest) + ea_set_dest(&ea, 0, r->dest); + /* Already announced */ if (r->state == SRS_CLEAN) return; /* We skip rta_lookup() here */ src = static_get_source(p, r->index); - rte e0 = { .attrs = a, .src = src, .net = r->net, }, *e = &e0; + rte e0 = { .attrs = ea, .src = src, .net = r->net, }, *e = &e0; /* Evaluate the filter */ if (r->cmds) - f_eval_rte(r->cmds, e, static_lp); + f_eval_rte(r->cmds, e); rte_update(p->p.main_channel, r->net, e, src); static_free_source(src, r->index); r->state = SRS_CLEAN; - - if (r->cmds) - lp_flush(static_lp); - return; withdraw: @@ -206,7 +213,7 @@ static_update_bfd(struct static_proto *p, struct static_route *r) // ip_addr local = ipa_nonzero(r->local) ? r->local : nb->ifa->ip; r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip, nb->iface, p->p.vrf, - static_bfd_notify, r, birdloop_event_list(p->p.loop), NULL); + static_bfd_notify, r, p->p.loop, NULL); } if (!bfd_up && r->bfd_req) @@ -322,31 +329,17 @@ static_same_dest(struct static_route *x, struct static_route *y) (x->weight != y->weight) || (x->use_bfd != y->use_bfd) || (!x->mls != !y->mls) || - ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + ((x->mls) && (y->mls) && adata_same(x->mls, y->mls))) return 0; - - if (!x->mls) - continue; - - for (uint i = 0; i < x->mls->len; i++) - if (x->mls->stack[i] != y->mls->stack[i]) - return 0; } return !x && !y; case RTDX_RECURSIVE: if (!ipa_equal(x->via, y->via) || (!x->mls != !y->mls) || - ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + ((x->mls) && (y->mls) && adata_same(x->mls, y->mls))) return 0; - if (!x->mls) - return 1; - - for (uint i = 0; i < x->mls->len; i++) - if (x->mls->stack[i] != y->mls->stack[i]) - return 0; - return 1; default: @@ -413,18 +406,18 @@ static_reload_routes(struct channel *C) } static int -static_rte_better(rte *new, rte *old) +static_rte_better(const rte *new, const rte *old) { - u32 n = ea_get_int(new->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); - u32 o = ea_get_int(old->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); + u32 n = ea_get_int(new->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); + u32 o = ea_get_int(old->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); return n < o; } static int -static_rte_mergable(rte *pri, rte *sec) +static_rte_mergable(const rte *pri, const rte *sec) { - u32 a = ea_get_int(pri->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); - u32 b = ea_get_int(sec->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); + u32 a = ea_get_int(pri->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); + u32 b = ea_get_int(sec->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); return a == b; } @@ -443,11 +436,11 @@ static_postconfig(struct proto_config *CF) if (!cf->igp_table_ip4) cf->igp_table_ip4 = (cc->table->addr_type == NET_IP4) ? - cc->table : cf->c.global->def_tables[NET_IP4]; + cc->table : rt_get_default_table(cf->c.global, NET_IP4); if (!cf->igp_table_ip6) cf->igp_table_ip6 = (cc->table->addr_type == NET_IP6) ? - cc->table : cf->c.global->def_tables[NET_IP6]; + cc->table : rt_get_default_table(cf->c.global, NET_IP6); WALK_LIST(r, cf->routes) if (r->net && (r->net->type != CF->net_type)) @@ -467,7 +460,7 @@ static_init(struct proto_config *CF) P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF)); - P->neigh_notify = static_neigh_notify; + P->iface_sub.neigh_notify = static_neigh_notify; P->reload_routes = static_reload_routes; P->sources.class = &static_rte_owner_class; @@ -487,16 +480,11 @@ static_start(struct proto *P) struct static_config *cf = (void *) P->cf; struct static_route *r; - if (!static_lp) - static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); - if (p->igp_table_ip4) - RT_LOCKED(p->igp_table_ip4, t) - rt_lock_table(t); + rt_lock_table(p->igp_table_ip4); if (p->igp_table_ip6) - RT_LOCKED(p->igp_table_ip6, t) - rt_lock_table(t); + rt_lock_table(p->igp_table_ip6); p->event = ev_new_init(p->p.pool, static_announce_marked, p); @@ -506,7 +494,12 @@ static_start(struct proto *P) proto_notify_state(P, PS_UP); WALK_LIST(r, cf->routes) + { + struct lp_state lps; + lp_save(tmp_linpool, &lps); static_add_rte(p, r); + lp_restore(tmp_linpool, &lps); + } return PS_UP; } @@ -523,12 +516,10 @@ static_shutdown(struct proto *P) static_reset_rte(p, r); if (p->igp_table_ip4) - RT_LOCKED(p->igp_table_ip4, t) - rt_unlock_table(t); + rt_unlock_table(p->igp_table_ip4); if (p->igp_table_ip6) - RT_LOCKED(p->igp_table_ip6, t) - rt_unlock_table(t); + rt_unlock_table(p->igp_table_ip6); return PS_DOWN; } @@ -718,13 +709,14 @@ static_copy_config(struct proto_config *dest, struct proto_config *src) } static void -static_get_route_info(rte *rte, byte *buf) +static_get_route_info(const rte *rte, byte *buf) { - eattr *a = ea_find(rte->attrs->eattrs, EA_GEN_IGP_METRIC); - if (a) - buf += bsprintf(buf, " (%d/%u)", rte->attrs->pref, a->u.data); + eattr *a = ea_find(rte->attrs, &ea_gen_igp_metric); + u32 pref = rt_get_preference(rte); + if (a && (a->u.data < IGP_METRIC_UNKNOWN)) + buf += bsprintf(buf, " (%d/%u)", pref, a->u.data); else - buf += bsprintf(buf, " (%d)", rte->attrs->pref); + buf += bsprintf(buf, " (%d)", pref); } static void @@ -783,7 +775,6 @@ static struct rte_owner_class static_rte_owner_class = { struct protocol proto_static = { .name = "Static", .template = "static%d", - .class = PROTOCOL_STATIC, .preference = DEF_PREF_STATIC, .channel_mask = NB_ANY, .proto_size = sizeof(struct static_proto), @@ -796,3 +787,9 @@ struct protocol proto_static = { .reconfigure = static_reconfigure, .copy_config = static_copy_config, }; + +void +static_build(void) +{ + proto_build(&proto_static); +} diff --git a/proto/static/static.h b/proto/static/static.h index fc91f71c..ea7ca33b 100644 --- a/proto/static/static.h +++ b/proto/static/static.h @@ -9,7 +9,7 @@ #ifndef _BIRD_STATIC_H_ #define _BIRD_STATIC_H_ -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" #include "lib/buffer.h" @@ -49,7 +49,7 @@ struct static_route { byte weight; /* Multipath next hop weight */ byte use_bfd; /* Configured to use BFD */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - mpls_label_stack *mls; /* MPLS label stack; may be NULL */ + struct adata *mls; /* MPLS label stack; may be NULL */ }; /* |