summaryrefslogtreecommitdiff
path: root/proto
diff options
context:
space:
mode:
Diffstat (limited to 'proto')
-rw-r--r--proto/babel/babel.c469
-rw-r--r--proto/babel/babel.h21
-rw-r--r--proto/babel/config.Y9
-rw-r--r--proto/babel/packets.c129
-rw-r--r--proto/bfd/bfd.c484
-rw-r--r--proto/bfd/bfd.h30
-rw-r--r--proto/bfd/config.Y5
-rw-r--r--proto/bfd/packets.c66
-rw-r--r--proto/bgp/Makefile2
-rw-r--r--proto/bgp/attrs.c1217
-rw-r--r--proto/bgp/bgp.c682
-rw-r--r--proto/bgp/bgp.h233
-rw-r--r--proto/bgp/config.Y87
-rw-r--r--proto/bgp/packets.c449
-rw-r--r--proto/mrt/Makefile2
-rw-r--r--proto/mrt/mrt.c99
-rw-r--r--proto/mrt/mrt.h3
-rw-r--r--proto/ospf/Makefile2
-rw-r--r--proto/ospf/config.Y7
-rw-r--r--proto/ospf/iface.c35
-rw-r--r--proto/ospf/neighbor.c6
-rw-r--r--proto/ospf/ospf.c123
-rw-r--r--proto/ospf/ospf.h9
-rw-r--r--proto/ospf/rt.c223
-rw-r--r--proto/ospf/rt.h6
-rw-r--r--proto/ospf/topology.c17
-rw-r--r--proto/ospf/topology.h4
-rw-r--r--proto/perf/perf.c35
-rw-r--r--proto/pipe/Makefile2
-rw-r--r--proto/pipe/config.Y6
-rw-r--r--proto/pipe/pipe.c71
-rw-r--r--proto/pipe/pipe.h5
-rw-r--r--proto/radv/Makefile2
-rw-r--r--proto/radv/config.Y5
-rw-r--r--proto/radv/packets.c2
-rw-r--r--proto/radv/radv.c83
-rw-r--r--proto/radv/radv.h6
-rw-r--r--proto/rip/Makefile2
-rw-r--r--proto/rip/config.Y5
-rw-r--r--proto/rip/packets.c2
-rw-r--r--proto/rip/rip.c217
-rw-r--r--proto/rip/rip.h6
-rw-r--r--proto/rpki/Makefile2
-rw-r--r--proto/rpki/rpki.c49
-rw-r--r--proto/rpki/rpki.h2
-rw-r--r--proto/rpki/ssh_transport.c4
-rw-r--r--proto/rpki/tcp_transport.c4
-rw-r--r--proto/rpki/transport.c4
-rw-r--r--proto/static/Makefile2
-rw-r--r--proto/static/config.Y2
-rw-r--r--proto/static/static.c145
-rw-r--r--proto/static/static.h4
52 files changed, 3123 insertions, 1963 deletions
diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index 6d2a593e..d398da8e 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -14,9 +14,8 @@
/**
* DOC: The Babel protocol
*
- * Babel (RFC6126) is a loop-avoiding distance-vector routing protocol that is
- * robust and efficient both in ordinary wired networks and in wireless mesh
- * networks.
+ * The Babel is a loop-avoiding distance-vector routing protocol that is robust
+ * and efficient both in ordinary wired networks and in wireless mesh networks.
*
* The Babel protocol keeps state for each neighbour in a &babel_neighbor
* struct, tracking received Hello and I Heard You (IHU) messages. A
@@ -33,10 +32,17 @@
* an entry is updated by receiving updates from the network or when modified by
* internal timers. The function selects from feasible and reachable routes the
* one with the lowest metric to be announced to the core.
+ *
+ * Supported standards:
+ * RFC 8966 - The Babel Routing Protocol
+ * RFC 8967 - MAC Authentication for Babel
+ * RFC 9079 - Source Specific Routing for Babel
+ * RFC 9229 - IPv4 Routes with IPv6 Next Hop for Babel
*/
#include <stdlib.h>
#include "babel.h"
+#include "lib/macro.h"
#define LOG_PKT_AUTH(msg, args...) \
log_rl(&p->log_pkt_tbf, L_AUTH "%s: " msg, p->p.name, args)
@@ -49,21 +55,27 @@
static inline int ge_mod64k(uint a, uint b)
{ return (u16)(a - b) < 0x8000; }
+/* Strict inequality version of the above */
+static inline int gt_mod64k(uint a, uint b)
+{ return ge_mod64k(a, b) && a != b; }
+
static void babel_expire_requests(struct babel_proto *p, struct babel_entry *e);
static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod);
static inline void babel_announce_retraction(struct babel_proto *p, struct babel_entry *e);
static void babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n);
-static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr);
+static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr, struct babel_neighbor *n);
static void babel_update_cost(struct babel_neighbor *n);
static inline void babel_kick_timer(struct babel_proto *p);
static inline void babel_iface_kick_timer(struct babel_iface *ifa);
+static struct ea_class ea_babel_metric, ea_babel_router_id, ea_babel_seqno;
+
/*
* Functions to maintain data structures
*/
static void
-babel_init_entry(void *E)
+babel_init_entry(struct fib *f UNUSED, void *E)
{
struct babel_entry *e = E;
@@ -101,7 +113,8 @@ babel_find_source(struct babel_entry *e, u64 router_id)
}
static struct babel_source *
-babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id)
+babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id,
+ u16 initial_seqno)
{
struct babel_source *s = babel_find_source(e, router_id);
@@ -111,7 +124,7 @@ babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id)
s = sl_allocz(p->source_slab);
s->router_id = router_id;
s->expires = current_time() + BABEL_GARBAGE_INTERVAL;
- s->seqno = 0;
+ s->seqno = initial_seqno;
s->metric = BABEL_INFINITY;
add_tail(&e->sources, NODE s);
@@ -119,7 +132,7 @@ babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id)
}
static void
-babel_expire_sources(struct babel_proto *p, struct babel_entry *e)
+babel_expire_sources(struct babel_proto *p UNUSED, struct babel_entry *e)
{
struct babel_source *n, *nx;
btime now_ = current_time();
@@ -129,7 +142,7 @@ babel_expire_sources(struct babel_proto *p, struct babel_entry *e)
if (n->expires && n->expires <= now_)
{
rem_node(NODE n);
- sl_free(p->source_slab, n);
+ sl_free(n);
}
}
}
@@ -174,7 +187,7 @@ babel_retract_route(struct babel_proto *p, struct babel_route *r)
}
static void
-babel_flush_route(struct babel_proto *p, struct babel_route *r)
+babel_flush_route(struct babel_proto *p UNUSED, struct babel_route *r)
{
DBG("Babel: Flush route %N router_id %lR neigh %I\n",
r->e->n.addr, r->router_id, r->neigh->addr);
@@ -185,7 +198,7 @@ babel_flush_route(struct babel_proto *p, struct babel_route *r)
if (r->e->selected == r)
r->e->selected = NULL;
- sl_free(p->route_slab, r);
+ sl_free(r);
}
static void
@@ -288,61 +301,92 @@ babel_expire_routes(struct babel_proto *p)
babel_expire_routes_(p, &p->ip6_rtable);
}
-static inline int seqno_request_valid(struct babel_seqno_request *sr)
-{ return !sr->nbr || sr->nbr->ifa; }
-
/*
- * Add seqno request to the table of pending requests (RFC 6216 3.2.6) and send
+ * Add seqno request to the table of pending requests (RFC 8966 3.2.6) and send
* it to network. Do nothing if it is already in the table.
*/
static void
babel_add_seqno_request(struct babel_proto *p, struct babel_entry *e,
u64 router_id, u16 seqno, u8 hop_count,
- struct babel_neighbor *nbr)
+ struct babel_neighbor *target)
{
struct babel_seqno_request *sr;
+ btime now_ = current_time();
WALK_LIST(sr, e->requests)
if (sr->router_id == router_id)
{
- /* Found matching or newer */
- if (ge_mod64k(sr->seqno, seqno) && seqno_request_valid(sr))
+ /*
+ * To suppress duplicates, check if we already have a newer (higher seqno)
+ * outstanding request. If we do, suppress this request if the outstanding
+ * request is one we originated ourselves. If the outstanding request is
+ * forwarded, suppress only if this request is also one we're forwarding
+ * *and* we're within the duplicate suppression time of that request (see
+ * below).
+ */
+ if (ge_mod64k(sr->seqno, seqno) &&
+ (!sr->forwarded || (target && now_ < sr->dup_suppress_time)))
return;
- /* Found older */
rem_node(NODE sr);
- rem_node(&sr->nbr_node);
+
+ /* Allow upgrading from forwarded to non-forwarded */
+ if (!target)
+ sr->forwarded = 0;
goto found;
}
/* No entries found */
sr = sl_allocz(p->seqno_slab);
+ sr->forwarded = !!target;
found:
sr->router_id = router_id;
sr->seqno = seqno;
- sr->hop_count = hop_count;
+ sr->hop_count = hop_count ?: BABEL_INITIAL_HOP_COUNT;
sr->count = 0;
- sr->expires = current_time() + BABEL_SEQNO_REQUEST_EXPIRY;
- if (sr->nbr = nbr)
- add_tail(&nbr->requests, &sr->nbr_node);
+ if (sr->forwarded)
+ {
+ /*
+ * We want to keep the entry around for a reasonable period of time so it
+ * can be used to trigger an update (through babel_satisfy_seqno_request()).
+ * However, duplicate suppression should only trigger for a short period of
+ * time so it suppresses duplicates from multiple sources, but not
+ * retransmissions from the same source. Hence we keep two timers.
+ */
+ sr->expires = now_ + BABEL_SEQNO_FORWARD_EXPIRY;
+ sr->dup_suppress_time = now_ + BABEL_SEQNO_DUP_SUPPRESS_TIME;
+ }
+ else
+ {
+ sr->expires = now_ + BABEL_SEQNO_REQUEST_EXPIRY;
+ }
add_tail(&e->requests, NODE sr);
-
- babel_send_seqno_request(p, e, sr);
+ babel_send_seqno_request(p, e, sr, target);
}
static void
-babel_remove_seqno_request(struct babel_proto *p, struct babel_seqno_request *sr)
+babel_generate_seqno_request(struct babel_proto *p, struct babel_entry *e,
+ u64 router_id, u16 seqno, struct babel_neighbor *target)
{
- if (sr->nbr)
- rem_node(&sr->nbr_node);
+ struct babel_seqno_request req = {
+ .router_id = router_id,
+ .seqno = seqno,
+ .hop_count = BABEL_INITIAL_HOP_COUNT,
+ };
+ babel_send_seqno_request(p, e, &req, target);
+}
+
+static void
+babel_remove_seqno_request(struct babel_proto *p UNUSED, struct babel_seqno_request *sr)
+{
rem_node(NODE sr);
- sl_free(p->seqno_slab, sr);
+ sl_free(sr);
}
static int
@@ -370,21 +414,14 @@ babel_expire_requests(struct babel_proto *p, struct babel_entry *e)
WALK_LIST_DELSAFE(sr, srx, e->requests)
{
- /* Remove seqno requests sent to dead neighbors */
- if (!seqno_request_valid(sr))
- {
- babel_remove_seqno_request(p, sr);
- continue;
- }
-
/* Handle expired requests - resend or remove */
if (sr->expires && sr->expires <= now_)
{
- if (sr->count < BABEL_SEQNO_REQUEST_RETRY)
+ if (!sr->forwarded && sr->count < BABEL_SEQNO_REQUEST_RETRY)
{
sr->count++;
sr->expires += (BABEL_SEQNO_REQUEST_EXPIRY << sr->count);
- babel_send_seqno_request(p, e, sr);
+ babel_send_seqno_request(p, e, sr, NULL);
}
else
{
@@ -429,7 +466,6 @@ babel_get_neighbor(struct babel_iface *ifa, ip_addr addr)
nbr->cost = BABEL_INFINITY;
nbr->init_expiry = current_time() + BABEL_INITIAL_NEIGHBOR_TIMEOUT;
init_list(&nbr->routes);
- init_list(&nbr->requests);
add_tail(&ifa->neigh_list, NODE nbr);
return nbr;
@@ -450,13 +486,6 @@ babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr)
babel_flush_route(p, r);
}
- struct babel_seqno_request *sr;
- WALK_LIST_FIRST2(sr, nbr_node, nbr->requests)
- {
- sr->nbr = NULL;
- rem_node(&sr->nbr_node);
- }
-
nbr->ifa = NULL;
rem_node(NODE nbr);
mb_free(nbr);
@@ -543,7 +572,7 @@ babel_is_feasible(struct babel_source *s, u16 seqno, u16 metric)
{
return !s ||
(metric == BABEL_INFINITY) ||
- (seqno > s->seqno) ||
+ gt_mod64k(seqno, s->seqno) ||
((seqno == s->seqno) && (metric < s->metric));
}
@@ -640,37 +669,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
if (r)
{
- rta a0 = {
- .source = RTS_BABEL,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
- .pref = c->preference,
- .from = r->neigh->addr,
- .nh.gw = r->next_hop,
- .nh.iface = r->neigh->ifa->iface,
- .eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)),
- };
-
- *a0.eattrs = (ea_list) { .count = 3 };
- a0.eattrs->attrs[0] = (eattr) {
- .id = EA_BABEL_METRIC,
- .type = EAF_TYPE_INT,
- .u.data = r->metric,
- };
-
- struct adata *ad = alloca(sizeof(struct adata) + sizeof(u64));
- ad->length = sizeof(u64);
- memcpy(ad->data, &(r->router_id), sizeof(u64));
- a0.eattrs->attrs[1] = (eattr) {
- .id = EA_BABEL_ROUTER_ID,
- .type = EAF_TYPE_OPAQUE,
- .u.ptr = ad,
- };
-
- a0.eattrs->attrs[2] = (eattr) {
- .id = EA_BABEL_SEQNO,
- .type = EAF_TYPE_INT,
- .u.data = r->seqno,
+ struct nexthop_adata nhad = {
+ .nh = {
+ .gw = r->next_hop,
+ .iface = r->neigh->ifa->iface,
+ },
+ .ad = {
+ .length = sizeof nhad - sizeof nhad.ad,
+ },
};
/*
@@ -679,10 +685,26 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
* have routing work.
*/
if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0))
- a0.nh.flags = RNF_ONLINK;
+ nhad.nh.flags = RNF_ONLINK;
+
+ struct {
+ ea_list l;
+ eattr a[7];
+ } eattrs = {
+ .l.count = ARRAY_SIZE(eattrs.a),
+ .a = {
+ EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, c->preference),
+ EA_LITERAL_STORE_ADATA(&ea_gen_from, 0, &r->neigh->addr, sizeof(r->neigh->addr)),
+ EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_BABEL),
+ EA_LITERAL_STORE_ADATA(&ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length),
+ EA_LITERAL_EMBEDDED(&ea_babel_metric, 0, r->metric),
+ EA_LITERAL_STORE_ADATA(&ea_babel_router_id, 0, &r->router_id, sizeof(r->router_id)),
+ EA_LITERAL_EMBEDDED(&ea_babel_seqno, 0, r->seqno),
+ }
+ };
rte e0 = {
- .attrs = &a0,
+ .attrs = &eattrs.l,
.src = p->p.main_source,
};
@@ -692,15 +714,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
else if (e->valid && (e->router_id != p->router_id))
{
/* Unreachable */
- rta a0 = {
- .source = RTS_BABEL,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNREACHABLE,
- .pref = 1,
- };
+ ea_list *ea = NULL;
+
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, 1);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BABEL);
+ ea_set_dest(&ea, 0, RTD_UNREACHABLE);
rte e0 = {
- .attrs = &a0,
+ .attrs = ea,
.src = p->p.main_source,
};
@@ -862,14 +883,14 @@ babel_send_ihus(struct babel_iface *ifa)
}
static void
-babel_send_hello(struct babel_iface *ifa)
+babel_send_hello(struct babel_iface *ifa, uint interval)
{
struct babel_proto *p = ifa->proto;
union babel_msg msg = {};
msg.type = BABEL_TLV_HELLO;
msg.hello.seqno = ifa->hello_seqno++;
- msg.hello.interval = ifa->cf->hello_interval;
+ msg.hello.interval = interval ?: ifa->cf->hello_interval;
TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t",
ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval);
@@ -907,22 +928,23 @@ babel_send_wildcard_request(struct babel_iface *ifa)
}
static void
-babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr)
+babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e,
+ struct babel_seqno_request *sr, struct babel_neighbor *n)
{
union babel_msg msg = {};
msg.type = BABEL_TLV_SEQNO_REQUEST;
- msg.seqno_request.hop_count = sr->hop_count ?: BABEL_INITIAL_HOP_COUNT;
+ msg.seqno_request.hop_count = sr->hop_count;
msg.seqno_request.seqno = sr->seqno;
msg.seqno_request.router_id = sr->router_id;
net_copy(&msg.seqno_request.net, e->n.addr);
- if (sr->nbr)
+ if (n)
{
TRACE(D_PACKETS, "Sending seqno request for %N router-id %lR seqno %d to %I on %s",
- e->n.addr, sr->router_id, sr->seqno, sr->nbr->addr, sr->nbr->ifa->ifname);
+ e->n.addr, sr->router_id, sr->seqno, n->addr, n->ifa->ifname);
- babel_send_unicast(&msg, sr->nbr->ifa, sr->nbr->addr);
+ babel_send_unicast(&msg, n->ifa, n->addr);
}
else
{
@@ -985,8 +1007,18 @@ babel_send_update_(struct babel_iface *ifa, btime changed, struct fib *rtable)
msg.update.router_id = e->router_id;
net_copy(&msg.update.net, e->n.addr);
- msg.update.next_hop = ((e->n.addr->type == NET_IP4) ?
- ifa->next_hop_ip4 : ifa->next_hop_ip6);
+ if (e->n.addr->type == NET_IP4)
+ {
+ /* Always prefer IPv4 nexthop if set */
+ if (ipa_nonzero(ifa->next_hop_ip4))
+ msg.update.next_hop = ifa->next_hop_ip4;
+
+ /* Only send IPv6 nexthop if enabled */
+ else if (ifa->cf->ext_next_hop)
+ msg.update.next_hop = ifa->next_hop_ip6;
+ }
+ else
+ msg.update.next_hop = ifa->next_hop_ip6;
/* Do not send route if next hop is unknown, e.g. no configured IPv4 address */
if (ipa_zero(msg.update.next_hop))
@@ -994,13 +1026,13 @@ babel_send_update_(struct babel_iface *ifa, btime changed, struct fib *rtable)
babel_enqueue(&msg, ifa);
- /* Update feasibility distance for redistributed routes */
+ /* RFC 8966 3.7.3 - update feasibility distance for redistributed routes */
if (e->router_id != p->router_id)
{
- struct babel_source *s = babel_get_source(p, e, e->router_id);
+ struct babel_source *s = babel_get_source(p, e, e->router_id, msg.update.seqno);
s->expires = current_time() + BABEL_GARBAGE_INTERVAL;
- if ((msg.update.seqno > s->seqno) ||
+ if (gt_mod64k(msg.update.seqno, s->seqno) ||
((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric)))
{
s->seqno = msg.update.seqno;
@@ -1245,6 +1277,13 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
return;
}
+ /* Reject IPv4 via IPv6 routes if disabled */
+ if ((msg->net.type == NET_IP4) && ipa_is_ip6(msg->next_hop) && !ifa->cf->ext_next_hop)
+ {
+ DBG("Babel: Ignoring disabled IPv4 via IPv6 route.\n");
+ return;
+ }
+
/* Retraction */
if (msg->metric == BABEL_INFINITY)
{
@@ -1289,10 +1328,14 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
metric = babel_compute_metric(nbr, msg->metric);
best = e->selected;
- /* RFC section 3.8.2.2 - Dealing with unfeasible updates */
- if (!feasible && (metric != BABEL_INFINITY) &&
+ /*
+ * RFC 8966 3.8.2.2 - dealing with unfeasible updates. Generate a one-off
+ * (not retransmitted) unicast seqno request to the originator of this update.
+ * Note: !feasible -> s exists, check for 's' is just for clarity / safety.
+ */
+ if (!feasible && s && (metric != BABEL_INFINITY) &&
(!best || (r == best) || (metric < best->metric)))
- babel_add_seqno_request(p, e, s->router_id, s->seqno + 1, 0, nbr);
+ babel_generate_seqno_request(p, e, s->router_id, s->seqno + 1, nbr);
/* Special case - ignore unfeasible update to best route */
if (r == best && !feasible && (msg->router_id == r->router_id))
@@ -1331,7 +1374,7 @@ babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa)
struct babel_proto *p = ifa->proto;
struct babel_msg_route_request *msg = &m->route_request;
- /* RFC 6126 3.8.1.1 */
+ /* RFC 8966 3.8.1.1 */
/* Wildcard request - full update on the interface */
if (msg->full)
@@ -1357,13 +1400,39 @@ babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa)
}
}
+static struct babel_neighbor *
+babel_find_seqno_request_target(struct babel_entry *e, struct babel_neighbor *skip)
+{
+ struct babel_route *r, *best_feasible = NULL, *best_any = NULL;
+
+ WALK_LIST(r, e->routes)
+ {
+ if (r->neigh == skip)
+ continue;
+
+ if (r->feasible && (!best_feasible || r->metric < best_feasible->metric))
+ best_feasible = r;
+
+ if (!best_any || r->metric < best_any->metric)
+ best_any = r;
+ }
+
+ if (best_feasible)
+ return best_feasible->neigh;
+
+ if (best_any)
+ return best_any->neigh;
+
+ return NULL;
+}
+
void
babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa)
{
struct babel_proto *p = ifa->proto;
struct babel_msg_seqno_request *msg = &m->seqno_request;
- /* RFC 6126 3.8.1.2 */
+ /* RFC 8966 3.8.1.2 */
TRACE(D_PACKETS, "Handling seqno request for %N router-id %lR seqno %d hop count %d",
&msg->net, msg->router_id, msg->seqno, msg->hop_count);
@@ -1393,26 +1462,21 @@ babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa)
{
/* Not ours; forward if TTL allows it */
- /* Find best admissible route */
- struct babel_route *r, *best1 = NULL, *best2 = NULL;
- WALK_LIST(r, e->routes)
- if ((r->router_id == msg->router_id) && !ipa_equal(r->neigh->addr, msg->sender))
- {
- /* Find best feasible route */
- if ((!best1 || r->metric < best1->metric) && r->feasible)
- best1 = r;
+ struct babel_neighbor *nbr, *target;
- /* Find best not necessary feasible route */
- if (!best2 || r->metric < best2->metric)
- best2 = r;
- }
+ nbr = babel_find_neighbor(ifa, msg->sender);
+ if (!nbr)
+ return;
- /* If no route is found, do nothing */
- r = best1 ?: best2;
- if (!r)
+ target = babel_find_seqno_request_target(e, nbr);
+ if (!target)
+ {
+ TRACE(D_PACKETS, "No neighbor to forward seqno request for %N router-id %lR seqno %d to",
+ e->n.addr, msg->router_id, msg->seqno);
return;
+ }
- babel_add_seqno_request(p, e, msg->router_id, msg->seqno, msg->hop_count-1, r->neigh);
+ babel_add_seqno_request(p, e, msg->router_id, msg->seqno, msg->hop_count-1, target);
}
}
@@ -1506,7 +1570,8 @@ babel_auth_check_pc(struct babel_iface *ifa, struct babel_msg_auth *msg)
n->auth_index_len = msg->index_len;
memcpy(n->auth_index, msg->index, msg->index_len);
- n->auth_pc = msg->pc;
+ n->auth_pc_unicast = msg->pc;
+ n->auth_pc_multicast = msg->pc;
n->auth_passed = 1;
return 1;
@@ -1525,16 +1590,30 @@ babel_auth_check_pc(struct babel_iface *ifa, struct babel_msg_auth *msg)
return 0;
}
- /* (6) Index matches; only accept if PC is greater than last */
- if (n->auth_pc >= msg->pc)
+ /*
+ * (6) Index matches; only accept if PC is greater than last. We keep separate
+ * counters for unicast and multicast because multicast packets can be delayed
+ * significantly on wireless networks (enough to be received out of order).
+ * Separate counters are safe because the packet destination address is part
+ * of the MAC pseudo-header (so unicast packets can't be replayed as multicast
+ * and vice versa).
+ */
+ u32 auth_pc = msg->unicast ? n->auth_pc_unicast : n->auth_pc_multicast;
+ if (auth_pc >= msg->pc)
{
LOG_PKT_AUTH("Authentication failed for %I on %s - "
- "lower packet counter (rcv %u, old %u)",
- msg->sender, ifa->ifname, msg->pc, n->auth_pc);
+ "lower %s packet counter (rcv %u, old %u)",
+ msg->sender, ifa->ifname,
+ msg->unicast ? "unicast" : "multicast",
+ msg->pc, auth_pc);
return 0;
}
- n->auth_pc = msg->pc;
+ if (msg->unicast)
+ n->auth_pc_unicast = msg->pc;
+ else
+ n->auth_pc_multicast = msg->pc;
+
n->auth_passed = 1;
return 1;
@@ -1577,7 +1656,7 @@ babel_iface_timer(timer *t)
if (now_ >= ifa->next_hello)
{
- babel_send_hello(ifa);
+ babel_send_hello(ifa, 0);
ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period);
}
@@ -1624,7 +1703,7 @@ babel_iface_start(struct babel_iface *ifa)
tm_start(ifa->timer, 100 MS);
ifa->up = 1;
- babel_send_hello(ifa);
+ babel_send_hello(ifa, 0);
babel_send_wildcard_retraction(ifa);
babel_send_wildcard_request(ifa);
babel_send_update(ifa, 0); /* Full update */
@@ -1686,7 +1765,7 @@ babel_iface_update_addr4(struct babel_iface *ifa)
ip_addr addr4 = ifa->iface->addr4 ? ifa->iface->addr4->ip : IPA_NONE;
ifa->next_hop_ip4 = ipa_nonzero(ifa->cf->next_hop_ip4) ? ifa->cf->next_hop_ip4 : addr4;
- if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel)
+ if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel && !ifa->cf->ext_next_hop)
log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, ifa->ifname);
if (ifa->up)
@@ -1725,9 +1804,9 @@ babel_find_iface(struct babel_proto *p, struct iface *what)
}
static void
-babel_iface_locked(struct object_lock *lock)
+babel_iface_locked(void *_ifa)
{
- struct babel_iface *ifa = lock->data;
+ struct babel_iface *ifa = _ifa;
struct babel_proto *p = ifa->proto;
if (!babel_open_socket(ifa))
@@ -1747,7 +1826,7 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con
TRACE(D_EVENTS, "Adding interface %s", new->name);
- pool *pool = rp_new(p->p.pool, p->p.loop, new->name);
+ pool *pool = rp_new(p->p.pool, new->name);
ifa = mb_allocz(pool, sizeof(struct babel_iface));
ifa->proto = p;
@@ -1763,8 +1842,8 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con
ifa->next_hop_ip4 = ipa_nonzero(ic->next_hop_ip4) ? ic->next_hop_ip4 : addr4;
ifa->next_hop_ip6 = ipa_nonzero(ic->next_hop_ip6) ? ic->next_hop_ip6 : ifa->addr;
- if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel)
- log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, new->name);
+ if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel && !ic->ext_next_hop)
+ log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, ifa->ifname);
init_list(&ifa->neigh_list);
ifa->hello_seqno = 1;
@@ -1782,8 +1861,11 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con
lock->addr = IP6_BABEL_ROUTERS;
lock->port = ifa->cf->port;
lock->iface = ifa->iface;
- lock->hook = babel_iface_locked;
- lock->data = ifa;
+ lock->event = (event) {
+ .hook = babel_iface_locked,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
olock_acquire(lock);
}
@@ -1799,7 +1881,7 @@ babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa)
rem_node(NODE ifa);
- rp_free(ifa->pool, p->p.pool); /* contains ifa itself, locks, socket, etc */
+ rfree(ifa->pool); /* contains ifa itself, locks, socket, etc */
}
static int
@@ -1884,7 +1966,7 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b
if ((new->auth_type != BABEL_AUTH_NONE) && (new->auth_type != old->auth_type))
babel_auth_reset_index(ifa);
- if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel)
+ if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel && !new->ext_next_hop)
log(L_WARN "%s: Missing IPv4 next hop address for %s", p->p.name, ifa->ifname);
if (ifa->next_hello > (current_time() + new->hello_interval))
@@ -1905,11 +1987,11 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b
static void
babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf)
{
- struct iface *iface;
-
- IFACE_LEGACY_ACCESS;
- WALK_LIST(iface, global_iface_list)
+ IFACE_WALK(iface)
{
+ if (p->p.vrf && p->p.vrf != iface->master)
+ continue;
+
if (!(iface->flags & IF_UP))
continue;
@@ -1920,7 +2002,7 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf)
struct babel_iface *ifa = babel_find_iface(p, iface);
struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL);
- if (ic && iface_is_valid(p, iface))
+ if (ic && !iface_is_valid(p, iface))
ic = NULL;
if (ifa && ic)
@@ -2029,41 +2111,45 @@ babel_dump(struct proto *P)
}
static void
-babel_get_route_info(rte *rte, byte *buf)
+babel_get_route_info(const rte *rte, byte *buf)
{
u64 rid = 0;
- eattr *e = ea_find(rte->attrs->eattrs, EA_BABEL_ROUTER_ID);
+ eattr *e = ea_find(rte->attrs, &ea_babel_router_id);
if (e)
memcpy(&rid, e->u.ptr->data, sizeof(u64));
- buf += bsprintf(buf, " (%d/%d) [%lR]", rte->attrs->pref,
- ea_get_int(rte->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY), rid);
+ buf += bsprintf(buf, " (%d/%d) [%lR]",
+ rt_get_preference(rte),
+ ea_get_int(rte->attrs, &ea_babel_metric, BABEL_INFINITY), rid);
}
-static int
-babel_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
+static void
+babel_router_id_format(const eattr *a, byte *buf, uint len)
{
- switch (a->id)
- {
- case EA_BABEL_SEQNO:
- return GA_FULL;
+ u64 rid = 0;
+ memcpy(&rid, a->u.ptr->data, sizeof(u64));
+ bsnprintf(buf, len, "%lR", rid);
+}
- case EA_BABEL_METRIC:
- bsprintf(buf, "metric: %d", a->u.data);
- return GA_FULL;
+static struct ea_class ea_babel_metric = {
+ .name = "babel_metric",
+ .type = T_INT,
+};
- case EA_BABEL_ROUTER_ID:
- {
- u64 rid = 0;
- memcpy(&rid, a->u.ptr->data, sizeof(u64));
- bsprintf(buf, "router_id: %lR", rid);
- return GA_FULL;
- }
+static struct ea_class ea_babel_router_id = {
+ .name = "babel_router_id",
+ .type = T_OPAQUE,
+ .readonly = 1,
+ .format = babel_router_id_format,
+};
+
+static struct ea_class ea_babel_seqno = {
+ .name = "babel_seqno",
+ .type = T_INT,
+ .readonly = 1,
+ .hidden = 1,
+};
- default:
- return GA_UNKNOWN;
- }
-}
void
babel_show_interfaces(struct proto *P, const char *iff)
@@ -2261,11 +2347,15 @@ babel_kick_timer(struct babel_proto *p)
static int
-babel_preexport(struct channel *c, struct rte *new)
+babel_preexport(struct channel *C, struct rte *new)
{
- struct rta *a = new->attrs;
+ if (new->src->owner != &C->proto->sources)
+ return 0;
+
/* Reject our own unreachable routes */
- if ((a->dest == RTD_UNREACHABLE) && (new->src->owner == &c->proto->sources))
+ eattr *ea = ea_find(new->attrs, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = (void *) ea->u.ptr;
+ if (!NEXTHOP_IS_REACHABLE(nhad))
return -1;
return 0;
@@ -2286,13 +2376,13 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net,
{
/* Update */
uint rt_seqno;
- uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0);
+ uint rt_metric = ea_get_int(new->attrs, &ea_babel_metric, 0);
u64 rt_router_id = 0;
if (new->src->owner == &P->sources)
{
- rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data;
- eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID);
+ rt_seqno = ea_get_int(new->attrs, &ea_babel_seqno, 0);
+ eattr *e = ea_find(new->attrs, &ea_babel_router_id);
if (e)
memcpy(&rt_router_id, e->u.ptr->data, sizeof(u64));
}
@@ -2341,18 +2431,18 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net,
}
static int
-babel_rte_better(struct rte *new, struct rte *old)
+babel_rte_better(const rte *new, const rte *old)
{
- uint new_metric = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data;
- uint old_metric = ea_find(old->attrs->eattrs, EA_BABEL_SEQNO)->u.data;
+ uint new_metric = ea_get_int(new->attrs, &ea_babel_metric, BABEL_INFINITY);
+ uint old_metric = ea_get_int(old->attrs, &ea_babel_metric, BABEL_INFINITY);
return new_metric < old_metric;
}
static u32
-babel_rte_igp_metric(struct rte *rt)
+babel_rte_igp_metric(const rte *rt)
{
- return ea_get_int(rt->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY);
+ return ea_get_int(rt->attrs, &ea_babel_metric, BABEL_INFINITY);
}
@@ -2389,7 +2479,7 @@ babel_init(struct proto_config *CF)
proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel);
proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel);
- P->if_notify = babel_if_notify;
+ P->iface_sub.if_notify = babel_if_notify;
P->rt_notify = babel_rt_notify;
P->preexport = babel_preexport;
@@ -2442,6 +2532,11 @@ babel_iface_shutdown(struct babel_iface *ifa)
{
if (ifa->sk)
{
+ /*
+ * Retract all our routes and lower the hello interval so peers' neighbour
+ * state expires quickly
+ */
+ babel_send_hello(ifa, BABEL_MIN_INTERVAL);
babel_send_wildcard_retraction(ifa);
babel_send_queue(ifa);
}
@@ -2489,7 +2584,6 @@ babel_reconfigure(struct proto *P, struct proto_config *CF)
struct protocol proto_babel = {
.name = "Babel",
.template = "babel%d",
- .class = PROTOCOL_BABEL,
.preference = DEF_PREF_BABEL,
.channel_mask = NB_IP | NB_IP6_SADR,
.proto_size = sizeof(struct babel_proto),
@@ -2500,5 +2594,16 @@ struct protocol proto_babel = {
.start = babel_start,
.shutdown = babel_shutdown,
.reconfigure = babel_reconfigure,
- .get_attr = babel_get_attr
};
+
+void
+babel_build(void)
+{
+ proto_build(&proto_babel);
+
+ EA_REGISTER_ALL(
+ &ea_babel_metric,
+ &ea_babel_router_id,
+ &ea_babel_seqno
+ );
+}
diff --git a/proto/babel/babel.h b/proto/babel/babel.h
index 8b6da3c8..562abac2 100644
--- a/proto/babel/babel.h
+++ b/proto/babel/babel.h
@@ -16,7 +16,7 @@
#include "nest/bird.h"
#include "nest/cli.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/locks.h"
#include "nest/password.h"
@@ -26,10 +26,6 @@
#include "lib/string.h"
#include "lib/timer.h"
-#define EA_BABEL_METRIC EA_CODE(PROTOCOL_BABEL, 0)
-#define EA_BABEL_ROUTER_ID EA_CODE(PROTOCOL_BABEL, 1)
-#define EA_BABEL_SEQNO EA_CODE(PROTOCOL_BABEL, 2)
-
#define BABEL_MAGIC 42
#define BABEL_VERSION 2
#define BABEL_PORT 6696
@@ -48,6 +44,8 @@
#define BABEL_ROUTE_REFRESH_FACTOR(X) ((btime)(X)*5/2) /* 2.5 */
#define BABEL_SEQNO_REQUEST_RETRY 4
#define BABEL_SEQNO_REQUEST_EXPIRY (2 S_)
+#define BABEL_SEQNO_FORWARD_EXPIRY (10 S_)
+#define BABEL_SEQNO_DUP_SUPPRESS_TIME (1 S_)
#define BABEL_GARBAGE_INTERVAL (300 S_)
#define BABEL_RXCOST_WIRED 96
#define BABEL_RXCOST_WIRELESS 256
@@ -112,6 +110,7 @@ enum babel_ae_type {
BABEL_AE_IP4 = 1,
BABEL_AE_IP6 = 2,
BABEL_AE_IP6_LL = 3,
+ BABEL_AE_IP4_VIA_IP6 = 4,
BABEL_AE_MAX
};
@@ -145,8 +144,9 @@ struct babel_iface_config {
ip_addr next_hop_ip4;
ip_addr next_hop_ip6;
+ u8 ext_next_hop; /* Enable IPv4 via IPv6 */
- u8 auth_type; /* Authentication type (BABEL_AUTH_*) */
+ u8 auth_type; /* Authentication type (BABEL_AUTH_*) */
u8 auth_permissive; /* Don't drop packets failing auth check */
uint mac_num_keys; /* Number of configured HMAC keys */
uint mac_total_len; /* Total digest length for all configured keys */
@@ -225,7 +225,8 @@ struct babel_neighbor {
u16 next_hello_seqno;
uint last_hello_int;
- u32 auth_pc;
+ u32 auth_pc_unicast;
+ u32 auth_pc_multicast;
u8 auth_passed;
u8 auth_index_len;
u8 auth_index[BABEL_AUTH_INDEX_LEN];
@@ -240,7 +241,6 @@ struct babel_neighbor {
btime init_expiry;
list routes; /* Routes this neighbour has sent us (struct babel_route) */
- list requests; /* Seqno requests bound to this neighbor */
};
struct babel_source {
@@ -270,13 +270,13 @@ struct babel_route {
struct babel_seqno_request {
node n;
- node nbr_node;
u64 router_id;
u16 seqno;
+ u8 forwarded;
u8 hop_count;
u8 count;
btime expires;
- struct babel_neighbor *nbr;
+ btime dup_suppress_time;
};
struct babel_entry {
@@ -404,6 +404,7 @@ struct babel_msg_auth {
u8 challenge_seen;
u8 challenge_len;
u8 challenge[BABEL_AUTH_MAX_NONCE_LEN];
+ u8 unicast;
};
static inline int babel_sadr_enabled(struct babel_proto *p)
diff --git a/proto/babel/config.Y b/proto/babel/config.Y
index 05210fa4..6d1ad7d0 100644
--- a/proto/babel/config.Y
+++ b/proto/babel/config.Y
@@ -24,8 +24,9 @@ CF_DECLS
CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT,
TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK,
- NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS,
- ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE)
+ NEXT, HOP, IPV4, IPV6, SHOW, INTERFACES, NEIGHBORS,
+ ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE,
+ EXTENDED)
CF_GRAMMAR
@@ -67,6 +68,7 @@ babel_iface_start:
BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL;
BABEL_IFACE->tx_priority = sk_priority_control;
BABEL_IFACE->check_link = 1;
+ BABEL_IFACE->ext_next_hop = 1;
};
@@ -143,6 +145,7 @@ babel_iface_item:
| CHECK LINK bool { BABEL_IFACE->check_link = $3; }
| NEXT HOP IPV4 ipa { BABEL_IFACE->next_hop_ip4 = $4; if (!ipa_is_ip4($4)) cf_error("Must be an IPv4 address"); }
| NEXT HOP IPV6 ipa { BABEL_IFACE->next_hop_ip6 = $4; if (!ipa_is_ip6($4)) cf_error("Must be an IPv6 address"); }
+ | EXTENDED NEXT HOP bool { BABEL_IFACE->ext_next_hop = $4; }
| AUTHENTICATION NONE { BABEL_IFACE->auth_type = BABEL_AUTH_NONE; }
| AUTHENTICATION MAC { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 0; }
| AUTHENTICATION MAC PERMISSIVE { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 1; }
@@ -163,8 +166,6 @@ babel_iface_opt_list:
babel_iface:
babel_iface_start iface_patt_list_nopx babel_iface_opt_list babel_iface_finish;
-dynamic_attr: BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_BABEL_METRIC); } ;
-
CF_CLI_HELP(SHOW BABEL, ..., [[Show information about Babel protocol]]);
CF_CLI(SHOW BABEL INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about Babel interfaces]])
diff --git a/proto/babel/packets.c b/proto/babel/packets.c
index f13410e2..d26ee5c6 100644
--- a/proto/babel/packets.c
+++ b/proto/babel/packets.c
@@ -166,10 +166,12 @@ struct babel_parse_state {
ip_addr next_hop_ip6;
u64 router_id; /* Router ID used in subsequent updates */
u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */
- u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */
+ u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix (AE 1) in network order */
+ u8 def_ip4_via_ip6_prefix[4]; /* Implicit IPv4 prefix (AE 4) in network order */
u8 router_id_seen; /* router_id field is valid */
u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */
u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */
+ u8 def_ip4_via_ip6_prefix_seen; /* def_ip4_via_ip6_prefix is valid */
u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */
u8 sadr_enabled;
u8 is_unicast;
@@ -515,9 +517,6 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m,
msg->addr = IPA_NONE;
msg->sender = state->saddr;
- if (msg->ae >= BABEL_AE_MAX)
- return PARSE_IGNORE;
-
/*
* We only actually read link-local IPs. In every other case, the addr field
* will be 0 but validation will succeed. The handler takes care of these
@@ -526,17 +525,20 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m,
*/
switch (msg->ae)
{
+ case BABEL_AE_WILDCARD:
+ return PARSE_SUCCESS;
+
case BABEL_AE_IP4:
if (TLV_OPT_LENGTH(tlv) < 4)
return PARSE_ERROR;
state->current_tlv_endpos += 4;
- break;
+ return PARSE_SUCCESS;
case BABEL_AE_IP6:
if (TLV_OPT_LENGTH(tlv) < 16)
return PARSE_ERROR;
state->current_tlv_endpos += 16;
- break;
+ return PARSE_SUCCESS;
case BABEL_AE_IP6_LL:
if (TLV_OPT_LENGTH(tlv) < 8)
@@ -544,10 +546,17 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m,
msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr));
state->current_tlv_endpos += 8;
- break;
+ return PARSE_SUCCESS;
+
+ /* RFC 9229 2.4 - IHU TLV MUST NOT carry the AE 4 (IPv4-via-IPv6) */
+ case BABEL_AE_IP4_VIA_IP6:
+ return PARSE_ERROR;
+
+ default:
+ return PARSE_IGNORE;
}
- return PARSE_SUCCESS;
+ return PARSE_IGNORE;
}
static uint
@@ -640,6 +649,10 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED,
state->current_tlv_endpos += 8;
return PARSE_IGNORE;
+ /* RFC 9229 2.4 - Next Hop TLV MUST NOT carry the AE 4 (IPv4-via-IPv6) */
+ case BABEL_AE_IP4_VIA_IP6:
+ return PARSE_ERROR;
+
default:
return PARSE_IGNORE;
}
@@ -692,6 +705,42 @@ babel_write_next_hop(struct babel_tlv *hdr, ip_addr addr,
return 0;
}
+/* This is called directly from babel_read_update() to handle
+ both BABEL_AE_IP4 and BABEL_AE_IP4_VIA_IP6 encodings */
+static int
+babel_read_ip4_prefix(struct babel_tlv_update *tlv, struct babel_msg_update *msg,
+ u8 *def_prefix, u8 *def_prefix_seen, ip_addr next_hop, int len)
+{
+ if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
+ return PARSE_ERROR;
+
+ /* Cannot omit data if there is no saved prefix */
+ if (tlv->omitted && !*def_prefix_seen)
+ return PARSE_ERROR;
+
+ /* Update must have next hop, unless it is retraction */
+ if (ipa_zero(next_hop) && msg->metric != BABEL_INFINITY)
+ return PARSE_ERROR;
+
+ /* Merge saved prefix and received prefix parts */
+ u8 buf[4] = {};
+ memcpy(buf, def_prefix, tlv->omitted);
+ memcpy(buf + tlv->omitted, tlv->addr, len);
+
+ ip4_addr prefix4 = get_ip4(buf);
+ net_fill_ip4(&msg->net, prefix4, tlv->plen);
+
+ if (tlv->flags & BABEL_UF_DEF_PREFIX)
+ {
+ put_ip4(def_prefix, prefix4);
+ *def_prefix_seen = 1;
+ }
+
+ msg->next_hop = next_hop;
+
+ return PARSE_SUCCESS;
+}
+
static int
babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
@@ -706,11 +755,11 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
/* Length of received prefix data without omitted part */
int len = BYTES(tlv->plen) - (int) tlv->omitted;
- u8 buf[16] = {};
if ((len < 0) || ((uint) len > TLV_OPT_LENGTH(tlv)))
return PARSE_ERROR;
+ int rc;
switch (tlv->ae)
{
case BABEL_AE_WILDCARD:
@@ -724,31 +773,20 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
break;
case BABEL_AE_IP4:
- if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
- return PARSE_ERROR;
-
- /* Cannot omit data if there is no saved prefix */
- if (tlv->omitted && !state->def_ip4_prefix_seen)
- return PARSE_ERROR;
-
- /* Update must have next hop, unless it is retraction */
- if (ipa_zero(state->next_hop_ip4) && (msg->metric != BABEL_INFINITY))
- return PARSE_IGNORE;
+ rc = babel_read_ip4_prefix(tlv, msg, state->def_ip4_prefix,
+ &state->def_ip4_prefix_seen,
+ state->next_hop_ip4, len);
+ if (rc != PARSE_SUCCESS)
+ return rc;
- /* Merge saved prefix and received prefix parts */
- memcpy(buf, state->def_ip4_prefix, tlv->omitted);
- memcpy(buf + tlv->omitted, tlv->addr, len);
-
- ip4_addr prefix4 = get_ip4(buf);
- net_fill_ip4(&msg->net, prefix4, tlv->plen);
-
- if (tlv->flags & BABEL_UF_DEF_PREFIX)
- {
- put_ip4(state->def_ip4_prefix, prefix4);
- state->def_ip4_prefix_seen = 1;
- }
+ break;
- msg->next_hop = state->next_hop_ip4;
+ case BABEL_AE_IP4_VIA_IP6:
+ rc = babel_read_ip4_prefix(tlv, msg, state->def_ip4_via_ip6_prefix,
+ &state->def_ip4_via_ip6_prefix_seen,
+ state->next_hop_ip6, len);
+ if (rc != PARSE_SUCCESS)
+ return rc;
break;
@@ -761,6 +799,7 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
return PARSE_ERROR;
/* Merge saved prefix and received prefix parts */
+ u8 buf[16] = {};
memcpy(buf, state->def_ip6_prefix, tlv->omitted);
memcpy(buf + tlv->omitted, tlv->addr, len);
@@ -863,7 +902,7 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m,
}
else if (msg->net.type == NET_IP4)
{
- tlv->ae = BABEL_AE_IP4;
+ tlv->ae = ipa_is_ip4(msg->next_hop) ? BABEL_AE_IP4 : BABEL_AE_IP4_VIA_IP6;
tlv->plen = net4_pxlen(&msg->net);
put_ip4_px(tlv->addr, &msg->net);
}
@@ -931,7 +970,12 @@ babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m,
msg->full = 1;
return PARSE_SUCCESS;
+ /*
+ * RFC 9229 2.3 - When receiving requests, AE 1 (IPv4) and AE 4
+ * (IPv4-via-IPv6) MUST be treated in the same manner.
+ */
case BABEL_AE_IP4:
+ case BABEL_AE_IP4_VIA_IP6:
if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
@@ -1032,7 +1076,12 @@ babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m,
case BABEL_AE_WILDCARD:
return PARSE_ERROR;
+ /*
+ * RFC 9229 2.3 - When receiving requests, AE 1 (IPv4) and AE 4
+ * (IPv4-via-IPv6) MUST be treated in the same manner.
+ */
case BABEL_AE_IP4:
+ case BABEL_AE_IP4_VIA_IP6:
if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
@@ -1318,7 +1367,6 @@ babel_send_to(struct babel_iface *ifa, ip_addr dest)
static uint
babel_write_queue(struct babel_iface *ifa, list *queue)
{
- struct babel_proto *p = ifa->proto;
struct babel_write_state state = { .next_hop_ip6 = ifa->addr };
if (EMPTY_LIST(*queue))
@@ -1346,7 +1394,7 @@ babel_write_queue(struct babel_iface *ifa, list *queue)
pos += len;
rem_node(NODE msg);
- sl_free(p->msg_slab, msg);
+ sl_free(msg);
}
pos += babel_auth_add_tlvs(ifa, (struct babel_tlv *) pos, end - pos);
@@ -1507,13 +1555,13 @@ babel_process_packet(struct babel_iface *ifa,
else if (res == PARSE_IGNORE)
{
DBG("Babel: Ignoring TLV of type %d\n", tlv->type);
- sl_free(p->msg_slab, msg);
+ sl_free(msg);
}
else /* PARSE_ERROR */
{
LOG_PKT("Bad TLV from %I via %s type %d pos %d - parse error",
saddr, ifa->iface->name, tlv->type, (int) ((byte *)tlv - (byte *)pkt));
- sl_free(p->msg_slab, msg);
+ sl_free(msg);
break;
}
}
@@ -1525,7 +1573,7 @@ babel_process_packet(struct babel_iface *ifa,
if (tlv_data[msg->msg.type].handle_tlv)
tlv_data[msg->msg.type].handle_tlv(&msg->msg, ifa);
rem_node(NODE msg);
- sl_free(p->msg_slab, msg);
+ sl_free(msg);
}
}
@@ -1618,7 +1666,7 @@ babel_open_socket(struct babel_iface *ifa)
sk->ttl = 1;
sk->flags = SKF_LADDR_RX;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
if (sk_setup_multicast(sk) < 0)
@@ -1656,6 +1704,7 @@ babel_read_pc(struct babel_tlv *hdr, union babel_msg *m UNUSED,
state->auth.pc_seen = 1;
state->auth.index_len = index_len;
state->auth.index = tlv->index;
+ state->auth.unicast = state->is_unicast;
state->current_tlv_endpos += index_len;
return PARSE_SUCCESS;
@@ -2011,7 +2060,7 @@ babel_auth_sign(struct babel_iface *ifa, ip_addr dest)
}
DBG("Added MAC signatures (%d bytes) on ifa %s for dest %I\n",
- tot_len, ifa->ifname, dest);
+ pos - (pkt + len), ifa->ifname, dest);
return pos - (pkt + len);
}
diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c
index 479c1510..c88c1cb2 100644
--- a/proto/bfd/bfd.c
+++ b/proto/bfd/bfd.c
@@ -82,7 +82,7 @@
* BFD thread to the main thread. This is done in an asynchronous way, sesions
* with pending notifications are linked (in the BFD thread) to @notify_list in
* &bfd_proto, and then bfd_notify_hook() in the main thread is activated using
- * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and
+ * a standard event sending code. The hook then processes scheduled sessions and
* calls hooks from associated BFD requests. This @notify_list (and state fields
* in structure &bfd_session) is protected by a spinlock in &bfd_proto and
* functions bfd_lock_sessions() / bfd_unlock_sessions().
@@ -113,26 +113,22 @@
#define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2
#define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n)
-DEFINE_DOMAIN(rtable);
#define BFD_LOCK LOCK_DOMAIN(rtable, bfd_global.lock)
#define BFD_UNLOCK UNLOCK_DOMAIN(rtable, bfd_global.lock)
-#define BFD_ASSERT_LOCKED ASSERT_DIE(DOMAIN_IS_LOCKED(rtable, bfd_global.lock))
static struct {
DOMAIN(rtable) lock;
list wait_list;
+ list pickup_list;
list proto_list;
+ uint pickup_reload;
} bfd_global;
-static struct bfd_session bfd_admin_down = { .loc = ATOMIC_VAR_INIT((struct bfd_session_state) { .state = BFD_STATE_ADMIN_DOWN }), };
-
const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" };
static void bfd_session_set_min_tx(struct bfd_session *s, u32 val);
static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface);
static void bfd_free_iface(struct bfd_iface *ifa);
-static void bfd_remove_session(struct bfd_proto *p, struct bfd_session *s);
-static void bfd_reconfigure_session_hook(void *vsession);
/*
@@ -151,57 +147,37 @@ bfd_merge_options(const struct bfd_iface_config *cf, const struct bfd_options *o
};
}
-static int
+static void
bfd_session_update_state(struct bfd_session *s, uint state, uint diag)
{
struct bfd_proto *p = s->ifa->bfd;
- uint old_state = BFD_LOC_STATE(s).state;
+ uint old_state = s->loc_state;
+ int notify;
if (state == old_state)
- {
- if (current_time() > s->last_reqlist_check + 5 S)
- {
- BFD_LOCK;
- if (EMPTY_LIST(s->request_list))
- {
- bfd_remove_session(p, s);
- BFD_UNLOCK;
- return 1;
- }
-
- s->last_reqlist_check = current_time();
- BFD_UNLOCK;
- }
- return 0;
- }
+ return;
TRACE(D_EVENTS, "Session to %I changed state from %s to %s",
s->addr, bfd_state_names[old_state], bfd_state_names[state]);
- atomic_store_explicit(&s->loc, ((struct bfd_session_state) { .state = state, .diag = diag }), memory_order_release);
+ bfd_lock_sessions(p);
+ s->loc_state = state;
+ s->loc_diag = diag;
s->last_state_change = current_time();
+ notify = !NODE_VALID(&s->n);
+ if (notify)
+ add_tail(&p->notify_list, &s->n);
+ bfd_unlock_sessions(p);
+
if (state == BFD_STATE_UP)
bfd_session_set_min_tx(s, s->cf.min_tx_int);
if (old_state == BFD_STATE_UP)
bfd_session_set_min_tx(s, s->cf.idle_tx_int);
- BFD_LOCK;
- if (EMPTY_LIST(s->request_list))
- {
- bfd_remove_session(p, s);
- BFD_UNLOCK;
- return 1;
- }
-
- struct bfd_request *req;
- node *nn;
- WALK_LIST2(req, nn, s->request_list, n)
- ev_send_self(&req->event);
-
- BFD_UNLOCK;
- return 0;
+ if (notify)
+ ev_send(&global_event_list, &p->notify_event);
}
static void
@@ -246,8 +222,8 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset)
if (s->rem_demand_mode &&
!s->poll_active &&
- (BFD_LOC_STATE(s).state == BFD_STATE_UP) &&
- (s->rem.state == BFD_STATE_UP))
+ (s->loc_state == BFD_STATE_UP) &&
+ (s->rem_state == BFD_STATE_UP))
goto stop;
if (s->rem_min_rx_int == 0)
@@ -317,29 +293,28 @@ bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old
int next_state = 0;
int diag = BFD_DIAG_NOTHING;
- switch (BFD_LOC_STATE(s).state)
+ switch (s->loc_state)
{
case BFD_STATE_ADMIN_DOWN:
return;
case BFD_STATE_DOWN:
- if (s->rem.state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT;
- else if (s->rem.state == BFD_STATE_INIT) next_state = BFD_STATE_UP;
+ if (s->rem_state == BFD_STATE_DOWN) next_state = BFD_STATE_INIT;
+ else if (s->rem_state == BFD_STATE_INIT) next_state = BFD_STATE_UP;
break;
case BFD_STATE_INIT:
- if (s->rem.state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN;
- else if (s->rem.state >= BFD_STATE_INIT) next_state = BFD_STATE_UP;
+ if (s->rem_state == BFD_STATE_ADMIN_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN;
+ else if (s->rem_state >= BFD_STATE_INIT) next_state = BFD_STATE_UP;
break;
case BFD_STATE_UP:
- if (s->rem.state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN;
+ if (s->rem_state <= BFD_STATE_DOWN) next_state = BFD_STATE_DOWN, diag = BFD_DIAG_NEIGHBOR_DOWN;
break;
}
if (next_state)
- if (bfd_session_update_state(s, next_state, diag))
- return;
+ bfd_session_update_state(s, next_state, diag);
bfd_session_control_tx_timer(s, 0);
@@ -354,7 +329,7 @@ bfd_session_timeout(struct bfd_session *s)
TRACE(D_EVENTS, "Session to %I expired", s->addr);
- s->rem.state = BFD_STATE_DOWN;
+ s->rem_state = BFD_STATE_DOWN;
s->rem_id = 0;
s->rem_min_tx_int = 0;
s->rem_min_rx_int = 1;
@@ -365,8 +340,7 @@ bfd_session_timeout(struct bfd_session *s)
s->poll_active = 0;
s->poll_scheduled = 0;
- if (bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT))
- return;
+ bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT);
bfd_session_control_tx_timer(s, 1);
}
@@ -382,7 +356,7 @@ bfd_session_set_min_tx(struct bfd_session *s, u32 val)
s->des_min_tx_new = val;
/* Postpone timer update if des_min_tx_int increases and the session is up */
- if ((BFD_LOC_STATE(s).state != BFD_STATE_UP) || (val < s->des_min_tx_int))
+ if ((s->loc_state != BFD_STATE_UP) || (val < s->des_min_tx_int))
{
s->des_min_tx_int = val;
bfd_session_update_tx_interval(s);
@@ -402,7 +376,7 @@ bfd_session_set_min_rx(struct bfd_session *s, u32 val)
s->req_min_rx_new = val;
/* Postpone timer update if req_min_rx_int decreases and the session is up */
- if ((BFD_LOC_STATE(s).state != BFD_STATE_UP) || (val > s->req_min_rx_int))
+ if ((s->loc_state != BFD_STATE_UP) || (val > s->req_min_rx_int))
{
s->req_min_rx_int = val;
bfd_session_update_detection_time(s, 0);
@@ -414,14 +388,12 @@ bfd_session_set_min_rx(struct bfd_session *s, u32 val)
struct bfd_session *
bfd_find_session_by_id(struct bfd_proto *p, u32 id)
{
- ASSERT_DIE(birdloop_inside(p->p.loop));
return HASH_FIND(p->session_hash_id, HASH_ID, id);
}
struct bfd_session *
bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr, uint ifindex)
{
- ASSERT_DIE(birdloop_inside(p->p.loop));
return HASH_FIND(p->session_hash_ip, HASH_IP, addr, ifindex);
}
@@ -455,7 +427,6 @@ static struct bfd_session *
bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface, struct bfd_options *opts)
{
ASSERT_DIE(birdloop_inside(p->p.loop));
- BFD_ASSERT_LOCKED;
struct bfd_iface *ifa = bfd_get_iface(p, local, iface);
@@ -469,15 +440,10 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *
HASH_INSERT(p->session_hash_ip, HASH_IP, s);
s->cf = bfd_merge_options(ifa->cf, opts);
- s->update_event = (event) {
- .hook = bfd_reconfigure_session_hook,
- .data = s,
- .list = birdloop_event_list(p->p.loop),
- };
/* Initialization of state variables - see RFC 5880 6.8.1 */
- atomic_store_explicit(&s->loc, ((struct bfd_session_state) { .state = BFD_STATE_DOWN }), memory_order_relaxed);
- s->rem.state = BFD_STATE_DOWN;
+ s->loc_state = BFD_STATE_DOWN;
+ s->rem_state = BFD_STATE_DOWN;
s->des_min_tx_int = s->des_min_tx_new = s->cf.idle_tx_int;
s->req_min_rx_int = s->req_min_rx_new = s->cf.min_rx_int;
s->rem_min_rx_int = 1;
@@ -485,8 +451,8 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *
s->passive = s->cf.passive;
s->tx_csn = random_u32();
- s->tx_timer = tm_new_init(p->p.pool, bfd_tx_timer_hook, s, 0, 0);
- s->hold_timer = tm_new_init(p->p.pool, bfd_hold_timer_hook, s, 0, 0);
+ s->tx_timer = tm_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0);
+ s->hold_timer = tm_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0);
bfd_session_update_tx_interval(s);
bfd_session_control_tx_timer(s, 1);
@@ -498,12 +464,42 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *
return s;
}
+/*
static void
-bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
+bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa)
{
- ASSERT_DIE(birdloop_inside(p->p.loop));
- BFD_ASSERT_LOCKED;
- ASSERT_DIE(EMPTY_LIST(s->request_list));
+ birdloop_enter(p->p.loop);
+
+ s->opened = 1;
+
+ bfd_session_control_tx_timer(s);
+
+ birdloop_leave(p->p.loop);
+}
+
+static void
+bfd_close_session(struct bfd_proto *p, struct bfd_session *s)
+{
+ birdloop_enter(p->p.loop);
+
+ s->opened = 0;
+
+ bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN);
+ bfd_session_control_tx_timer(s);
+
+ birdloop_leave(p->p.loop);
+}
+*/
+
+static void
+bfd_remove_session_locked(struct bfd_proto *p, struct bfd_session *s)
+{
+ /* Caller should ensure that request list is empty */
+
+ /* Remove session from notify list if scheduled for notification */
+ /* No need for bfd_lock_sessions(), we are already protected by birdloop_enter() */
+ if (NODE_VALID(&s->n))
+ rem_node(&s->n);
bfd_free_iface(s->ifa);
@@ -515,25 +511,29 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
TRACE(D_EVENTS, "Session to %I removed", s->addr);
- sl_free(p->session_slab, s);
+ sl_free(s);
+}
+
+static void
+bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
+{
+ birdloop_enter(p->p.loop);
+ bfd_remove_session_locked(p, s);
+ birdloop_leave(p->p.loop);
}
static void
bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
{
- ASSERT_DIE(birdloop_inside(p->p.loop));
- BFD_LOCK;
if (EMPTY_LIST(s->request_list))
- {
- bfd_remove_session(p, s);
- BFD_UNLOCK;
return;
- }
+
+ birdloop_enter(p->p.loop);
struct bfd_request *req = SKIP_BACK(struct bfd_request, n, HEAD(s->request_list));
s->cf = bfd_merge_options(s->ifa->cf, &req->opts);
- u32 tx = (BFD_LOC_STATE(s).state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int;
+ u32 tx = (s->loc_state == BFD_STATE_UP) ? s->cf.min_tx_int : s->cf.idle_tx_int;
bfd_session_set_min_tx(s, tx);
bfd_session_set_min_rx(s, s->cf.min_rx_int);
s->detect_mult = s->cf.multiplier;
@@ -541,15 +541,9 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
bfd_session_control_tx_timer(s, 0);
- TRACE(D_EVENTS, "Session to %I reconfigured", s->addr);
- BFD_UNLOCK;
-}
+ birdloop_leave(p->p.loop);
-static void
-bfd_reconfigure_session_hook(void *data)
-{
- struct bfd_session *s = data;
- return bfd_reconfigure_session(s->ifa->bfd, s);
+ TRACE(D_EVENTS, "Session to %I reconfigured", s->addr);
}
@@ -586,7 +580,7 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface)
struct bfd_config *cf = (struct bfd_config *) (p->p.cf);
struct bfd_iface_config *ic = bfd_find_iface_config(cf, iface);
- ifa = mb_allocz(p->p.pool, sizeof(struct bfd_iface));
+ ifa = mb_allocz(p->tpool, sizeof(struct bfd_iface));
ifa->local = local;
ifa->iface = iface;
ifa->cf = ic;
@@ -595,6 +589,9 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface)
ifa->sk = bfd_open_tx_sk(p, local, iface);
ifa->uc = 1;
+ if (cf->strict_bind)
+ ifa->rx = bfd_open_rx_sk_bound(p, local, iface);
+
add_tail(&p->iface_list, &ifa->n);
return ifa;
@@ -607,17 +604,17 @@ bfd_free_iface(struct bfd_iface *ifa)
return;
if (ifa->sk)
- {
- sk_stop(ifa->sk);
rfree(ifa->sk);
- }
+
+ if (ifa->rx)
+ rfree(ifa->rx);
rem_node(&ifa->n);
mb_free(ifa);
}
static void
-bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct bfd_config *nc)
+bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_config *nc)
{
struct bfd_iface_config *new = bfd_find_iface_config(nc, ifa->iface);
struct bfd_iface_config *old = ifa->cf;
@@ -631,7 +628,9 @@ bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct
(new->passive != old->passive);
/* This should be probably changed to not access ifa->cf from the BFD thread */
+ birdloop_enter(p->p.loop);
ifa->cf = new;
+ birdloop_leave(p->p.loop);
}
@@ -640,55 +639,77 @@ bfd_reconfigure_iface(struct bfd_proto *p UNUSED, struct bfd_iface *ifa, struct
*/
static void
-bfd_request_notify(void *data)
+bfd_request_notify(struct bfd_request *req, u8 state, u8 diag)
{
- struct bfd_request *req = data;
- struct bfd_session_state old = req->old_state;
-
- BFD_LOCK; /* Needed to safely access req->session */
- struct bfd_session_state new = atomic_load_explicit(&req->session->loc, memory_order_acquire);
- BFD_UNLOCK;
+ u8 old_state = req->state;
- if (new.state == old.state)
+ if (state == old_state)
return;
- req->state = new.state;
- req->diag = new.diag;
- req->old_state = new;
- req->down = (old.state == BFD_STATE_UP) && (new.state == BFD_STATE_DOWN);
+ req->state = state;
+ req->diag = diag;
+ req->old_state = old_state;
+ req->down = (old_state == BFD_STATE_UP) && (state == BFD_STATE_DOWN);
if (req->hook)
+ {
+ struct birdloop *target = !birdloop_inside(req->target) ? req->target : NULL;
+
+ if (target)
+ birdloop_enter(target);
+
req->hook(req);
+
+ if (target)
+ birdloop_leave(target);
+ }
}
static int
bfd_add_request(struct bfd_proto *p, struct bfd_request *req)
{
- BFD_ASSERT_LOCKED;
- ASSERT_DIE(req->session == &bfd_admin_down);
-
struct bfd_config *cf = (struct bfd_config *) (p->p.cf);
if (p->p.vrf && (p->p.vrf != req->vrf))
+ {
+ TRACE(D_EVENTS, "Not accepting request to %I with different VRF", req->addr);
return 0;
+ }
if (ipa_is_ip4(req->addr) ? !cf->accept_ipv4 : !cf->accept_ipv6)
+ {
+ TRACE(D_EVENTS, "Not accepting request to %I (AF limit)", req->addr);
return 0;
+ }
if (req->iface ? !cf->accept_direct : !cf->accept_multihop)
+ {
+ TRACE(D_EVENTS, "Not accepting %s request to %I", req->iface ? "direct" : "multihop", req->addr);
return 0;
+ }
uint ifindex = req->iface ? req->iface->index : 0;
struct bfd_session *s = bfd_find_session_by_addr(p, req->addr, ifindex);
- if (!s)
+ if (s)
+ TRACE(D_EVENTS, "Session to %I reused", s->addr);
+ else
s = bfd_add_session(p, req->addr, req->local, req->iface, &req->opts);
rem_node(&req->n);
add_tail(&s->request_list, &req->n);
req->session = s;
- ev_send_self(&req->event);
+ bfd_lock_sessions(p);
+
+ int notify = !NODE_VALID(&s->n);
+ if (notify)
+ add_tail(&p->notify_list, &s->n);
+
+ bfd_unlock_sessions(p);
+
+ if (notify)
+ ev_send(&global_event_list, &p->notify_event);
return 1;
}
@@ -696,35 +717,89 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req)
static void
bfd_pickup_requests(void *_data UNUSED)
{
- struct bfd_proto *p;
- node *nn;
- WALK_LIST2(p, nn, bfd_global.proto_list, bfd_node)
- {
- birdloop_enter(p->p.loop);
+ /* NOTE TO MY FUTURE SELF
+ *
+ * Functions bfd_take_requests() and bfd_drop_requests() need to have
+ * consistent &bfd_global.wait_list and this is ensured only by having these
+ * functions called from bfd_start() and bfd_shutdown() which are both called
+ * in PROTO_LOCKED_FROM_MAIN context, i.e. always from &main_birdloop.
+ *
+ * This pickup event is also called in &main_birdloop, therefore we can
+ * freely do BFD_LOCK/BFD_UNLOCK while processing all the requests. All BFD
+ * protocols capable of bfd_add_request() are either started before this code
+ * happens or after that.
+ *
+ * If BFD protocols could start in parallel with this routine, they might
+ * miss some of the waiting requests, thus if anybody tries to start
+ * protocols or run this pickup event outside &main_birdloop in future, they
+ * shall ensure that this race condition is mitigated somehow.
+ *
+ * Thank you, my future self, for understanding. Have a nice day!
+ */
+
+ DBG("BFD pickup loop starting");
+
+ BFD_LOCK;
+ do {
+ bfd_global.pickup_reload = 0;
+ BFD_UNLOCK;
+
+ node *n;
+ WALK_LIST(n, bfd_global.proto_list)
+ {
+ struct bfd_proto *p = SKIP_BACK(struct bfd_proto, bfd_node, n);
+ birdloop_enter(p->p.loop);
+ BFD_LOCK;
+
+ TRACE(D_EVENTS, "Picking up new requests (%d available)", list_length(&bfd_global.pickup_list));
+
+ node *rn, *rnxt;
+ WALK_LIST_DELSAFE(rn, rnxt, bfd_global.pickup_list)
+ bfd_add_request(p, SKIP_BACK(struct bfd_request, n, rn));
+
+ BFD_UNLOCK;
+
+ /* Remove sessions with no requests */
+ HASH_WALK_DELSAFE(p->session_hash_id, next_id, s)
+ {
+ if (EMPTY_LIST(s->request_list))
+ bfd_remove_session_locked(p, s);
+ }
+ HASH_WALK_END;
+
+ birdloop_leave(p->p.loop);
+ }
+
BFD_LOCK;
+ } while (bfd_global.pickup_reload);
- struct bfd_request *req;
- node *rn, *rnxt;
- WALK_LIST2_DELSAFE(req, rn, rnxt, bfd_global.wait_list, n)
- bfd_add_request(p, req);
+ list tmp_list;
+ init_list(&tmp_list);
+ add_tail_list(&tmp_list, &bfd_global.pickup_list);
- BFD_UNLOCK;
- birdloop_ping(p->p.loop);
- birdloop_leave(p->p.loop);
- }
+ init_list(&bfd_global.pickup_list);
+ BFD_UNLOCK;
+
+ log(L_TRACE "No protocol for %d BFD requests", list_length(&tmp_list));
+
+ node *n;
+ WALK_LIST(n, tmp_list)
+ bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), BFD_STATE_ADMIN_DOWN, 0);
+
+ BFD_LOCK;
+ add_tail_list(&bfd_global.wait_list, &tmp_list);
+ BFD_UNLOCK;
}
static event bfd_pickup_event = { .hook = bfd_pickup_requests };
-#define bfd_schedule_pickup() ev_send(&global_event_list, &bfd_pickup_event)
static void
bfd_take_requests(struct bfd_proto *p)
{
- struct bfd_request *req;
node *n, *nn;
BFD_LOCK;
- WALK_LIST2_DELSAFE(req, n, nn, bfd_global.wait_list, n)
- bfd_add_request(p, req);
+ WALK_LIST_DELSAFE(n, nn, bfd_global.wait_list)
+ bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n));
BFD_UNLOCK;
}
@@ -739,13 +814,13 @@ bfd_drop_requests(struct bfd_proto *p)
{
struct bfd_request *req = SKIP_BACK(struct bfd_request, n, n);
rem_node(&req->n);
- add_tail(&bfd_global.wait_list, &req->n);
- req->session = &bfd_admin_down;
- ev_send_self(&req->event);
+ add_tail(&bfd_global.pickup_list, &req->n);
+ req->session = NULL;
}
- bfd_schedule_pickup();
- bfd_remove_session(p, s);
+ ev_send(&global_event_list, &bfd_pickup_event);
+
+ bfd_remove_session_locked(p, s);
}
HASH_WALK_END;
BFD_UNLOCK;
@@ -757,7 +832,7 @@ struct bfd_request *
bfd_request_session(pool *p, ip_addr addr, ip_addr local,
struct iface *iface, struct iface *vrf,
void (*hook)(struct bfd_request *), void *data,
- struct event_list *list,
+ struct birdloop *target,
const struct bfd_options *opts)
{
struct bfd_request *req = ralloc(p, &bfd_request_class);
@@ -770,18 +845,18 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local,
if (opts)
req->opts = *opts;
+ ASSERT_DIE(target || !hook);
req->hook = hook;
req->data = data;
- req->event = (event) {
- .hook = bfd_request_notify,
- .data = req,
- .list = list,
- };
+ req->target = target;
+
+ req->session = NULL;
BFD_LOCK;
- req->session = &bfd_admin_down;
- add_tail(&bfd_global.wait_list, &req->n);
- bfd_schedule_pickup();
+ bfd_global.pickup_reload++;
+ add_tail(&bfd_global.pickup_list, &req->n);
+ ev_send(&global_event_list, &bfd_pickup_event);
+ DBG("New BFD request enlisted.\n");
BFD_UNLOCK;
return req;
@@ -790,17 +865,15 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local,
void
bfd_update_request(struct bfd_request *req, const struct bfd_options *opts)
{
+ struct bfd_session *s = req->session;
+
if (!memcmp(opts, &req->opts, sizeof(const struct bfd_options)))
return;
- BFD_LOCK;
req->opts = *opts;
- struct bfd_session *s = req->session;
- if (s != &bfd_admin_down)
- ev_send_self(&s->update_event);
-
- BFD_UNLOCK;
+ if (s)
+ bfd_reconfigure_session(s->ifa->bfd, s);
}
static void
@@ -812,11 +885,11 @@ bfd_request_free(resource *r)
rem_node(&req->n);
BFD_UNLOCK;
- ev_postpone(&req->event);
+ ev_send(&global_event_list, &bfd_pickup_event);
}
static void
-bfd_request_dump(resource *r)
+bfd_request_dump(resource *r, unsigned indent UNUSED)
{
struct bfd_request *req = (struct bfd_request *) r;
@@ -849,7 +922,7 @@ bfd_neigh_notify(struct neighbor *nb)
if ((nb->scope > 0) && !n->req)
{
ip_addr local = ipa_nonzero(n->local) ? n->local : nb->ifa->ip;
- n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, birdloop_event_list(p->p.loop), NULL);
+ n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, NULL, NULL);
}
if ((nb->scope <= 0) && n->req)
@@ -866,7 +939,7 @@ bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n)
if (n->multihop)
{
- n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, birdloop_event_list(p->p.loop), NULL);
+ n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, NULL, NULL);
return;
}
@@ -941,23 +1014,53 @@ bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new)
/*
- * BFD protocol glue
+ * BFD notify socket
*/
-void
-bfd_init_all(void)
+/* This core notify code should be replaced after main loop transition to birdloop */
+
+static void
+bfd_notify_hook(void *data)
{
- bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global");
- init_list(&bfd_global.wait_list);
- init_list(&bfd_global.proto_list);
+ struct bfd_proto *p = data;
+ struct bfd_session *s;
+ list tmp_list;
+ u8 state, diag;
+ node *n, *nn;
+
+ bfd_lock_sessions(p);
+ init_list(&tmp_list);
+ add_tail_list(&tmp_list, &p->notify_list);
+ init_list(&p->notify_list);
+ bfd_unlock_sessions(p);
+
+ WALK_LIST_FIRST(s, tmp_list)
+ {
+ bfd_lock_sessions(p);
+ rem_node(&s->n);
+ state = s->loc_state;
+ diag = s->loc_diag;
+ bfd_unlock_sessions(p);
+
+ WALK_LIST_DELSAFE(n, nn, s->request_list)
+ bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag);
+
+ /* Remove the session if all requests were removed in notify hooks */
+ if (EMPTY_LIST(s->request_list))
+ bfd_remove_session(p, s);
+ }
}
+/*
+ * BFD protocol glue
+ */
+
static struct proto *
bfd_init(struct proto_config *c)
{
struct proto *p = proto_new(c);
- p->neigh_notify = bfd_neigh_notify;
+ p->iface_sub.neigh_notify = bfd_neigh_notify;
return p;
}
@@ -968,25 +1071,38 @@ bfd_start(struct proto *P)
struct bfd_proto *p = (struct bfd_proto *) P;
struct bfd_config *cf = (struct bfd_config *) (P->cf);
+ pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE);
+
+ p->tpool = rp_new(P->pool, "BFD loop pool");
+
p->session_slab = sl_new(P->pool, sizeof(struct bfd_session));
HASH_INIT(p->session_hash_id, P->pool, 8);
HASH_INIT(p->session_hash_ip, P->pool, 8);
init_list(&p->iface_list);
+ init_list(&p->notify_list);
+ p->notify_event = (event) {
+ .hook = bfd_notify_hook,
+ .data = p,
+ };
+
add_tail(&bfd_global.proto_list, &p->bfd_node);
- if (cf->accept_ipv4 && cf->accept_direct)
- p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4);
+ if (!cf->strict_bind)
+ {
+ if (cf->accept_ipv4 && cf->accept_direct)
+ p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4);
- if (cf->accept_ipv4 && cf->accept_multihop)
- p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4);
+ if (cf->accept_ipv4 && cf->accept_multihop)
+ p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4);
- if (cf->accept_ipv6 && cf->accept_direct)
- p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6);
+ if (cf->accept_ipv6 && cf->accept_direct)
+ p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6);
- if (cf->accept_ipv6 && cf->accept_multihop)
- p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6);
+ if (cf->accept_ipv6 && cf->accept_multihop)
+ p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6);
+ }
bfd_take_requests(p);
@@ -1011,11 +1127,6 @@ bfd_shutdown(struct proto *P)
bfd_drop_requests(p);
- if (p->rx4_1) sk_stop(p->rx4_1);
- if (p->rx4_m) sk_stop(p->rx4_m);
- if (p->rx6_1) sk_stop(p->rx6_1);
- if (p->rx6_m) sk_stop(p->rx6_m);
-
return PS_DOWN;
}
@@ -1027,13 +1138,12 @@ bfd_reconfigure(struct proto *P, struct proto_config *c)
struct bfd_config *new = (struct bfd_config *) c;
struct bfd_iface *ifa;
- ASSERT_DIE(birdloop_inside(P->loop));
-
/* TODO: Improve accept reconfiguration */
if ((new->accept_ipv4 != old->accept_ipv4) ||
(new->accept_ipv6 != old->accept_ipv6) ||
(new->accept_direct != old->accept_direct) ||
- (new->accept_multihop != old->accept_multihop))
+ (new->accept_multihop != old->accept_multihop) ||
+ (new->strict_bind != old->strict_bind))
return 0;
birdloop_mask_wakeups(p->p.loop);
@@ -1041,12 +1151,12 @@ bfd_reconfigure(struct proto *P, struct proto_config *c)
WALK_LIST(ifa, p->iface_list)
bfd_reconfigure_iface(p, ifa, new);
- HASH_WALK_DELSAFE(p->session_hash_id, next_id, s)
+ HASH_WALK(p->session_hash_id, next_id, s)
{
if (s->ifa->changed)
bfd_reconfigure_session(p, s);
}
- HASH_WALK_DELSAFE_END;
+ HASH_WALK_END;
bfd_reconfigure_neighbors(p, new);
@@ -1071,14 +1181,13 @@ bfd_show_sessions(struct proto *P)
{
byte tbuf[TM_DATETIME_BUFFER_SIZE];
struct bfd_proto *p = (struct bfd_proto *) P;
+ uint state, diag UNUSED;
btime tx_int, timeout;
const char *ifname;
- birdloop_enter(P->loop);
if (p->p.proto_state != PS_UP)
{
cli_msg(-1020, "%s: is not up", p->p.name);
- birdloop_leave(P->loop);
return;
}
@@ -1086,9 +1195,12 @@ bfd_show_sessions(struct proto *P)
cli_msg(-1020, "%-25s %-10s %-10s %-12s %8s %8s",
"IP address", "Interface", "State", "Since", "Interval", "Timeout");
+
HASH_WALK(p->session_hash_id, next_id, s)
{
- uint state = BFD_LOC_STATE(s).state;
+ /* FIXME: this is thread-unsafe, but perhaps harmless */
+ state = s->loc_state;
+ diag = s->loc_diag;
ifname = (s->ifa && s->ifa->iface) ? s->ifa->iface->name : "---";
tx_int = s->last_tx ? MAX(s->des_min_tx_int, s->rem_min_rx_int) : 0;
timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult;
@@ -1100,15 +1212,12 @@ bfd_show_sessions(struct proto *P)
s->addr, ifname, bfd_state_names[state], tbuf, tx_int, timeout);
}
HASH_WALK_END;
-
- birdloop_leave(P->loop);
}
struct protocol proto_bfd = {
.name = "BFD",
.template = "bfd%d",
- .class = PROTOCOL_BFD,
.proto_size = sizeof(struct bfd_proto),
.config_size = sizeof(struct bfd_config),
.init = bfd_init,
@@ -1117,3 +1226,14 @@ struct protocol proto_bfd = {
.reconfigure = bfd_reconfigure,
.copy_config = bfd_copy_config,
};
+
+void
+bfd_build(void)
+{
+ proto_build(&proto_bfd);
+
+ bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global");
+ init_list(&bfd_global.wait_list);
+ init_list(&bfd_global.pickup_list);
+ init_list(&bfd_global.proto_list);
+}
diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h
index ffb1c43f..a4b7d63c 100644
--- a/proto/bfd/bfd.h
+++ b/proto/bfd/bfd.h
@@ -13,7 +13,7 @@
#include "nest/cli.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/password.h"
#include "conf/conf.h"
#include "lib/hash.h"
@@ -47,6 +47,7 @@ struct bfd_config
u8 accept_ipv6;
u8 accept_direct;
u8 accept_multihop;
+ u8 strict_bind;
};
struct bfd_iface_config
@@ -88,12 +89,19 @@ struct bfd_proto
{
struct proto p;
+ pthread_spinlock_t lock;
+
+ pool *tpool;
+
node bfd_node;
slab *session_slab;
HASH(struct bfd_session) session_hash_id;
HASH(struct bfd_session) session_hash_ip;
+ event notify_event;
+ list notify_list;
+
sock *rx4_1;
sock *rx6_1;
sock *rx4_m;
@@ -110,12 +118,14 @@ struct bfd_iface
struct bfd_proto *bfd;
sock *sk;
+ sock *rx;
u32 uc;
u8 changed;
};
struct bfd_session
{
+ node n;
ip_addr addr; /* Address of session */
struct bfd_iface *ifa; /* Iface associated with session */
struct bfd_session *next_id; /* Next in bfd.session_hash_id */
@@ -126,15 +136,14 @@ struct bfd_session
u8 poll_active;
u8 poll_scheduled;
- _Atomic struct bfd_session_state loc;
- struct bfd_session_state rem;
-#define BFD_LOC_STATE(s) ({ struct bfd_session_state _bss = atomic_load_explicit(&(s)->loc, memory_order_relaxed); _bss; })
-
+ u8 loc_state;
+ u8 rem_state;
+ u8 loc_diag;
+ u8 rem_diag;
u32 loc_id; /* Local session ID (local discriminator) */
u32 rem_id; /* Remote session ID (remote discriminator) */
- struct bfd_session_config cf; /* Static configuration parameers */
- event update_event; /* Reconfiguration requested */
+ struct bfd_session_config cf; /* Static configuration parameters */
u32 des_min_tx_int; /* Desired min rx interval, local option */
u32 des_min_tx_new; /* Used for des_min_tx_int change */
@@ -156,8 +165,6 @@ struct bfd_session
list request_list; /* List of client requests (struct bfd_request) */
btime last_state_change; /* Time of last state change */
- btime last_reqlist_check; /* Time of last check whether the request list is not empty */
- u8 notify_running; /* 1 if notify hooks are running */
u8 rx_csn_known; /* Received crypto sequence number is known */
u32 rx_csn; /* Last received crypto sequence number */
@@ -203,6 +210,10 @@ extern const char *bfd_state_names[];
extern const u8 bfd_auth_type_to_hash_alg[];
+
+static inline void bfd_lock_sessions(struct bfd_proto *p) { pthread_spin_lock(&p->lock); }
+static inline void bfd_unlock_sessions(struct bfd_proto *p) { pthread_spin_unlock(&p->lock); }
+
/* bfd.c */
struct bfd_session * bfd_find_session_by_id(struct bfd_proto *p, u32 id);
struct bfd_session * bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr, uint ifindex);
@@ -212,6 +223,7 @@ void bfd_show_sessions(struct proto *P);
/* packets.c */
void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final);
sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version);
+sock * bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa);
sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa);
diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y
index ed5479fb..8e608bda 100644
--- a/proto/bfd/config.Y
+++ b/proto/bfd/config.Y
@@ -23,7 +23,8 @@ CF_DECLS
CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE,
INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL, AUTHENTICATION,
- NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT)
+ NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT,
+ STRICT, BIND)
%type <iface> bfd_neigh_iface
%type <a> bfd_neigh_local
@@ -37,6 +38,7 @@ bfd_proto_start: proto_start BFD
{
this_proto = proto_config_new(&proto_bfd, $1);
this_proto->loop_order = DOMAIN_ORDER(proto);
+ this_proto->loop_max_latency = 10 MS_;
init_list(&BFD_CFG->patt_list);
init_list(&BFD_CFG->neigh_list);
BFD_CFG->accept_ipv4 = BFD_CFG->accept_ipv6 = 1;
@@ -49,6 +51,7 @@ bfd_proto_item:
| INTERFACE bfd_iface
| MULTIHOP bfd_multihop
| NEIGHBOR bfd_neighbor
+ | STRICT BIND bool { BFD_CFG->strict_bind = $3; }
;
bfd_proto_opts:
diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c
index 893d582d..a22f223b 100644
--- a/proto/bfd/packets.c
+++ b/proto/bfd/packets.c
@@ -290,11 +290,9 @@ bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final)
if (!sk)
return;
- struct bfd_session_state loc = BFD_LOC_STATE(s);
-
pkt = (struct bfd_ctl_packet *) sk->tbuf;
- pkt->vdiag = bfd_pack_vdiag(1, loc.diag);
- pkt->flags = bfd_pack_flags(loc.state, 0);
+ pkt->vdiag = bfd_pack_vdiag(1, s->loc_diag);
+ pkt->flags = bfd_pack_flags(s->loc_state, 0);
pkt->detect_mult = s->detect_mult;
pkt->length = BFD_BASE_LEN;
pkt->snd_id = htonl(s->loc_id);
@@ -315,7 +313,7 @@ bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final)
log(L_WARN "%s: Old packet overwritten in TX buffer", p->p.name);
TRACE(D_PACKETS, "Sending CTL to %I [%s%s]", s->addr,
- bfd_state_names[loc.state], bfd_format_flags(pkt->flags, fb));
+ bfd_state_names[s->loc_state], bfd_format_flags(pkt->flags, fb));
sk_send_to(sk, pkt->length, s->addr, sk->dport);
}
@@ -368,12 +366,18 @@ bfd_rx_hook(sock *sk, uint len)
if (ps > BFD_STATE_DOWN)
DROP("invalid init state", ps);
- uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? sk->lifindex : 0;
+ uint ifindex = (sk->sport == BFD_CONTROL_PORT) ?
+ (sk->iface ? sk->iface->index : sk->lifindex) :
+ 0;
s = bfd_find_session_by_addr(p, sk->faddr, ifindex);
/* FIXME: better session matching and message */
if (!s)
return 1;
+
+ /* For active sessions we require matching remote id */
+ if ((s->loc_state == BFD_STATE_UP) && (ntohl(pkt->snd_id) != s->rem_id))
+ DROP("mismatched remote id", ntohl(pkt->snd_id));
}
/* bfd_check_authentication() has its own error logging */
@@ -384,17 +388,16 @@ bfd_rx_hook(sock *sk, uint len)
u32 old_rx_int = s->rem_min_rx_int;
s->rem_id= ntohl(pkt->snd_id);
- s->rem.state = bfd_pkt_get_state(pkt);
- s->rem.diag = bfd_pkt_get_diag(pkt);
+ s->rem_state = bfd_pkt_get_state(pkt);
+ s->rem_diag = bfd_pkt_get_diag(pkt);
s->rem_demand_mode = pkt->flags & BFD_FLAG_DEMAND;
s->rem_min_tx_int = ntohl(pkt->des_min_tx_int);
s->rem_min_rx_int = ntohl(pkt->req_min_rx_int);
s->rem_detect_mult = pkt->detect_mult;
TRACE(D_PACKETS, "CTL received from %I [%s%s]", sk->faddr,
- bfd_state_names[s->rem.state], bfd_format_flags(pkt->flags, fb));
+ bfd_state_names[s->rem_state], bfd_format_flags(pkt->flags, fb));
- /* This call may drop the session, must be called in tail position */
bfd_session_process_ctl(s, pkt->flags, old_tx_int, old_rx_int);
return 1;
@@ -427,13 +430,42 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af)
/* TODO: configurable ToS and priority */
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->priority = sk_priority_control;
- sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0);
- sk->loop = p->p.loop;
+ sk->flags = SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0);
+
+ if (sk_open(sk, p->p.loop) < 0)
+ goto err;
+
+ return sk;
+
+ err:
+ sk_log_error(sk, p->p.name);
+ rfree(sk);
+ return NULL;
+}
+
+sock *
+bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa)
+{
+ sock *sk = sk_new(p->tpool);
+ sk->type = SK_UDP;
+ sk->saddr = local;
+ sk->sport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT;
+ sk->iface = ifa;
+ sk->vrf = p->p.vrf;
+ sk->data = p;
+
+ sk->rbsize = BFD_MAX_LEN;
+ sk->rx_hook = bfd_rx_hook;
+ sk->err_hook = bfd_err_hook;
+
+ /* TODO: configurable ToS and priority */
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->priority = sk_priority_control;
+ sk->flags = SKF_BIND | (ifa ? SKF_TTL_RX : 0);
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
- sk_start(sk);
return sk;
err:
@@ -460,13 +492,11 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa)
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->priority = sk_priority_control;
sk->ttl = ifa ? 255 : -1;
- sk->flags = SKF_THREAD | SKF_BIND | SKF_HIGH_PORT;
- sk->loop = p->p.loop;
+ sk->flags = SKF_BIND | SKF_HIGH_PORT;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
- sk_start(sk);
return sk;
err:
diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile
index 00aaef5e..f6a38678 100644
--- a/proto/bgp/Makefile
+++ b/proto/bgp/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 02b07410..8bff4c78 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -15,12 +15,13 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
-#include "nest/attrs.h"
+#include "nest/rt.h"
+#include "lib/attrs.h"
#include "conf/conf.h"
#include "lib/resource.h"
#include "lib/string.h"
#include "lib/unaligned.h"
+#include "lib/macro.h"
#include "bgp.h"
@@ -45,9 +46,9 @@
*
* export - Hook that validates and normalizes attribute during export phase.
* Receives eattr, may modify it (e.g., sort community lists for canonical
- * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
- * necessary. May assume that eattr has value valid w.r.t. its type, but may be
- * invalid w.r.t. BGP constraints. Optional.
+ * representation), UNSET() it (e.g., skip empty lists), or REJECT() the route
+ * if necessary. May assume that eattr has value valid w.r.t. its type, but may
+ * be invalid w.r.t. BGP constraints. Optional.
*
* encode - Hook that converts internal representation to external one during
* packet writing. Receives eattr and puts it in the buffer (including attribute
@@ -64,37 +65,72 @@
* format - Optional hook that converts eattr to textual representation.
*/
-
-struct bgp_attr_desc {
- const char *name;
- uint type;
- uint flags;
- void (*export)(struct bgp_export_state *s, eattr *a);
- int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
- void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
- void (*format)(const eattr *ea, byte *buf, uint size);
+union bgp_attr_desc {
+ struct ea_class class;
+ struct {
+ EA_CLASS_INSIDE;
+ uint flags;
+ void (*export)(struct bgp_export_state *s, eattr *a);
+ int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
+ void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
+ };
};
-static const struct bgp_attr_desc bgp_attr_table[];
+static union bgp_attr_desc bgp_attr_table[];
+static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a)
+{
+ const struct ea_class *class = ea_class_find(a->id);
-static inline int bgp_attr_known(uint code);
+ if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class))
+ return NULL;
-eattr *
-bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
+ return (const union bgp_attr_desc *) class;
+}
+
+#define BGP_EA_ID(code) (bgp_attr_table[code].id)
+#define EA_BGP_ID(code) (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table)
+
+void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val)
{
- ASSERT(bgp_attr_known(code));
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
- return ea_set_attr(
- attrs,
- pool,
- EA_CODE(PROTOCOL_BGP, code),
- flags & ~BAF_EXT_LEN,
- bgp_attr_table[code].type,
- val
- );
+ ea_set_attr(to, EA_LITERAL_EMBEDDED(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ val
+ ));
}
+void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+ ea_set_attr(to, EA_LITERAL_DIRECT_ADATA(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ ad
+ ));
+}
+
+void
+bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+
+ ea_set_attr(to, EA_LITERAL_STORE_ADATA(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ data,
+ len
+ ));
+}
+
+void
+bgp_unset_attr(ea_list **to, uint code)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+ ea_unset_attr(to, 0, &desc->class);
+}
#define REPORT(msg, args...) \
({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
@@ -106,7 +142,10 @@ bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintp
({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
#define UNSET(a) \
- ({ a->type = EAF_TYPE_UNDEF; return; })
+ ({ a->undef = 1; return; })
+
+#define REJECT(msg, args...) \
+ ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; })
#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
@@ -148,7 +187,7 @@ bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
if (size < (3+1))
return -1;
- bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
+ bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1);
buf[3] = a->u.data;
return 3+1;
@@ -160,7 +199,7 @@ bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
if (size < (3+4))
return -1;
- bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
+ bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4);
put_u32(buf+3, a->u.data);
return 3+4;
@@ -174,7 +213,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size
if (size < (4+len))
return -1;
- uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
+ uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len);
put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
return hdr + len;
@@ -195,7 +234,7 @@ bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint
static int
bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
- return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
+ return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
}
@@ -333,26 +372,26 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
}
int
-bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad)
+bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad)
{
- eattr *ea = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
- if (!ea)
+ eattr *a = ea_find(e->attrs, BGP_EA_ID(BA_AIGP));
+ if (!a)
return 0;
- const byte *b = bgp_aigp_get_tlv(ea->u.ptr, BGP_AIGP_METRIC);
+ const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
if (!b)
return 0;
u64 aigp = get_u64(b + 3);
- u64 step = a->igp_metric;
+ u64 step = rt_get_igp_metric(e);
- if (!rta_resolvable(a) || (step >= IGP_METRIC_UNKNOWN))
+ if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
step = BGP_AIGP_MAX;
if (!step)
step = 1;
- *ad = ea->u.ptr;
+ *ad = a->u.ptr;
*metric = aigp + step;
if (*metric < aigp)
*metric = BGP_AIGP_MAX;
@@ -363,7 +402,7 @@ bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad)
static inline int
bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
{
- if (e->attrs->source == RTS_BGP)
+ if (rt_get_source_attr(e) == RTS_BGP)
return 0;
*metric = rt_get_igp_metric(e);
@@ -372,9 +411,9 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
}
u32
-bgp_rte_igp_metric(struct rte *rt)
+bgp_rte_igp_metric(const rte *rt)
{
- u64 metric = bgp_total_aigp_metric(rt->attrs);
+ u64 metric = bgp_total_aigp_metric(rt);
return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
}
@@ -387,7 +426,7 @@ static void
bgp_export_origin(struct bgp_export_state *s, eattr *a)
{
if (a->u.data > 2)
- WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
+ REJECT(BAD_VALUE, "ORIGIN", a->u.data);
}
static void
@@ -399,7 +438,7 @@ bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte
if (data[0] > 2)
WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
- bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
+ bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]);
}
static void
@@ -467,7 +506,7 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte
!bgp_as_path_first_as_equal(data, len, p->remote_as))
WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
- bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
+ bgp_set_attr_data(to, BA_AS_PATH, flags, data, len);
}
@@ -539,7 +578,7 @@ bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da
WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
+ bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val);
}
@@ -560,7 +599,7 @@ bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, b
WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
+ bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val);
}
@@ -570,7 +609,7 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags,
if (len != 0)
DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
- bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
+ bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0);
}
static int
@@ -604,7 +643,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b
len = aggregator_16to32(data, src);
}
- bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
+ bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len);
}
static void
@@ -633,7 +672,7 @@ bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, by
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad);
}
@@ -654,7 +693,7 @@ bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags
WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
+ bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val);
}
@@ -679,7 +718,7 @@ bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags,
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
+ bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad);
}
static void
@@ -798,7 +837,7 @@ bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad);
}
@@ -811,7 +850,7 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag
if (len != 8)
DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
- bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
+ bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len);
}
static void
@@ -841,7 +880,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt
a = as_path_strip_confed(s->pool, a);
}
- bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
+ bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a);
}
@@ -865,7 +904,7 @@ bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *d
if (!bgp_aigp_valid(data, len, err, sizeof(err)))
DISCARD("Malformed AIGP attribute - %s", err);
- bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
+ bgp_set_attr_data(to, BA_AIGP, flags, data, len);
}
static void
@@ -897,9 +936,21 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad);
+}
+
+
+static void
+bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
+{
+ if (len != 4)
+ WITHDRAW(BAD_LENGTH, "OTC", len);
+
+ u32 val = get_u32(data);
+ bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, flags, val);
}
+
static void
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
{
@@ -909,20 +960,20 @@ bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
/* Perhaps we should just ignore it? */
if (!s->mpls)
- WITHDRAW("Unexpected MPLS stack");
+ REJECT("Unexpected MPLS stack");
/* Empty MPLS stack is not allowed */
if (!lnum)
- WITHDRAW("Malformed MPLS stack - empty");
+ REJECT("Malformed MPLS stack - empty");
/* This is ugly, but we must ensure that labels fit into NLRI field */
if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
- WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
+ REJECT("Malformed MPLS stack - too many labels (%u)", lnum);
for (uint i = 0; i < lnum; i++)
{
if (labels[i] > 0xfffff)
- WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
+ REJECT("Malformed MPLS stack - invalid label (%u)", labels[i]);
/* TODO: Check for special-purpose label values? */
}
@@ -970,10 +1021,29 @@ bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size)
}
static inline void
-bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
+bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a)
{
+ if (!(a->flags & BAF_TRANSITIVE))
+ UNSET(a);
+
+ a->flags |= BAF_PARTIAL;
+}
+
+static inline void
+bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to)
+{
+ if (!(flags & BAF_OPTIONAL))
+ WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
+
/* Cannot use bgp_set_attr_data() as it works on known attributes only */
- ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
+ ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len);
+}
+
+static inline void
+bgp_format_unknown(const eattr *a, byte *buf, uint size)
+{
+ if (a->flags & BAF_TRANSITIVE)
+ bsnprintf(buf, size, "(transitive)");
}
@@ -981,10 +1051,10 @@ bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data,
* Attribute table
*/
-static const struct bgp_attr_desc bgp_attr_table[] = {
+static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = {
[BA_ORIGIN] = {
- .name = "origin",
- .type = EAF_TYPE_INT,
+ .name = "bgp_origin",
+ .type = T_ENUM_BGP_ORIGIN,
.flags = BAF_TRANSITIVE,
.export = bgp_export_origin,
.encode = bgp_encode_u8,
@@ -992,69 +1062,69 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_origin,
},
[BA_AS_PATH] = {
- .name = "as_path",
- .type = EAF_TYPE_AS_PATH,
+ .name = "bgp_path",
+ .type = T_PATH,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_as_path,
.decode = bgp_decode_as_path,
},
[BA_NEXT_HOP] = {
- .name = "next_hop",
- .type = EAF_TYPE_IP_ADDRESS,
+ .name = "bgp_next_hop",
+ .type = T_IP,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_next_hop,
.decode = bgp_decode_next_hop,
.format = bgp_format_next_hop,
},
[BA_MULTI_EXIT_DISC] = {
- .name = "med",
- .type = EAF_TYPE_INT,
+ .name = "bgp_med",
+ .type = T_INT,
.flags = BAF_OPTIONAL,
.encode = bgp_encode_u32,
.decode = bgp_decode_med,
},
[BA_LOCAL_PREF] = {
- .name = "local_pref",
- .type = EAF_TYPE_INT,
+ .name = "bgp_local_pref",
+ .type = T_INT,
.flags = BAF_TRANSITIVE,
.export = bgp_export_local_pref,
.encode = bgp_encode_u32,
.decode = bgp_decode_local_pref,
},
[BA_ATOMIC_AGGR] = {
- .name = "atomic_aggr",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_atomic_aggr",
+ .type = T_OPAQUE,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_atomic_aggr,
},
[BA_AGGREGATOR] = {
- .name = "aggregator",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_aggregator",
+ .type = T_OPAQUE,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_aggregator,
.decode = bgp_decode_aggregator,
.format = bgp_format_aggregator,
},
[BA_COMMUNITY] = {
- .name = "community",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_community",
+ .type = T_CLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_community,
},
[BA_ORIGINATOR_ID] = {
- .name = "originator_id",
- .type = EAF_TYPE_ROUTER_ID,
+ .name = "bgp_originator_id",
+ .type = T_QUAD,
.flags = BAF_OPTIONAL,
.export = bgp_export_originator_id,
.encode = bgp_encode_u32,
.decode = bgp_decode_originator_id,
},
[BA_CLUSTER_LIST] = {
- .name = "cluster_list",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_cluster_list",
+ .type = T_CLIST,
.flags = BAF_OPTIONAL,
.export = bgp_export_cluster_list,
.encode = bgp_encode_u32s,
@@ -1062,43 +1132,47 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_cluster_list,
},
[BA_MP_REACH_NLRI] = {
- .name = "mp_reach_nlri",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_mp_reach_nlri",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_reach_nlri,
},
[BA_MP_UNREACH_NLRI] = {
- .name = "mp_unreach_nlri",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_mp_unreach_nlri",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_unreach_nlri,
},
[BA_EXT_COMMUNITY] = {
- .name = "ext_community",
- .type = EAF_TYPE_EC_SET,
+ .name = "bgp_ext_community",
+ .type = T_ECLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_ext_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_ext_community,
},
[BA_AS4_PATH] = {
- .name = "as4_path",
- .type = EAF_TYPE_AS_PATH,
+ .name = "bgp_as4_path",
+ .type = T_PATH,
+ .hidden = 1,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_path,
},
[BA_AS4_AGGREGATOR] = {
- .name = "as4_aggregator",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_as4_aggregator",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_aggregator,
.format = bgp_format_aggregator,
},
[BA_AIGP] = {
- .name = "aigp",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_aigp",
+ .type = T_OPAQUE,
.flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
.export = bgp_export_aigp,
.encode = bgp_encode_raw,
@@ -1106,16 +1180,24 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_aigp,
},
[BA_LARGE_COMMUNITY] = {
- .name = "large_community",
- .type = EAF_TYPE_LC_SET,
+ .name = "bgp_large_community",
+ .type = T_LCLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_large_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_large_community,
},
+ [BA_ONLY_TO_CUSTOMER] = {
+ .name = "otc",
+ .type = T_INT,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .encode = bgp_encode_u32,
+ .decode = bgp_decode_otc,
+ },
[BA_MPLS_LABEL_STACK] = {
- .name = "mpls_label_stack",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_mpls_label_stack",
+ .type = T_CLIST,
+ .readonly = 1,
.export = bgp_export_mpls_label_stack,
.encode = bgp_encode_mpls_label_stack,
.decode = bgp_decode_mpls_label_stack,
@@ -1123,12 +1205,32 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
},
};
-static inline int
-bgp_attr_known(uint code)
+eattr *
+bgp_find_attr(ea_list *attrs, uint code)
{
- return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
+ return ea_find(attrs, BGP_EA_ID(code));
}
+void
+bgp_register_attrs(void)
+{
+ for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++)
+ {
+ if (!bgp_attr_table[i].name)
+ bgp_attr_table[i] = (union bgp_attr_desc) {
+ .name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i),
+ .type = T_OPAQUE,
+ .flags = BAF_OPTIONAL,
+ .readonly = 1,
+ .export = bgp_export_unknown,
+ .encode = bgp_encode_raw,
+ .decode = bgp_decode_unknown,
+ .format = bgp_format_unknown,
+ };
+
+ ea_register_init(&bgp_attr_table[i].class);
+ }
+}
/*
* Attribute export
@@ -1137,38 +1239,24 @@ bgp_attr_known(uint code)
static inline void
bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
{
- if (EA_PROTO(a->id) != PROTOCOL_BGP)
+ const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
+ if (!desc)
return;
- uint code = EA_ID(a->id);
+ /* The flags might have been zero if the attr was added locally */
+ a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
- if (bgp_attr_known(code))
- {
- const struct bgp_attr_desc *desc = &bgp_attr_table[code];
-
- /* The flags might have been zero if the attr was added by filters */
- a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
-
- /* Set partial bit if new opt-trans attribute is attached to non-local route */
- if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
- (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
- a->flags |= BAF_PARTIAL;
-
- /* Call specific hook */
- CALL(desc->export, s, a);
+ /* Set partial bit if new opt-trans attribute is attached to non-local route */
+ if ((s->src != NULL) && (a->originated) &&
+ (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
+ a->flags |= BAF_PARTIAL;
- /* Attribute might become undefined in hook */
- if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
- return;
- }
- else
- {
- /* Don't re-export unknown non-transitive attributes */
- if (!(a->flags & BAF_TRANSITIVE))
- return;
+ /* Call specific hook */
+ CALL(desc->export, s, a);
- a->flags |= BAF_PARTIAL;
- }
+ /* Attribute might become undefined in hook */
+ if (a->undef)
+ return;
/* Append updated attribute */
to->attrs[to->count++] = *a;
@@ -1188,12 +1276,11 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
* Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
*/
static inline ea_list *
-bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
+bgp_export_attrs(struct bgp_export_state *s, ea_list *a)
{
/* Merge the attribute list */
- ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
- ea_merge(attrs, new);
- ea_sort(new);
+ ea_list *new = ea_normalize(a, 0);
+ ASSERT_DIE(new);
uint i, count;
count = new->count;
@@ -1203,7 +1290,7 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
for (i = 0; i < count; i++)
bgp_export_attr(s, &new->attrs[i], new);
- if (s->err_withdraw)
+ if (s->err_reject)
return NULL;
return new;
@@ -1217,14 +1304,9 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
static inline int
bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
- ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
-
- uint code = EA_ID(a->id);
-
- if (bgp_attr_known(code))
- return bgp_attr_table[code].encode(s, a, buf, size);
- else
- return bgp_encode_raw(s, a, buf, size);
+ const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
+ ASSERT_DIE(desc);
+ return desc->encode(s, a, buf, size);
}
/**
@@ -1289,7 +1371,7 @@ bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
}
static inline void
-bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
+bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to)
{
/* Handle duplicate attributes; RFC 7606 3 (g) */
if (BIT32_TEST(s->attrs_seen, code))
@@ -1301,24 +1383,15 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui
}
BIT32_SET(s->attrs_seen, code);
- if (bgp_attr_known(code))
- {
- const struct bgp_attr_desc *desc = &bgp_attr_table[code];
+ ASSERT_DIE(bgp_attr_table[code].id);
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
- /* Handle conflicting flags; RFC 7606 3 (c) */
- if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
- !(desc->flags & BAF_DECODE_FLAGS))
- WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
+ /* Handle conflicting flags; RFC 7606 3 (c) */
+ if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
+ !(desc->flags & BAF_DECODE_FLAGS))
+ WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags);
- desc->decode(s, code, flags, data, len, to);
- }
- else /* Unknown attribute */
- {
- if (!(flags & BAF_OPTIONAL))
- WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
-
- bgp_decode_unknown(s, code, flags, data, len, to);
- }
+ desc->decode(s, code, flags, data, len, to);
}
/**
@@ -1336,7 +1409,8 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
{
struct bgp_proto *p = s->proto;
ea_list *attrs = NULL;
- uint code, flags, alen;
+ uint alen;
+ byte code, flags;
byte *pos = data;
/* Parse the attributes */
@@ -1401,23 +1475,23 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
/* Reject routes with our ASN in AS_PATH attribute */
if (bgp_as_path_loopy(p, attrs, p->local_as))
- goto withdraw;
+ goto loop;
/* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
- goto withdraw;
+ goto loop;
/* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
if (p->is_internal && bgp_originator_id_loopy(p, attrs))
- goto withdraw;
+ goto loop;
/* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
- goto withdraw;
+ goto loop;
/* If there is no local preference, define one */
if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
- bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
+ bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
return attrs;
@@ -1434,16 +1508,43 @@ withdraw:
s->err_withdraw = 1;
return NULL;
+
+loop:
+ /* Loops are handled as withdraws, but ignored silently. Do not set err_withdraw. */
+ return NULL;
}
void
-bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
+bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
{
/* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
{
REPORT("Discarding AIGP attribute received on non-AIGP session");
- bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
+ bgp_unset_attr(to, BA_AIGP);
+ }
+
+ /* Handle OTC ingress procedure, RFC 9234 */
+ if (bgp_channel_is_role_applicable(s->channel))
+ {
+ struct bgp_proto *p = s->proto;
+ eattr *e = bgp_find_attr(*to, BA_ONLY_TO_CUSTOMER);
+
+ /* Reject routes from downstream if they are leaked */
+ if (e && (p->cf->local_role == BGP_ROLE_PROVIDER ||
+ p->cf->local_role == BGP_ROLE_RS_SERVER))
+ WITHDRAW("Route leak detected - OTC attribute from downstream");
+
+ /* Reject routes from peers if they are leaked */
+ if (e && (p->cf->local_role == BGP_ROLE_PEER) && (e->u.data != p->cf->remote_as))
+ WITHDRAW("Route leak detected - OTC attribute with mismatched ASN (%u)",
+ (uint) e->u.data);
+
+ /* Mark routes from upstream if it did not happened before */
+ if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER ||
+ p->cf->local_role == BGP_ROLE_PEER ||
+ p->cf->local_role == BGP_ROLE_RS_CLIENT))
+ bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as);
}
}
@@ -1458,13 +1559,13 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
#define RBH_FN(a,h) h
#define RBH_REHASH bgp_rbh_rehash
-#define RBH_PARAMS /8, *2, 2, 2, 8, 20
+#define RBH_PARAMS /8, *2, 2, 2, 12, 20
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
-void
-bgp_init_bucket_table(struct bgp_channel *c)
+static void
+bgp_init_bucket_table(struct bgp_pending_tx *c)
{
HASH_INIT(c->bucket_hash, c->pool, 8);
@@ -1472,24 +1573,8 @@ bgp_init_bucket_table(struct bgp_channel *c)
c->withdraw_bucket = NULL;
}
-void
-bgp_free_bucket_table(struct bgp_channel *c)
-{
- HASH_FREE(c->bucket_hash);
-
- struct bgp_bucket *b;
- WALK_LIST_FIRST(b, c->bucket_queue)
- {
- rem_node(&b->send_node);
- mb_free(b);
- }
-
- mb_free(c->withdraw_bucket);
- c->withdraw_bucket = NULL;
-}
-
static struct bgp_bucket *
-bgp_get_bucket(struct bgp_channel *c, ea_list *new)
+bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new)
{
/* Hash and lookup */
u32 hash = ea_hash(new);
@@ -1498,55 +1583,27 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new)
if (b)
return b;
- uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
- uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
- uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
- uint i;
- byte *dest;
-
- /* Gather total size of non-inline attributes */
- for (i = 0; i < new->count; i++)
- {
- eattr *a = &new->attrs[i];
-
- if (!(a->type & EAF_EMBEDDED))
- size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
- }
+ /* Scan the list for total size */
+ uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new));
+ uint size = sizeof(struct bgp_bucket) + ea_size;
- /* Create the bucket */
+ /* Allocate the bucket */
b = mb_alloc(c->pool, size);
*b = (struct bgp_bucket) { };
init_list(&b->prefixes);
b->hash = hash;
- /* Copy list of extended attributes */
- memcpy(b->eattrs, new, ea_size);
- dest = ((byte *) b->eattrs) + ea_size_aligned;
-
- /* Copy values of non-inline attributes */
- for (i = 0; i < new->count; i++)
- {
- eattr *a = &b->eattrs->attrs[i];
-
- if (!(a->type & EAF_EMBEDDED))
- {
- const struct adata *oa = a->u.ptr;
- struct adata *na = (struct adata *) dest;
- memcpy(na, oa, sizeof(struct adata) + oa->length);
- a->u.ptr = na;
- dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
- }
- }
+ /* Copy the ea_list */
+ ea_list_copy(b->eattrs, new, ea_size);
- /* Insert the bucket to send queue and bucket hash */
- add_tail(&c->bucket_queue, &b->send_node);
+ /* Insert the bucket to bucket hash */
HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
return b;
}
static struct bgp_bucket *
-bgp_get_withdraw_bucket(struct bgp_channel *c)
+bgp_get_withdraw_bucket(struct bgp_pending_tx *c)
{
if (!c->withdraw_bucket)
{
@@ -1557,25 +1614,45 @@ bgp_get_withdraw_bucket(struct bgp_channel *c)
return c->withdraw_bucket;
}
-void
-bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+static void
+bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b)
{
- rem_node(&b->send_node);
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
+int
+bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
+{
+ struct bgp_pending_tx *c = bc->ptx;
+
+ /* Won't free the withdraw bucket */
+ if (b == c->withdraw_bucket)
+ return 0;
+
+ if (EMPTY_LIST(b->prefixes))
+ rem_node(&b->send_node);
+
+ if (b->px_uc || !EMPTY_LIST(b->prefixes))
+ return 0;
+
+ bgp_free_bucket(c, b);
+ return 1;
+}
+
void
-bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
+ struct bgp_pending_tx *c = bc->ptx;
rem_node(&b->send_node);
add_tail(&c->bucket_queue, &b->send_node);
}
void
-bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
- struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_proto *p = (void *) bc->c.proto;
+ struct bgp_pending_tx *c = bc->ptx;
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
log(L_ERR "%s: Attribute list too long", p->p.name);
@@ -1584,8 +1661,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
struct bgp_prefix *px = HEAD(b->prefixes);
log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
- rem_node(&px->buck_node);
- add_tail(&wb->prefixes, &px->buck_node);
+ rem_node(&px->buck_node_xx);
+ add_tail(&wb->prefixes, &px->buck_node_xx);
}
}
@@ -1596,42 +1673,43 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
-#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
+#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
-#define PXH_PARAMS /8, *2, 2, 2, 8, 24
+#define PXH_PARAMS /8, *2, 2, 2, 12, 24
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
-void
-bgp_init_prefix_table(struct bgp_channel *c)
+static void
+bgp_init_prefix_table(struct bgp_channel *bc)
{
+ struct bgp_pending_tx *c = bc->ptx;
HASH_INIT(c->prefix_hash, c->pool, 8);
- uint alen = net_addr_length[c->c.net_type];
+ uint alen = net_addr_length[bc->c.net_type];
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
-void
-bgp_free_prefix_table(struct bgp_channel *c)
-{
- HASH_FREE(c->prefix_hash);
-
- rfree(c->prefix_slab);
- c->prefix_slab = NULL;
-}
-
static struct bgp_prefix *
-bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
+bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx)
{
- u32 hash = net_hash(net) ^ u32_hash(path_id);
- struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
+ u32 path_id = src->global_id;
+ u32 path_id_hash = add_path_tx ? path_id : 0;
+ /* We must use a different hash function than the rtable */
+ u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
+ struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
if (px)
{
- rem_node(&px->buck_node);
+ if (!add_path_tx && (path_id != px->path_id))
+ {
+ rt_unlock_source(rt_find_source_global(px->path_id));
+ rt_lock_source(src);
+ px->path_id = path_id;
+ }
+
return px;
}
@@ -1644,34 +1722,321 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
px->hash = hash;
px->path_id = path_id;
net_copy(px->net, net);
+ rt_lock_source(src);
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
return px;
}
-void
-bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
+static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px);
+
+static inline int
+bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
+{
+#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket)
+#define BPX_TRACE(what) do { \
+ if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
+ c->c.proto->name, c->c.name, what, \
+ px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0)
+ px->lastmod = current_time();
+
+ /* Already queued for the same bucket */
+ if (px->cur == b)
+ {
+ BPX_TRACE("already queued");
+ return 0;
+ }
+
+ /* Unqueue from the old bucket */
+ if (px->cur)
+ {
+ rem_node(&px->buck_node_xx);
+ bgp_done_bucket(c, px->cur);
+ }
+
+ /* The new bucket is the same as we sent before */
+ if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b))
+ {
+ if (px->cur)
+ BPX_TRACE("reverted");
+ else
+ BPX_TRACE("already sent");
+
+ /* Well, we haven't sent anything yet */
+ if (!px->last)
+ bgp_free_prefix(c->ptx, px);
+
+ px->cur = NULL;
+ return 0;
+ }
+
+ /* Enqueue the bucket if it has been empty */
+ if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes))
+ add_tail(&c->ptx->bucket_queue, &b->send_node);
+
+ /* Enqueue to the new bucket and indicate the change */
+ add_tail(&b->prefixes, &px->buck_node_xx);
+ px->cur = b;
+
+ BPX_TRACE("queued");
+ return 1;
+
+#undef BPX_TRACE
+}
+
+static void
+bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px)
{
- rem_node(&px->buck_node);
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
+ rt_unlock_source(rt_find_source_global(px->path_id));
+
if (c->prefix_slab)
- sl_free(c->prefix_slab, px);
+ sl_free(px);
else
mb_free(px);
}
+void
+bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck)
+{
+ /* Cleanup: We're called from bucket senders. */
+ ASSERT_DIE(px->cur == buck);
+ rem_node(&px->buck_node_xx);
+
+ /* We may want to store the updates */
+ if (c->c.out_table)
+ {
+ /* Nothing to be sent right now */
+ px->cur = NULL;
+
+ /* Unref the previous sent version */
+ if (px->last)
+ px->last->px_uc--;
+
+ /* Ref the current sent version */
+ if (!IS_WITHDRAW_BUCKET(buck))
+ {
+ px->last = buck;
+ px->last->px_uc++;
+ return;
+ }
+
+ /* Prefixes belonging to the withdraw bucket are freed always */
+ }
+
+ bgp_free_prefix(c->ptx, px);
+}
+
+static void
+bgp_pending_tx_rfree(resource *r)
+{
+ struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
+
+ HASH_WALK(ptx->prefix_hash, next, n)
+ rt_unlock_source(rt_find_source_global(n->path_id));
+ HASH_WALK_END;
+}
+
+static void bgp_pending_tx_dump(resource *r UNUSED, unsigned indent UNUSED) { debug("\n"); }
+
+static struct resclass bgp_pending_tx_class = {
+ .name = "BGP Pending TX",
+ .size = sizeof(struct bgp_pending_tx),
+ .free = bgp_pending_tx_rfree,
+ .dump = bgp_pending_tx_dump,
+};
+
+void
+bgp_init_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(!c->ptx);
+
+ pool *p = rp_new(c->pool, "BGP Pending TX");
+ c->ptx = ralloc(p, &bgp_pending_tx_class);
+ c->ptx->pool = p;
+
+ bgp_init_bucket_table(c->ptx);
+ bgp_init_prefix_table(c);
+}
+
+void
+bgp_free_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->ptx);
+ ASSERT_DIE(c->ptx->pool);
+
+ rfree(c->ptx->pool);
+ c->ptx = NULL;
+}
+
+
+/*
+ * Prefix hash table exporter
+ */
+
+struct bgp_out_export_hook {
+ struct rt_export_hook h;
+ u32 hash_iter; /* Iterator over hash */
+};
+
+static void
+bgp_out_table_feed(void *data)
+{
+ struct bgp_out_export_hook *hook = data;
+ struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->h.table);
+ struct bgp_pending_tx *c = bc->ptx;
+
+ int max = 512;
+
+ const net_addr *neq = (hook->h.req->addr_mode == TE_ADDR_EQUAL) ? hook->h.req->addr : NULL;
+ const net_addr *cand = NULL;
+
+ do {
+ HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter)
+ {
+ switch (hook->h.req->addr_mode)
+ {
+ case TE_ADDR_IN:
+ if (!net_in_netX(n->net, hook->h.req->addr))
+ continue;
+ /* fall through */
+ case TE_ADDR_NONE:
+ /* Splitting only for multi-net exports */
+ if (--max <= 0)
+ HASH_WALK_ITER_PUT;
+ break;
+
+ case TE_ADDR_FOR:
+ if (!neq)
+ {
+ if (net_in_netX(hook->h.req->addr, n->net) && (!cand || (n->net->length > cand->length)))
+ cand = n->net;
+ continue;
+ }
+ /* fall through */
+ case TE_ADDR_EQUAL:
+ if (!net_equal(n->net, neq))
+ continue;
+ break;
+ }
+
+ struct bgp_bucket *buck = n->cur ?: n->last;
+ ea_list *ea = NULL;
+ if (buck == c->withdraw_bucket)
+ ea_set_dest(&ea, 0, RTD_UNREACHABLE);
+ else
+ {
+ ea = buck->eattrs;
+ eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP);
+ ASSERT_DIE(eanh);
+ const ip_addr *nh = (const void *) eanh->u.ptr->data;
+
+ struct nexthop_adata nhad = {
+ .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), },
+ .nh = { .gw = nh[0], },
+ };
+
+ ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad)));
+ }
+
+ struct rte_storage es = {
+ .rte = {
+ .attrs = ea,
+ .net = n->net,
+ .src = rt_find_source_global(n->path_id),
+ .sender = NULL,
+ .lastmod = n->lastmod,
+ .flags = n->cur ? REF_PENDING : 0,
+ },
+ };
+
+ struct rt_pending_export rpe = {
+ .new = &es, .new_best = &es,
+ };
+
+ if (hook->h.req->export_bulk)
+ {
+ const rte *feed = &es.rte;
+ hook->h.req->export_bulk(hook->h.req, n->net, &rpe, &rpe, &feed, 1);
+ }
+ else if (hook->h.req->export_one)
+ hook->h.req->export_one(hook->h.req, n->net, &rpe);
+ else
+ bug("No export method in export request");
+ }
+ HASH_WALK_ITER_END;
+
+ neq = cand;
+ cand = NULL;
+ } while (neq);
+
+ if (hook->hash_iter)
+ ev_schedule_work(&hook->h.event);
+ else
+ rt_set_export_state(&hook->h, TES_READY);
+}
+
+static void
+bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
+{
+ req->hook = rt_alloc_export(re, sizeof(struct bgp_out_export_hook));
+ req->hook->req = req;
+
+ struct bgp_out_export_hook *hook = SKIP_BACK(struct bgp_out_export_hook, h, req->hook);
+
+ hook->h.event.hook = bgp_out_table_feed;
+ rt_init_export(re, req->hook);
+}
+
+static void
+bgp_out_table_export_done(void *data)
+{
+ struct bgp_out_export_hook *hook = data;
+ struct rt_export_request *req = hook->h.req;
+ void (*stopped)(struct rt_export_request *) = hook->h.stopped;
+
+ rt_export_stopped(&hook->h);
+ CALL(stopped, req);
+}
+
+static const struct rt_exporter_class bgp_out_table_export_class = {
+ .start = bgp_out_table_export_start,
+ .done = bgp_out_table_export_done,
+};
+
+void
+bgp_setup_out_table(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->c.out_table == NULL);
+
+ c->prefix_exporter = (struct rt_exporter) {
+ .class = &bgp_out_table_export_class,
+ .addr_type = c->c.table->addr_type,
+ .rp = c->c.proto->pool,
+ };
+
+ rt_exporter_init(&c->prefix_exporter);
+
+ c->c.out_table = &c->prefix_exporter;
+}
+
/*
* BGP protocol glue
*/
int
-bgp_preexport(struct channel *c, rte *e)
+bgp_preexport(struct channel *C, rte *e)
{
- struct bgp_proto *p = (struct bgp_proto *) (c->proto);
+ struct bgp_proto *p = (struct bgp_proto *) C->proto;
struct bgp_proto *src = bgp_rte_proto(e);
+ struct bgp_channel *c = (struct bgp_channel *) C;
+
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return -1;
/* Reject our routes */
if (src == p)
@@ -1681,6 +2046,22 @@ bgp_preexport(struct channel *c, rte *e)
if (src == NULL)
return 0;
+ /* Reject flowspec that failed validation */
+ if (net_is_flow(e->net))
+ switch (rt_get_flowspec_valid(e))
+ {
+ case FLOWSPEC_VALID:
+ break;
+ case FLOWSPEC_INVALID:
+ return -1;
+ case FLOWSPEC_UNKNOWN:
+ ASSUME((rt_get_source_attr(e) != RTS_BGP) ||
+ !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table);
+ break;
+ case FLOWSPEC__MAX:
+ bug("This never happens.");
+ }
+
/* IBGP route reflection, RFC 4456 */
if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
{
@@ -1690,16 +2071,16 @@ bgp_preexport(struct channel *c, rte *e)
/* Generally, this should be handled when path is received, but we check it
also here as rr_cluster_id may be undefined or different in src. */
- if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
+ if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs))
return -1;
}
/* Handle well-known communities, RFC 1997 */
- struct eattr *com;
+ struct eattr *a;
if (p->cf->interpret_communities &&
- (com = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
+ (a = bgp_find_attr(e->attrs, BA_COMMUNITY)))
{
- const struct adata *d = com->u.ptr;
+ const struct adata *d = a->u.ptr;
/* Do not export anywhere */
if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
@@ -1718,6 +2099,16 @@ bgp_preexport(struct channel *c, rte *e)
return -1;
}
+ /* Do not export routes marked with OTC to upstream, RFC 9234 */
+ if (bgp_channel_is_role_applicable(c))
+ {
+ a = bgp_find_attr(e->attrs, BA_ONLY_TO_CUSTOMER);
+ if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER ||
+ p->cf->local_role==BGP_ROLE_PEER ||
+ p->cf->local_role==BGP_ROLE_RS_CLIENT))
+ return -1;
+ }
+
return 0;
}
@@ -1732,7 +2123,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
/* ORIGIN attribute - mandatory, attach if missing */
if (! bgp_find_attr(attrs0, BA_ORIGIN))
- bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
+ bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
/* AS_PATH attribute - mandatory */
a = bgp_find_attr(attrs0, BA_AS_PATH);
@@ -1747,24 +2138,24 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
{
/* IBGP or route server -> just ensure there is one */
if (!a)
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata);
}
else if (p->is_interior)
{
/* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
}
else /* Regular EBGP (no RS, no confederation) */
{
/* Regular EBGP -> prepend ASN as regular sequence */
ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
/* MULTI_EXIT_DESC attribute - accept only if set in export filter */
a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
- if (a && !(a->type & EAF_FRESH))
- bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
+ if (a && !a->fresh && !p->cf->allow_med)
+ bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC);
}
/* NEXT_HOP attribute - delegated to AF-specific hook */
@@ -1773,16 +2164,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
/* LOCAL_PREF attribute - required for IBGP, attach if missing */
if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
- bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
+ bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
/* AIGP attribute - accumulate local metric or originate new one */
u64 metric;
if (s.local_next_hop &&
- (bgp_total_aigp_metric_(e->attrs, &metric, &ad) ||
+ (bgp_total_aigp_metric_(e, &metric, &ad) ||
(c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
{
ad = bgp_aigp_set_metric(pool, ad, metric);
- bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad);
}
/* IBGP route reflection, RFC 4456 */
@@ -1790,7 +2181,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
{
/* ORIGINATOR_ID attribute - attach if not already set */
if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
- bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
+ bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id);
/* CLUSTER_LIST attribute - prepend cluster ID */
a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
@@ -1805,7 +2196,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
ad = int_set_prepend(pool, ad, p->rr_cluster_id);
/* Should be at least one prepended cluster ID */
- bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad);
}
/* AS4_* transition attributes, RFC 6793 4.2.2 */
@@ -1814,18 +2205,28 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
a = bgp_find_attr(attrs, BA_AS_PATH);
if (a && as_path_contains_as4(a->u.ptr))
{
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
- bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
}
a = bgp_find_attr(attrs, BA_AGGREGATOR);
if (a && aggregator_contains_as4(a->u.ptr))
{
- bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
- bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
+ bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr);
}
}
+ /* Mark routes for downstream with OTC, RFC 9234 */
+ if (bgp_channel_is_role_applicable(c))
+ {
+ a = bgp_find_attr(attrs, BA_ONLY_TO_CUSTOMER);
+ if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER ||
+ p->cf->local_role == BGP_ROLE_PEER ||
+ p->cf->local_role == BGP_ROLE_RS_SERVER))
+ bgp_set_attr_u32(&attrs, BA_ONLY_TO_CUSTOMER, 0, p->public_as);
+ }
+
/*
* Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
* conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
@@ -1842,34 +2243,39 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
struct bgp_proto *p = (void *) P;
struct bgp_channel *c = (void *) C;
struct bgp_bucket *buck;
- struct bgp_prefix *px;
- u32 path;
+ struct rte_src *path;
+
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
if (new)
{
- struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, C->rte_update_pool);
+ struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs, tmp_linpool);
+
+ /* Error during attribute processing */
+ if (!attrs)
+ log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n);
/* If attributes are invalid, we fail back to withdraw */
- buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
- path = new->src->global_id;
+ buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx);
+ path = new->src;
}
else
{
- buck = bgp_get_withdraw_bucket(c);
- path = old->src->global_id;
+ buck = bgp_get_withdraw_bucket(c->ptx);
+ path = old->src;
}
- px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0);
- add_tail(&buck->prefixes, &px->buck_node);
-
- bgp_schedule_packet(p->conn, c, PKT_UPDATE);
+ if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck))
+ bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
static inline u32
-bgp_get_neighbor(rte *r)
+bgp_get_neighbor(const rte *r)
{
- eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ eattr *e = ea_find(r->attrs, BGP_EA_ID(BA_AS_PATH));
u32 as;
if (e && as_path_get_first_regular(e->u.ptr, &as))
@@ -1881,30 +2287,14 @@ bgp_get_neighbor(rte *r)
}
static inline int
-rte_stale(rte *r)
+rte_stale(const rte *r)
{
- if (r->pflags & BGP_REF_STALE)
- return 1;
-
- if (r->pflags & BGP_REF_NOT_STALE)
- return 0;
-
- /* If staleness is unknown, compute and cache it */
- eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
- if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE))
- {
- r->pflags |= BGP_REF_STALE;
- return 1;
- }
- else
- {
- r->pflags |= BGP_REF_NOT_STALE;
- return 0;
- }
+ eattr *a = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
+ return a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
}
int
-bgp_rte_better(rte *new, rte *old)
+bgp_rte_better(const rte *new, const rte *old)
{
struct bgp_proto *new_bgp = bgp_rte_proto(new);
struct bgp_proto *old_bgp = bgp_rte_proto(old);
@@ -1920,8 +2310,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* RFC 4271 9.1.2.1. Route resolvability test */
- n = rta_resolvable(new->attrs);
- o = rta_resolvable(old->attrs);
+ n = rte_resolvable(new);
+ o = rte_resolvable(old);
if (n > o)
return 1;
if (n < o)
@@ -1936,8 +2326,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* Start with local preferences */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_LOCAL_PREF));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_LOCAL_PREF));
n = x ? x->u.data : new_bgp->cf->default_local_pref;
o = y ? y->u.data : old_bgp->cf->default_local_pref;
if (n > o)
@@ -1946,8 +2336,8 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 7311 4.1 - Apply AIGP metric */
- u64 n2 = bgp_total_aigp_metric(new->attrs);
- u64 o2 = bgp_total_aigp_metric(old->attrs);
+ u64 n2 = bgp_total_aigp_metric(new);
+ u64 o2 = bgp_total_aigp_metric(old);
if (n2 < o2)
return 1;
if (n2 > o2)
@@ -1956,8 +2346,8 @@ bgp_rte_better(rte *new, rte *old)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_AS_PATH));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_AS_PATH));
n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (n < o)
@@ -1967,8 +2357,8 @@ bgp_rte_better(rte *new, rte *old)
}
/* RFC 4271 9.1.2.2. b) Use origins */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGIN));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGIN));
n = x ? x->u.data : ORIGIN_INCOMPLETE;
o = y ? y->u.data : ORIGIN_INCOMPLETE;
if (n < o)
@@ -1990,8 +2380,8 @@ bgp_rte_better(rte *new, rte *old)
if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
(bgp_get_neighbor(new) == bgp_get_neighbor(old)))
{
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
n = x ? x->u.data : new_bgp->cf->default_med;
o = y ? y->u.data : old_bgp->cf->default_med;
if (n < o)
@@ -2007,8 +2397,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
- n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
- o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
+ n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0;
+ o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0;
if (n < o)
return 1;
if (n > o)
@@ -2016,8 +2406,8 @@ bgp_rte_better(rte *new, rte *old)
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
/* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
n = x ? x->u.data : new_bgp->remote_id;
o = y ? y->u.data : old_bgp->remote_id;
@@ -2034,8 +2424,8 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 4456 9. b) Compare cluster list lengths */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
n = x ? int_set_get_size(x->u.ptr) : 0;
o = y ? int_set_get_size(y->u.ptr) : 0;
if (n < o)
@@ -2049,7 +2439,7 @@ bgp_rte_better(rte *new, rte *old)
int
-bgp_rte_mergable(rte *pri, rte *sec)
+bgp_rte_mergable(const rte *pri, const rte *sec)
{
struct bgp_proto *pri_bgp = bgp_rte_proto(pri);
struct bgp_proto *sec_bgp = bgp_rte_proto(sec);
@@ -2057,17 +2447,20 @@ bgp_rte_mergable(rte *pri, rte *sec)
u32 p, s;
/* Skip suppressed routes (see bgp_rte_recalculate()) */
- /* LLGR draft - depreference stale routes */
- if (pri->pflags != sec->pflags)
+ if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED)
return 0;
/* RFC 4271 9.1.2.1. Route resolvability test */
- if (rta_resolvable(pri->attrs) != rta_resolvable(sec->attrs))
+ if (rte_resolvable(pri) != rte_resolvable(sec))
+ return 0;
+
+ /* LLGR draft - depreference stale routes */
+ if (rte_stale(pri) != rte_stale(sec))
return 0;
/* Start with local preferences */
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_LOCAL_PREF));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_LOCAL_PREF));
p = x ? x->u.data : pri_bgp->cf->default_local_pref;
s = y ? y->u.data : sec_bgp->cf->default_local_pref;
if (p != s)
@@ -2076,8 +2469,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
{
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_AS_PATH));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_AS_PATH));
p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
@@ -2089,8 +2482,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
}
/* RFC 4271 9.1.2.2. b) Use origins */
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_ORIGIN));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_ORIGIN));
p = x ? x->u.data : ORIGIN_INCOMPLETE;
s = y ? y->u.data : ORIGIN_INCOMPLETE;
if (p != s)
@@ -2100,8 +2493,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
{
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
p = x ? x->u.data : pri_bgp->cf->default_med;
s = y ? y->u.data : sec_bgp->cf->default_med;
if (p != s)
@@ -2113,8 +2506,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
return 0;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
- p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
- s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
+ p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0;
+ s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0;
if (p != s)
return 0;
@@ -2127,7 +2520,7 @@ bgp_rte_mergable(rte *pri, rte *sec)
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
- return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn);
+ return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn);
}
static inline int
@@ -2138,10 +2531,10 @@ use_deterministic_med(struct rte_storage *r)
}
int
-bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best)
+bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best)
{
rte *key = new ? new : old;
- u32 lpref = key->attrs->pref;
+ u32 lpref = rt_get_preference(key);
u32 lasn = bgp_get_neighbor(key);
int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0;
@@ -2208,7 +2601,7 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol
/* The default case - find a new best-in-group route */
rte *r = new; /* new may not be in the list */
- for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next)
+ for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
{
s->rte.pflags |= BGP_REF_SUPPRESSED;
@@ -2225,7 +2618,7 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol
new->pflags &= ~BGP_REF_SUPPRESSED;
/* Found all existing routes mergable with best-in-group */
- for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next)
+ for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte))
s->rte.pflags &= ~BGP_REF_SUPPRESSED;
@@ -2264,43 +2657,58 @@ bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *ol
}
void
-bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count)
+bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n,
+ struct rt_pending_export *first, struct rt_pending_export *last,
+ const rte **feed, uint count)
{
struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ struct rt_import_hook *irh = c->c.in_req.hook;
- do {
- rte *r = feed[--count];
- if (r->sender != c->c.in_req.hook)
- continue;
+ /* Find our routes among others */
+ for (uint i=0; i<count; i++)
+ {
+ const rte *r = feed[i];
- /* A new route, do not mark as stale */
- if (r->stale_cycle == c->c.in_req.hook->stale_set)
+ if (
+ !rte_is_valid(r) || /* Not a valid route */
+ (r->sender != irh) || /* Not our route */
+ (r->stale_cycle == irh->stale_set)) /* A new route, do not mark as stale */
continue;
- eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
+ eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
const struct adata *ad = ea ? ea->u.ptr : NULL;
uint flags = ea ? ea->flags : BAF_PARTIAL;
- rte e0 = *r;
- e0.flags |= REF_USE_STALE;
-
+ /* LLGR not allowed, withdraw the route */
if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
+ {
rte_import(&c->c.in_req, n, NULL, r->src);
+ continue;
+ }
- else if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
- rte_import(&c->c.in_req, n, &e0, r->src);
+ /* Route already marked as LLGR, do nothing */
+ if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
+ continue;
- else {
- rta *a = e0.attrs = rta_do_cow(r->attrs, c->c.rte_update_pool);
+ /* Store the tmp_linpool state to aggresively save memory */
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
- bgp_set_attr_ptr(&(a->eattrs), c->c.rte_update_pool, BA_COMMUNITY, flags,
- int_set_add(c->c.rte_update_pool, ad, BGP_COMM_LLGR_STALE));
- e0.pflags |= BGP_REF_STALE;
+ /* Mark the route as LLGR */
+ rte e0 = *r;
+ bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE));
- rte_import(&c->c.in_req, n, &e0, r->src);
- lp_flush(c->c.rte_update_pool);
- }
- } while (count);
+ /* We need to update the route but keep it stale. */
+ ASSERT_DIE(irh->stale_set == irh->stale_valid + 1);
+ irh->stale_set--;
+ rte_import(&c->c.in_req, n, &e0, r->src);
+ irh->stale_set++;
+
+ /* Restore the memory state */
+ lp_restore(tmp_linpool, &tmpp);
+ }
+
+ rpe_mark_seen_all(req->hook, first, last, NULL);
}
@@ -2316,8 +2724,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
/* First, unset AS4_* attributes */
- if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
- if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
+ if (p4) bgp_unset_attr(attrs, BA_AS4_PATH);
+ if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR);
/* Handle AGGREGATOR attribute */
if (a2 && a4)
@@ -2350,60 +2758,37 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
}
}
-int
-bgp_get_attr(const eattr *a, byte *buf, int buflen)
-{
- uint i = EA_ID(a->id);
- const struct bgp_attr_desc *d;
- int len;
-
- if (bgp_attr_known(i))
- {
- d = &bgp_attr_table[i];
- len = bsprintf(buf, "%s", d->name);
- buf += len;
- if (d->format)
- {
- *buf++ = ':';
- *buf++ = ' ';
- d->format(a, buf, buflen - len - 2);
- return GA_FULL;
- }
- return GA_NAME;
- }
-
- bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
- return GA_NAME;
-}
-
void
-bgp_get_route_info(rte *e, byte *buf)
+bgp_get_route_info(const rte *e, byte *buf)
{
- eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ eattr *p = ea_find(e->attrs, BGP_EA_ID(BA_AS_PATH));
+ eattr *o = ea_find(e->attrs, BGP_EA_ID(BA_ORIGIN));
u32 origas;
- buf += bsprintf(buf, " (%d", e->attrs->pref);
+ buf += bsprintf(buf, " (%d", rt_get_preference(e));
- if (e->pflags & BGP_REF_SUPPRESSED)
- buf += bsprintf(buf, "-");
+ if (!net_is_flow(e->net))
+ {
+ if (e->pflags & BGP_REF_SUPPRESSED)
+ buf += bsprintf(buf, "-");
- if (rte_stale(e))
- buf += bsprintf(buf, "s");
+ if (rte_stale(e))
+ buf += bsprintf(buf, "s");
- u64 metric = bgp_total_aigp_metric(e->attrs);
- if (metric < BGP_AIGP_MAX)
- {
- buf += bsprintf(buf, "/%lu", metric);
- }
- else if (e->attrs->igp_metric)
- {
- if (!rta_resolvable(e->attrs))
- buf += bsprintf(buf, "/-");
- else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
- buf += bsprintf(buf, "/?");
- else
- buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
+ u64 metric = bgp_total_aigp_metric(e);
+ if (metric < BGP_AIGP_MAX)
+ {
+ buf += bsprintf(buf, "/%lu", metric);
+ }
+ else if (metric = rt_get_igp_metric(e))
+ {
+ if (!rte_resolvable(e))
+ buf += bsprintf(buf, "/-");
+ else if (metric >= IGP_METRIC_UNKNOWN)
+ buf += bsprintf(buf, "/?");
+ else
+ buf += bsprintf(buf, "/%d", metric);
+ }
}
buf += bsprintf(buf, ") [");
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index e2754649..cda0eb8d 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -101,7 +101,9 @@
* RFC 8203 - BGP Administrative Shutdown Communication
* RFC 8212 - Default EBGP Route Propagation Behavior without Policies
* RFC 8654 - Extended Message Support for BGP
- * draft-ietf-idr-ext-opt-param-07
+ * RFC 9072 - Extended Optional Parameters Length for BGP OPEN Message
+ * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
+ * RFC 9234 - Route Leak Prevention and Detection Using Roles
* draft-uttaro-idr-bgp-persistence-04
* draft-walton-bgp-hostname-capability-02
*/
@@ -113,7 +115,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "nest/locks.h"
#include "conf/conf.h"
@@ -124,8 +126,13 @@
#include "bgp.h"
-/* Global list of listening sockets */
-static list STATIC_LIST_INIT(bgp_sockets);
+static void bgp_listen_create(void *);
+
+static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */
+static list STATIC_LIST_INIT(bgp_listen_pending); /* Global list of listening socket open requests */
+static event bgp_listen_event = { .hook = bgp_listen_create };
+
+DOMAIN(rtable) bgp_listen_domain;
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
@@ -136,14 +143,69 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd);
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
static void bgp_listen_sock_err(sock *sk UNUSED, int err);
+static void bgp_initiate_disable(struct bgp_proto *p, int err_val);
static void bgp_graceful_restart_feed(struct bgp_channel *c);
-static inline void channel_refresh_end_reload(struct channel *c)
+
+
+static inline int
+bgp_setup_auth(struct bgp_proto *p, int enable)
{
- channel_refresh_end(c);
+ /* Beware. This is done from main_birdloop and protocol birdloop is NOT ENTERED.
+ * Anyway, we are only accessing:
+ * - protocol config which can be changed only from main_birdloop (reconfig)
+ * - protocol listen socket which is always driven by main_birdloop
+ * - protocol name which is set on reconfig
+ */
+
+ if (p->cf->password && p->listen.sock)
+ {
+ ip_addr prefix = p->cf->remote_ip;
+ int pxlen = -1;
+
+ if (p->cf->remote_range)
+ {
+ prefix = net_prefix(p->cf->remote_range);
+ pxlen = net_pxlen(p->cf->remote_range);
+ }
+
+ int rv = sk_set_md5_auth(p->listen.sock->sk,
+ p->cf->local_ip, prefix, pxlen, p->cf->iface,
+ enable ? p->cf->password : NULL, p->cf->setkey);
+
+ if (rv < 0)
+ sk_log_error(p->listen.sock->sk, p->p.name);
- if (c->in_table)
- channel_request_reload(c);
+ return rv;
+ }
+ else
+ return 0;
+}
+
+/**
+ * bgp_close - close a BGP instance
+ * @p: BGP instance
+ *
+ * This function frees and deconfigures shared BGP resources.
+ */
+static void
+bgp_close(struct bgp_proto *p)
+{
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ struct bgp_listen_request *req = &p->listen;
+ struct bgp_socket *bs = req->sock;
+
+ if (bs)
+ {
+ req->sock = NULL;
+ rem_node(&req->n);
+
+ if (bs && EMPTY_LIST(bs->requests))
+ ev_send(&global_event_list, &bgp_listen_event);
+ }
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
/**
@@ -155,121 +217,136 @@ static inline void channel_refresh_end_reload(struct channel *c)
* is acquired and neighbor is ready). When error, caller should change state to
* PS_DOWN and return immediately.
*/
-static int
+static void
bgp_open(struct bgp_proto *p)
{
- ASSERT_DIE(birdloop_inside(&main_birdloop));
-
- struct bgp_socket *bs = NULL;
- struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
- ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
- (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
- uint port = p->cf->local_port;
- uint flags = p->cf->free_bind ? SKF_FREEBIND : 0;
- uint flag_mask = SKF_FREEBIND;
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+ struct bgp_listen_request *req = &p->listen;
/* We assume that cf->iface is defined iff cf->local_ip is link-local */
+ req->iface = p->cf->strict_bind ? p->cf->iface : NULL;
+ req->vrf = p->p.vrf;
+ req->addr = p->cf->strict_bind ? p->cf->local_ip :
+ (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
+ req->port = p->cf->local_port;
+ req->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
- WALK_LIST(bs, bgp_sockets)
- if (ipa_equal(bs->sk->saddr, addr) &&
- (bs->sk->sport == port) &&
- (bs->sk->iface == ifa) &&
- (bs->sk->vrf == p->p.vrf) &&
- ((bs->sk->flags & flag_mask) == flags))
- {
- bs->uc++;
- p->sock = bs;
- return 0;
- }
-
- sock *sk = sk_new(proto_pool);
- sk->type = SK_TCP_PASSIVE;
- sk->ttl = 255;
- sk->saddr = addr;
- sk->sport = port;
- sk->iface = ifa;
- sk->vrf = p->p.vrf;
- sk->flags = flags | SKF_PASSIVE_THREAD;
- sk->tos = IP_PREC_INTERNET_CONTROL;
- sk->rbsize = BGP_RX_BUFFER_SIZE;
- sk->tbsize = BGP_TX_BUFFER_SIZE;
- sk->rx_hook = bgp_incoming_connection;
- sk->err_hook = bgp_listen_sock_err;
-
- if (sk_open(sk) < 0)
- goto err;
-
- bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
- bs->sk = sk;
- bs->uc = 1;
- p->sock = bs;
- sk->data = bs;
+ BGP_TRACE(D_EVENTS, "Requesting listen socket at %I%J port %u", req->addr, req->iface, req->port);
- add_tail(&bgp_sockets, &bs->n);
+ add_tail(&bgp_listen_pending, &req->n);
+ ev_send(&global_event_list, &bgp_listen_event);
- return 0;
-
-err:
- sk_log_error(sk, p->p.name);
- log(L_ERR "%s: Cannot open listening socket", p->p.name);
- rfree(sk);
- return -1;
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
-/**
- * bgp_close - close a BGP instance
- * @p: BGP instance
- *
- * This function frees and deconfigures shared BGP resources.
- */
static void
-bgp_close(struct bgp_proto *p)
+bgp_listen_create(void *_ UNUSED)
{
ASSERT_DIE(birdloop_inside(&main_birdloop));
- struct bgp_socket *bs = p->sock;
-
- ASSERT(bs && bs->uc);
+ uint flag_mask = SKF_FREEBIND;
- if (--bs->uc)
- return;
+ while (1) {
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
- rfree(bs->sk);
- rem_node(&bs->n);
- mb_free(bs);
-}
+ if (EMPTY_LIST(bgp_listen_pending))
+ {
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+ break;
+ }
-static inline int
-bgp_setup_auth(struct bgp_proto *p, int enable)
-{
- if (p->cf->password)
- {
- ip_addr prefix = p->cf->remote_ip;
- int pxlen = -1;
+ /* Get the first request to match */
+ struct bgp_listen_request *req = HEAD(bgp_listen_pending);
+ struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req);
+ rem_node(&req->n);
+
+ /* First try to find existing socket */
+ struct bgp_socket *bs;
+ WALK_LIST(bs, bgp_sockets)
+ if (ipa_equal(bs->sk->saddr, req->addr) &&
+ (bs->sk->sport == req->port) &&
+ (bs->sk->iface == req->iface) &&
+ (bs->sk->vrf == req->vrf) &&
+ ((bs->sk->flags & flag_mask) == req->flags))
+ break;
- if (p->cf->remote_range)
+ /* Not found any */
+ if (NODE_VALID(bs))
+ BGP_TRACE(D_EVENTS, "Found a listening socket: %p", bs);
+ else
{
- prefix = net_prefix(p->cf->remote_range);
- pxlen = net_pxlen(p->cf->remote_range);
+ /* Allocating new socket from global protocol pool.
+ * We can do this in main_birdloop. */
+ sock *sk = sk_new(proto_pool);
+ sk->type = SK_TCP_PASSIVE;
+ sk->ttl = 255;
+ sk->saddr = req->addr;
+ sk->sport = req->port;
+ sk->iface = req->iface;
+ sk->vrf = req->vrf;
+ sk->flags = req->flags;
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->rbsize = BGP_RX_BUFFER_SIZE;
+ sk->tbsize = BGP_TX_BUFFER_SIZE;
+ sk->rx_hook = bgp_incoming_connection;
+ sk->err_hook = bgp_listen_sock_err;
+
+ if (sk_open(sk, &main_birdloop) < 0)
+ {
+ sk_log_error(sk, p->p.name);
+ log(L_ERR "%s: Cannot open listening socket", p->p.name);
+ rfree(sk);
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ bgp_initiate_disable(p, BEM_NO_SOCKET);
+ continue;
+ }
+
+ bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
+ bs->sk = sk;
+ sk->data = bs;
+
+ init_list(&bs->requests);
+ add_tail(&bgp_sockets, &bs->n);
+
+ BGP_TRACE(D_EVENTS, "Created new listening socket: %p", bs);
}
- int rv = sk_set_md5_auth(p->sock->sk,
- p->cf->local_ip, prefix, pxlen, p->cf->iface,
- enable ? p->cf->password : NULL, p->cf->setkey);
+ req->sock = bs;
+ add_tail(&bs->requests, &req->n);
- if (rv < 0)
- sk_log_error(p->sock->sk, p->p.name);
+ if (bgp_setup_auth(p, 1) < 0)
+ {
+ rem_node(&req->n);
+ req->sock = NULL;
- return rv;
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ bgp_initiate_disable(p, BEM_INVALID_MD5);
+ continue;
+ }
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
- else
- return 0;
+
+ /* Cleanup leftover listening sockets */
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+ struct bgp_socket *bs;
+ node *nxt;
+ WALK_LIST_DELSAFE(bs, nxt, bgp_sockets)
+ if (EMPTY_LIST(bs->requests))
+ {
+ rfree(bs->sk);
+ rem_node(&bs->n);
+ mb_free(bs);
+ }
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
static inline struct bgp_channel *
bgp_find_channel(struct bgp_proto *p, u32 afi)
{
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
if (c->afi == afi)
return c;
@@ -288,6 +365,8 @@ bgp_startup(struct bgp_proto *p)
if (p->postponed_sk)
{
/* Apply postponed incoming connection */
+ sk_reloop(p->postponed_sk, p->p.loop);
+
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
bgp_send_open(&p->incoming_conn);
@@ -305,13 +384,7 @@ bgp_startup_timeout(timer *t)
static void
bgp_initiate(struct bgp_proto *p)
{
- int err_val;
-
- if (bgp_open(p) < 0)
- { err_val = BEM_NO_SOCKET; goto err1; }
-
- if (bgp_setup_auth(p, 1) < 0)
- { err_val = BEM_INVALID_MD5; goto err2; }
+ bgp_open(p);
if (p->cf->bfd)
bgp_update_bfd(p, p->cf->bfd);
@@ -324,24 +397,28 @@ bgp_initiate(struct bgp_proto *p)
}
else
bgp_startup(p);
+}
- return;
-
-err2:
- bgp_close(p);
-err1:
- p->p.disabled = 1;
- bgp_store_error(p, NULL, BE_MISC, err_val);
-
- p->neigh = NULL;
- proto_notify_state(&p->p, PS_DOWN);
-
- return;
+static void
+bgp_initiate_disable(struct bgp_proto *p, int err_val)
+{
+ PROTO_LOCKED_FROM_MAIN(&p->p)
+ {
+ /* The protocol may be already down for another reason.
+ * Shutdown the protocol only if it isn't already shutting down. */
+ switch (p->p.proto_state)
+ {
+ case PS_START:
+ case PS_UP:
+ p->p.disabled = 1;
+ bgp_store_error(p, NULL, BE_MISC, err_val);
+ bgp_stop(p, err_val, NULL, 0);
+ }
+ }
}
/**
* bgp_start_timer - start a BGP timer
- * @p: bgp_proto which the timer belongs to
* @t: timer
* @value: time (in seconds) to fire (0 to disable the timer)
*
@@ -352,8 +429,6 @@ err1:
void
bgp_start_timer(struct bgp_proto *p, timer *t, uint value)
{
- BGP_ASSERT_INSIDE(p);
-
if (value)
{
/* The randomization procedure is specified in RFC 4271 section 10 */
@@ -375,7 +450,7 @@ bgp_start_timer(struct bgp_proto *p, timer *t, uint value)
void
bgp_close_conn(struct bgp_conn *conn)
{
- BGP_ASSERT_INSIDE(conn->bgp);
+ // struct bgp_proto *p = conn->bgp;
DBG("BGP: Closing connection\n");
conn->packets_to_send = 0;
@@ -386,8 +461,10 @@ bgp_close_conn(struct bgp_conn *conn)
conn->keepalive_timer = NULL;
rfree(conn->hold_timer);
conn->hold_timer = NULL;
+
rfree(conn->tx_ev);
conn->tx_ev = NULL;
+
rfree(conn->sk);
conn->sk = NULL;
@@ -467,8 +544,6 @@ bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len
static void
bgp_down(struct bgp_proto *p)
{
- bgp_start_timer(p, p->startup_timer, 0);
-
if (p->start_state > BSS_PREPARE)
{
bgp_setup_auth(p, 0);
@@ -482,34 +557,21 @@ bgp_down(struct bgp_proto *p)
}
static void
-bgp_active_event(void *vp)
+bgp_decision(void *vp)
{
struct bgp_proto *p = vp;
- BGP_ASSERT_INSIDE(p);
-
- DBG("%s: Decision start\n", p->p.name);
+ DBG("BGP: Decision start\n");
if ((p->p.proto_state == PS_START) &&
(p->outgoing_conn.state == BS_IDLE) &&
(p->incoming_conn.state != BS_OPENCONFIRM) &&
!p->passive)
bgp_active(p);
-}
-
-static void
-bgp_down_event(void *vp)
-{
- struct bgp_proto *p = vp;
-
- BGP_ENTER(p);
- DBG("%s: Down event\n", p->p.name);
if ((p->p.proto_state == PS_STOP) &&
(p->outgoing_conn.state == BS_IDLE) &&
(p->incoming_conn.state == BS_IDLE))
bgp_down(p);
-
- BGP_LEAVE(p);
}
static struct bgp_proto *
@@ -539,9 +601,16 @@ void
bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
{
proto_notify_state(&p->p, PS_STOP);
+ p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
- ev_send_loop(&main_birdloop, p->down_event);
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ if (c->ptx)
+ bgp_free_pending_tx(c);
+
+ proto_send_event(&p->p, p->event);
}
static inline void
@@ -588,7 +657,6 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->link_addr = p->neigh->iface->llv6->ip;
conn->sk->fast_rx = 0;
- conn->sk->cork = &rt_cork;
p->conn = conn;
p->last_error_class = 0;
@@ -614,7 +682,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
/* Summary state of ADD_PATH RX for active channels */
uint summary_add_path_rx = 0;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
@@ -635,7 +703,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
int active = loc->ready && rem->ready;
c->c.disabled = !active;
- c->c.reloadable = p->route_refresh || c->cf->import_table;
+ c->c.reloadable = p->route_refresh || ((c->c.in_keep & RIK_PREFILTER) == RIK_PREFILTER);
c->index = active ? num++ : 0;
@@ -696,7 +764,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->channel_count = num;
p->summary_add_path_rx = summary_add_path_rx;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
if (c->c.disabled)
continue;
@@ -747,8 +815,7 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn)
bgp_close_conn(conn);
bgp_conn_set_state(conn, BS_IDLE);
- ev_send_loop(p->p.loop, p->active_event);
- ev_send_loop(&main_birdloop, p->down_event);
+ proto_send_event(&p->p, p->event);
if (os == BS_ESTABLISHED)
bgp_conn_leave_established_state(p);
@@ -776,7 +843,7 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
p->gr_active_num = 0;
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
/* FIXME: perhaps check for channel state instead of disabled flag? */
if (c->c.disabled)
@@ -790,16 +857,14 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
{
case BGP_GRS_NONE:
c->gr_active = BGP_GRS_ACTIVE;
- channel_refresh_begin(&c->c);
- break;
+ /* fall through */
case BGP_GRS_ACTIVE:
- channel_refresh_end(&c->c);
- channel_refresh_begin(&c->c);
+ rt_refresh_begin(&c->c.in_req);
break;
case BGP_GRS_LLGR:
- channel_refresh_begin(&c->c);
+ rt_refresh_begin(&c->c.in_req);
bgp_graceful_restart_feed(c);
break;
}
@@ -807,15 +872,13 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
else
{
/* Just flush the routes */
- channel_refresh_begin(&c->c);
- channel_refresh_end(&c->c);
+ rt_refresh_begin(&c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
/* Reset bucket and prefix tables */
- bgp_free_bucket_table(c);
- bgp_free_prefix_table(c);
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ bgp_free_pending_tx(c);
+ bgp_init_pending_tx(c);
c->packets_to_send = 0;
}
@@ -871,6 +934,8 @@ bgp_graceful_restart_feed(struct bgp_channel *c)
}
+
+
/**
* bgp_graceful_restart_done - finish active BGP graceful restart
* @c: BGP channel
@@ -893,11 +958,8 @@ bgp_graceful_restart_done(struct bgp_channel *c)
if (!p->gr_active_num)
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
- if (c->stale_feed.hook)
- rt_stop_export(&c->stale_feed, bgp_graceful_restart_feed_done);
-
tm_stop(c->stale_timer);
- channel_refresh_end_reload(&c->c);
+ rt_refresh_end(&c->c.in_req);
}
/**
@@ -919,7 +981,7 @@ bgp_graceful_restart_timeout(timer *t)
if (p->llgr_ready)
{
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
/* Channel is not in GR and is already flushed */
if (!c->gr_active)
@@ -976,11 +1038,8 @@ bgp_refresh_begin(struct bgp_channel *c)
if (c->load_state == BFS_LOADING)
{ log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
- if (c->load_state == BFS_REFRESHING)
- channel_refresh_end(&c->c);
-
c->load_state = BFS_REFRESHING;
- channel_refresh_begin(&c->c);
+ rt_refresh_begin(&c->c.in_req);
}
/**
@@ -1001,7 +1060,7 @@ bgp_refresh_end(struct bgp_channel *c)
{ log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
c->load_state = BFS_NONE;
- channel_refresh_end_reload(&c->c);
+ rt_refresh_end(&c->c.in_req);
}
@@ -1137,12 +1196,10 @@ bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
static void
bgp_setup_sk(struct bgp_conn *conn, sock *s)
{
- ASSERT_DIE(s->flags & SKF_THREAD);
s->data = conn;
s->err_hook = bgp_sock_err;
s->fast_rx = 1;
conn->sk = s;
- sk_start(s);
}
static void
@@ -1151,8 +1208,6 @@ bgp_active(struct bgp_proto *p)
int delay = MAX(1, p->cf->connect_delay_time);
struct bgp_conn *conn = &p->outgoing_conn;
- BGP_ASSERT_INSIDE(p);
-
BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
bgp_setup_conn(p, conn);
bgp_conn_set_state(conn, BS_ACTIVE);
@@ -1173,12 +1228,9 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
struct bgp_conn *conn = &p->outgoing_conn;
int hops = p->cf->multihop ? : 1;
- BGP_ASSERT_INSIDE(p);
-
DBG("BGP: Connecting\n");
sock *s = sk_new(p->p.pool);
s->type = SK_TCP_ACTIVE;
- s->flags |= SKF_THREAD;
s->saddr = p->local_ip;
s->daddr = p->remote_ip;
s->dport = p->cf->remote_port;
@@ -1190,6 +1242,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
s->tos = IP_PREC_INTERNET_CONTROL;
s->password = p->cf->password;
s->tx_hook = bgp_connected;
+ s->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
@@ -1197,7 +1250,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
bgp_setup_sk(conn, s);
bgp_conn_set_state(conn, BS_CONNECT);
- if (sk_open(s) < 0)
+ if (sk_open(s, p->p.loop) < 0)
goto err;
/* Set minimal receive TTL if needed */
@@ -1227,12 +1280,17 @@ static struct bgp_proto *
bgp_find_proto(sock *sk)
{
struct bgp_proto *best = NULL;
- struct bgp_proto *p;
+ struct bgp_socket *bs = sk->data;
+ struct bgp_listen_request *req;
/* sk->iface is valid only if src or dst address is link-local */
int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
- WALK_LIST(p, proto_list)
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ WALK_LIST(req, bs->requests)
+ {
+ struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req);
if ((p->p.proto == &proto_bgp) &&
(ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
(!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
@@ -1246,7 +1304,9 @@ bgp_find_proto(sock *sk)
if (!bgp_is_dynamic(p))
break;
}
+ }
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
return best;
}
@@ -1265,6 +1325,8 @@ bgp_find_proto(sock *sk)
static int
bgp_incoming_connection(sock *sk, uint dummy UNUSED)
{
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
+
struct bgp_proto *p;
int acc, hops;
@@ -1278,17 +1340,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
return 0;
}
- if (p->p.loop == &main_birdloop)
- {
- /* Protocol is down for whatever reason. No need for locking. */
- BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) rejected (protocol is down)",
- sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
- sk->dport);
- rfree(sk);
- return 0;
- }
-
- BGP_ENTER(p);
+ birdloop_enter(p->p.loop);
/*
* BIRD should keep multiple incoming connections in OpenSent state (for
@@ -1319,8 +1371,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
if (!acc)
{
rfree(sk);
- BGP_LEAVE(p);
- return 0;
+ goto leave;
}
hops = p->cf->multihop ? : 1;
@@ -1345,22 +1396,24 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
p = bgp_spawn(p, sk->daddr);
p->postponed_sk = sk;
rmove(sk, p->p.pool);
- BGP_LEAVE(p);
- return 0;
+ goto leave;
}
rmove(sk, p->p.pool);
+ sk_reloop(sk, p->p.loop);
+
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
bgp_send_open(&p->incoming_conn);
- BGP_LEAVE(p);
- return 0;
+ goto leave;
err:
sk_log_error(sk, p->p.name);
log(L_ERR "%s: Incoming connection aborted", p->p.name);
rfree(sk);
- BGP_LEAVE(p);
+
+leave:
+ birdloop_leave(p->p.loop);
return 0;
}
@@ -1395,9 +1448,10 @@ bgp_neigh_notify(neighbor *n)
struct bgp_proto *p = (struct bgp_proto *) n->proto;
int ps = p->p.proto_state;
- BGP_ASSERT_INSIDE(p);
+ if (n != p->neigh)
+ return;
- if ((n != p->neigh) || (ps == PS_DOWN) || (ps == PS_STOP))
+ if ((ps == PS_DOWN) || (ps == PS_STOP))
return;
int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
@@ -1470,15 +1524,22 @@ static void
bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd)
{
if (bfd && p->bfd_req)
+ {
+ BGP_TRACE(D_EVENTS, "Updating existing BFD request");
bfd_update_request(p->bfd_req, bfd);
+ }
if (bfd && !p->bfd_req && !bgp_is_dynamic(p))
+ {
p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
p->cf->multihop ? NULL : p->neigh->iface,
- p->p.vrf, bgp_bfd_notify, p, birdloop_event_list(p->p.loop), bfd);
+ p->p.vrf, bgp_bfd_notify, p, p->p.loop, bfd);
+ BGP_TRACE(D_EVENTS, "Requesting a new BFD session");
+ }
if (!bfd && p->bfd_req)
{
+ BGP_TRACE(D_EVENTS, "Retracting the BFD request");
rfree(p->bfd_req);
p->bfd_req = NULL;
}
@@ -1490,8 +1551,11 @@ bgp_reload_routes(struct channel *C)
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
- ASSERT(p->conn && (p->route_refresh));
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
+ ASSERT(p->conn && p->route_refresh);
bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
}
@@ -1501,6 +1565,10 @@ bgp_feed_begin(struct channel *C, int initial)
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
+
/* This should not happen */
if (!p->conn)
return;
@@ -1508,6 +1576,12 @@ bgp_feed_begin(struct channel *C, int initial)
if (initial && p->cf->gr_mode)
c->feed_state = BFS_LOADING;
+ if (!initial && C->out_table)
+ {
+ c->feed_out_table = 1;
+ return;
+ }
+
/* It is refeed and both sides support enhanced route refresh */
if (!initial && p->enhanced_refresh)
{
@@ -1526,6 +1600,16 @@ bgp_feed_end(struct channel *C)
struct bgp_proto *p = (void *) C->proto;
struct bgp_channel *c = (void *) C;
+ /* Ignore non-BGP channels */
+ if (C->channel != &channel_bgp)
+ return;
+
+ if (c->feed_out_table)
+ {
+ c->feed_out_table = 0;
+ return;
+ }
+
/* This should not happen */
if (!p->conn)
return;
@@ -1548,17 +1632,14 @@ bgp_feed_end(struct channel *C)
static void
-bgp_start_locked(struct object_lock *lock)
+bgp_start_locked(void *_p)
{
- struct bgp_proto *p = lock->data;
+ struct bgp_proto *p = _p;
const struct bgp_config *cf = p->cf;
- BGP_ENTER(p);
-
if (p->p.proto_state != PS_START)
{
DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
- BGP_LEAVE(p);
return;
}
@@ -1568,11 +1649,10 @@ bgp_start_locked(struct object_lock *lock)
{
/* Multi-hop sessions do not use neighbor entries */
bgp_initiate(p);
- BGP_LEAVE(p);
return;
}
- neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY | NEF_NOTIFY_MAIN);
+ neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
if (!n)
{
log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
@@ -1580,7 +1660,6 @@ bgp_start_locked(struct object_lock *lock)
p->p.disabled = 1;
bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
proto_notify_state(&p->p, PS_DOWN);
- BGP_LEAVE(p);
return;
}
@@ -1592,8 +1671,6 @@ bgp_start_locked(struct object_lock *lock)
BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
else
bgp_start_neighbor(p);
-
- BGP_LEAVE(p);
}
static int
@@ -1632,13 +1709,12 @@ bgp_start(struct proto *P)
p->stats.rx_bytes = p->stats.tx_bytes = 0;
p->last_rx_update = 0;
- p->active_event = ev_new_init(p->p.pool, bgp_active_event, p);
- p->down_event = ev_new_init(p->p.pool, bgp_down_event, p);
+ p->event = ev_new_init(p->p.pool, bgp_decision, p);
+ p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p);
+
p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
- p->rx_lp = lp_new_default(p->p.pool);
-
p->local_id = proto_get_router_id(P->cf);
if (p->rr_client)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
@@ -1650,7 +1726,7 @@ bgp_start(struct proto *P)
if (p->p.gr_recovery && p->cf->gr_mode)
{
struct bgp_channel *c;
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
channel_graceful_restart_lock(&c->c);
}
@@ -1665,8 +1741,11 @@ bgp_start(struct proto *P)
lock->iface = p->cf->iface;
lock->vrf = p->cf->iface ? NULL : p->p.vrf;
lock->type = OBJLOCK_TCP;
- lock->hook = bgp_start_locked;
- lock->data = p;
+ lock->event = (event) {
+ .hook = bgp_start_locked,
+ .data = p,
+ };
+ lock->target = proto_event_list(P);
/* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
if (bgp_is_dynamic(p))
@@ -1782,7 +1861,7 @@ bgp_init(struct proto_config *CF)
P->rt_notify = bgp_rt_notify;
P->preexport = bgp_preexport;
- P->neigh_notify = bgp_neigh_notify;
+ P->iface_sub.neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
P->feed_begin = bgp_feed_begin;
P->feed_end = bgp_feed_end;
@@ -1809,7 +1888,7 @@ bgp_init(struct proto_config *CF)
/* Add all channels */
struct bgp_channel_config *cc;
- WALK_LIST(cc, CF->channels)
+ BGP_CF_WALK_CHANNELS(cf, cc)
proto_add_channel(P, &cc->c);
return P;
@@ -1830,6 +1909,9 @@ bgp_channel_init(struct channel *C, struct channel_config *CF)
if (cf->igp_table_ip6)
c->igp_table_ip6 = cf->igp_table_ip6->table;
+
+ if (cf->base_table)
+ c->base_table = cf->base_table->table;
}
static int
@@ -1840,22 +1922,23 @@ bgp_channel_start(struct channel *C)
ip_addr src = p->local_ip;
if (c->igp_table_ip4)
- RT_LOCKED(c->igp_table_ip4, t)
- rt_lock_table(t);
+ rt_lock_table(c->igp_table_ip4);
if (c->igp_table_ip6)
- RT_LOCKED(c->igp_table_ip6, t)
- rt_lock_table(t);
+ rt_lock_table(c->igp_table_ip6);
- c->pool = p->p.pool; // XXXX
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ if (c->base_table)
+ {
+ rt_lock_table(c->base_table);
+ rt_flowspec_link(c->base_table, c->c.table);
+ }
- if (c->cf->import_table)
- channel_setup_in_table(C, 0);
+ c->pool = p->p.pool; // XXXX
if (c->cf->export_table)
- channel_setup_out_table(C);
+ bgp_setup_out_table(c);
+
+ bgp_init_pending_tx(c);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
@@ -1926,12 +2009,16 @@ bgp_channel_cleanup(struct channel *C)
struct bgp_channel *c = (void *) C;
if (c->igp_table_ip4)
- RT_LOCKED(c->igp_table_ip4, t)
- rt_unlock_table(t);
+ rt_unlock_table(c->igp_table_ip4);
if (c->igp_table_ip6)
- RT_LOCKED(c->igp_table_ip6, t)
- rt_unlock_table(t);
+ rt_unlock_table(c->igp_table_ip6);
+
+ if (c->base_table)
+ {
+ rt_flowspec_unlink(c->base_table, c->c.table);
+ rt_unlock_table(c->base_table);
+ }
c->index = 0;
@@ -1944,7 +2031,7 @@ bgp_find_channel_config(struct bgp_config *cf, u32 afi)
{
struct bgp_channel_config *cc;
- WALK_LIST(cc, cf->c.channels)
+ BGP_CF_WALK_CHANNELS(cf, cc)
if (cc->afi == afi)
return cc;
@@ -1974,12 +2061,31 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32
return cc2->c.table;
/* Last, try default table of given type */
- if (tab = cf->c.global->def_tables[type])
+ if (tab = rt_get_default_table(cf->c.global, type))
return tab;
cf_error("Undefined IGP table");
}
+static struct rtable_config *
+bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc)
+{
+ /* Expected table type */
+ u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6;
+
+ /* First, try appropriate IP channel */
+ u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST);
+ struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2);
+ if (cc2 && (cc2->c.table->addr_type == type))
+ return cc2->c.table;
+
+ /* Last, try default table of given type */
+ struct rtable_config *tab = rt_get_default_table(cf->c.global, type);
+ if (tab)
+ return tab;
+
+ cf_error("Undefined base table");
+}
void
bgp_postconfig(struct proto_config *CF)
@@ -2042,6 +2148,15 @@ bgp_postconfig(struct proto_config *CF)
if (internal && cf->rs_client)
cf_error("Only external neighbor can be RS client");
+ if (internal && (cf->local_role != BGP_ROLE_UNDEFINED))
+ cf_error("Local role cannot be set on IBGP sessions");
+
+ if (interior && (cf->local_role != BGP_ROLE_UNDEFINED))
+ log(L_WARN "BGP roles are not recommended to be used within AS confederations");
+
+ if (cf->require_roles && (cf->local_role == BGP_ROLE_UNDEFINED))
+ cf_error("Local role must be set if roles are required");
+
if (!cf->confederation && cf->confederation_member)
cf_error("Confederation ID must be set for member sessions");
@@ -2064,9 +2179,24 @@ bgp_postconfig(struct proto_config *CF)
if (internal && cf->enforce_first_as)
cf_error("Enforce first AS check is requires EBGP sessions");
+ if (cf->keepalive_time > cf->hold_time)
+ cf_error("Keepalive time must be at most hold time");
+
+ if (cf->keepalive_time > (cf->hold_time / 2))
+ log(L_WARN "Keepalive time should be at most 1/2 of hold time");
+
+ if (cf->min_hold_time > cf->hold_time)
+ cf_error("Min hold time (%u) exceeds hold time (%u)",
+ cf->min_hold_time, cf->hold_time);
+
+ uint keepalive_time = cf->keepalive_time ?: cf->hold_time / 3;
+ if (cf->min_keepalive_time > keepalive_time)
+ cf_error("Min keepalive time (%u) exceeds keepalive time (%u)",
+ cf->min_keepalive_time, keepalive_time);
+
struct bgp_channel_config *cc;
- WALK_LIST(cc, CF->channels)
+ BGP_CF_WALK_CHANNELS(cf, cc)
{
/* Handle undefined import filter */
if (cc->c.in_filter == FILTER_UNDEF)
@@ -2094,6 +2224,10 @@ bgp_postconfig(struct proto_config *CF)
if (!cc->gw_mode)
cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
+ /* Different default for next_hop_prefer */
+ if (!cc->next_hop_prefer)
+ cc->next_hop_prefer = (cc->gw_mode == GW_DIRECT) ? NHP_GLOBAL : NHP_LOCAL;
+
/* Defaults based on proto config */
if (cc->gr_able == 0xff)
cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
@@ -2124,6 +2258,14 @@ bgp_postconfig(struct proto_config *CF)
cf_error("Mismatched IGP table type");
}
+ /* Default value of base table */
+ if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table)
+ cc->base_table = bgp_default_base_table(cf, cc);
+
+ if (cc->base_table && !cc->base_table->trie_used)
+ cf_error("Flowspec validation requires base table (%s) with trie",
+ cc->base_table->name);
+
if (cf->multihop && (cc->gw_mode == GW_DIRECT))
cf_error("Multihop BGP cannot use direct gateway mode");
@@ -2165,20 +2307,16 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
WALK_LIST(C, p->p.channels)
C->stale = 1;
- WALK_LIST(cc, new->c.channels)
+ BGP_CF_WALK_CHANNELS(new, cc)
{
C = (struct channel *) bgp_find_channel(p, cc->afi);
same = proto_configure_channel(P, &C, &cc->c) && same;
-
- if (C)
- C->stale = 0;
}
WALK_LIST_DELSAFE(C, C2, p->p.channels)
if (C->stale)
same = proto_configure_channel(P, &C, NULL) && same;
-
if (same && (p->start_state > BSS_PREPARE))
bgp_update_bfd(p, new->bfd);
@@ -2192,7 +2330,7 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF)
return same;
}
-#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
+#define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL )
static int
bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed)
@@ -2203,29 +2341,33 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor
struct bgp_channel_config *old = c->cf;
if ((new->secondary != old->secondary) ||
+ (new->validate != old->validate) ||
(new->gr_able != old->gr_able) ||
(new->llgr_able != old->llgr_able) ||
(new->llgr_time != old->llgr_time) ||
(new->ext_next_hop != old->ext_next_hop) ||
(new->add_path != old->add_path) ||
- (new->import_table != old->import_table) ||
(new->export_table != old->export_table) ||
- (IGP_TABLE(new, ip4) != IGP_TABLE(old, ip4)) ||
- (IGP_TABLE(new, ip6) != IGP_TABLE(old, ip6)))
+ (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) ||
+ (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) ||
+ (TABLE(new, base_table) != TABLE(old, base_table)))
return 0;
if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
return 0;
if ((new->gw_mode != old->gw_mode) ||
+ (new->next_hop_prefer != old->next_hop_prefer) ||
(new->aigp != old->aigp) ||
(new->cost != old->cost))
{
- /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
- if (c->c.in_table && (c->c.channel_state == CS_UP))
+ /* If import table is active and route refresh is possible, we just ask for route refresh */
+ if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP) && p->route_refresh)
bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
- *import_changed = 1;
+ /* Otherwise we do complete reload */
+ else
+ *import_changed = 1;
}
if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
@@ -2394,6 +2536,15 @@ bgp_show_afis(int code, char *s, u32 *afis, uint count)
cli_msg(code, b.start);
}
+const char *
+bgp_format_role_name(u8 role)
+{
+ static const char *bgp_role_names[] = { "provider", "rs_server", "rs_client", "customer", "peer" };
+ if (role == BGP_ROLE_UNDEFINED) return "undefined";
+ if (role < ARRAY_SIZE(bgp_role_names)) return bgp_role_names[role];
+ return "?";
+}
+
static void
bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
{
@@ -2522,6 +2673,9 @@ bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
if (caps->hostname)
cli_msg(-1006, " Hostname: %s", caps->hostname);
+
+ if (caps->role != BGP_ROLE_UNDEFINED)
+ cli_msg(-1006, " Role: %s", bgp_format_role_name(caps->role));
}
static void
@@ -2529,9 +2683,6 @@ bgp_show_proto_info(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- if (p->p.proto_state != PS_DOWN)
- BGP_ASSERT_INSIDE(p);
-
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
if (bgp_is_dynamic(p) && p->cf->remote_range)
@@ -2539,6 +2690,9 @@ bgp_show_proto_info(struct proto *P)
else
cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
+ if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT))
+ cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port);
+
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
cli_msg(-1006, " Local AS: %u", p->cf->local_as);
@@ -2581,6 +2735,9 @@ bgp_show_proto_info(struct proto *P)
tm_remains(p->conn->hold_timer), p->conn->hold_time);
cli_msg(-1006, " Keepalive timer: %t/%u",
tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
+ cli_msg(-1006, " TX pending: %d bytes%s",
+ p->conn->sk->tpos - p->conn->sk->ttx,
+ ev_active(p->conn->tx_ev) ? " (refill scheduled)" : "");
}
#if 0
@@ -2609,6 +2766,9 @@ bgp_show_proto_info(struct proto *P)
{
channel_show_info(&c->c);
+ if (c->c.channel != &channel_bgp)
+ continue;
+
if (p->gr_active_num)
cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
@@ -2628,6 +2788,25 @@ bgp_show_proto_info(struct proto *P)
if (c->igp_table_ip6)
cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
+
+ if (c->base_table)
+ cli_msg(-1006, " Base table: %s", c->base_table->name);
+
+ uint bucket_cnt = 0;
+ uint prefix_cnt = 0;
+ struct bgp_bucket *buck;
+ struct bgp_prefix *px;
+ if (c->ptx)
+ WALK_LIST(buck, c->ptx->bucket_queue)
+ {
+ bucket_cnt++;
+ WALK_LIST(px, buck->prefixes)
+ if (px->cur)
+ prefix_cnt++;
+ }
+
+ cli_msg(-1006, " Pending %u attribute sets with total %u prefixes to send",
+ bucket_cnt, prefix_cnt);
}
}
}
@@ -2645,7 +2824,6 @@ struct channel_class channel_bgp = {
struct protocol proto_bgp = {
.name = "BGP",
.template = "bgp%d",
- .class = PROTOCOL_BGP,
.preference = DEF_PREF_BGP,
.channel_mask = NB_IP | NB_VPN | NB_FLOW,
.proto_size = sizeof(struct bgp_proto),
@@ -2657,6 +2835,12 @@ struct protocol proto_bgp = {
.reconfigure = bgp_reconfigure,
.copy_config = bgp_copy_config,
.get_status = bgp_get_status,
- .get_attr = bgp_get_attr,
.show_proto_info = bgp_show_proto_info
};
+
+void bgp_build(void)
+{
+ proto_build(&proto_bgp);
+ bgp_register_attrs();
+ bgp_listen_domain = DOMAIN_NEW(rtable, "BGP Listen Sockets");
+}
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 2a2fe689..6f75874f 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -14,13 +14,12 @@
#include <stdint.h>
#include <setjmp.h>
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/bfd.h"
//#include "lib/lists.h"
#include "lib/hash.h"
#include "lib/socket.h"
-struct linpool;
struct eattr;
@@ -68,10 +67,10 @@ struct bgp_af_desc {
u8 no_igp;
const char *name;
uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size);
- void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a);
void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to);
uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size);
- void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, ea_list **to);
};
@@ -98,7 +97,7 @@ struct bgp_config {
int capabilities; /* Enable capability handshake [RFC 5492] */
int enable_refresh; /* Enable local support for route refresh [RFC 2918] */
int enable_as4; /* Enable local support for 4B AS numbers [RFC 6793] */
- int enable_extended_messages; /* Enable local support for extended messages [draft] */
+ int enable_extended_messages; /* Enable local support for extended messages [RFC 8654] */
int enable_hostname; /* Enable local support for hostname [draft] */
u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */
int rr_client; /* Whether neighbor is RR client of me */
@@ -109,18 +108,24 @@ struct bgp_config {
int interpret_communities; /* Hardwired handling of well-known communities */
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */
+ int allow_med; /* Allow BGP_MED in EBGP sessions */
int allow_as_sets; /* Allow AS_SETs in incoming AS_PATHs */
int enforce_first_as; /* Enable check for neighbor AS as first AS in AS_PATH */
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
int llgr_mode; /* Long-lived graceful restart mode (BGP_LLGR_*) */
int setkey; /* Set MD5 password to system SA/SP database */
+ u8 local_role; /* Set peering role with neighbor [RFC 9234] */
+ int require_roles; /* Require configured roles on both sides */
/* Times below are in seconds */
unsigned gr_time; /* Graceful restart timeout */
unsigned llgr_time; /* Long-lived graceful restart stale time */
unsigned connect_delay_time; /* Minimum delay between connect attempts */
unsigned connect_retry_time; /* Timeout for connect attempts */
- unsigned hold_time, initial_hold_time;
+ unsigned hold_time;
+ unsigned min_hold_time; /* Minimum accepted hold time */
+ unsigned initial_hold_time;
unsigned keepalive_time;
+ unsigned min_keepalive_time; /* Minimum accepted keepalive time */
unsigned error_amnesia_time; /* Errors are forgotten after */
unsigned error_delay_time_min; /* Time to wait after an error is detected */
unsigned error_delay_time_max;
@@ -144,9 +149,11 @@ struct bgp_channel_config {
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
u8 next_hop_self; /* Always set next hop to local IP address (NH_*) */
u8 next_hop_keep; /* Do not modify next hop attribute (NH_*) */
+ u8 next_hop_prefer; /* Prefer global or link-local next hop (NHP_*) */
u8 mandatory; /* Channel is mandatory in capability negotiation */
u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
+ u8 validate; /* Validate Flowspec per RFC 8955 (6) */
u8 gr_able; /* Allow full graceful restart for the channel */
u8 llgr_able; /* Allow full long-lived GR for the channel */
uint llgr_time; /* Long-lived graceful restart stale time */
@@ -156,15 +163,23 @@ struct bgp_channel_config {
u8 aigp_originate; /* AIGP is originated automatically */
u32 cost; /* IGP cost for direct next hops */
u8 import_table; /* Use c.in_table as Adj-RIB-In */
- u8 export_table; /* Use c.out_table as Adj-RIB-Out */
+ u8 export_table; /* Keep Adj-RIB-Out and export it */
struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+ struct rtable_config *base_table; /* Base table for Flowspec validation */
};
#define BGP_PT_INTERNAL 1
#define BGP_PT_EXTERNAL 2
+#define BGP_ROLE_UNDEFINED 255
+#define BGP_ROLE_PROVIDER 0
+#define BGP_ROLE_RS_SERVER 1
+#define BGP_ROLE_RS_CLIENT 2
+#define BGP_ROLE_CUSTOMER 3
+#define BGP_ROLE_PEER 4
+
#define NH_NO 0
#define NH_ALL 1
#define NH_IBGP 2
@@ -177,6 +192,9 @@ struct bgp_channel_config {
#define GW_DIRECT 1
#define GW_RECURSIVE 2
+#define NHP_GLOBAL 1
+#define NHP_LOCAL 2
+
#define BGP_ADD_PATH_RX 1
#define BGP_ADD_PATH_TX 2
#define BGP_ADD_PATH_FULL 3
@@ -203,8 +221,6 @@ struct bgp_channel_config {
/* rte->pflags */
#define BGP_REF_SUPPRESSED 0x1 /* Used for deterministic MED comparison */
-#define BGP_REF_STALE 0x2 /* Route is LLGR_STATE */
-#define BGP_REF_NOT_STALE 0x4 /* Route is NOT LLGR_STATE */
struct bgp_af_caps {
u32 afi;
@@ -222,9 +238,10 @@ struct bgp_caps {
u32 as4_number; /* Announced ASN */
u8 as4_support; /* Four-octet AS capability, RFC 6793 */
- u8 ext_messages; /* Extended message length, RFC draft */
+ u8 ext_messages; /* Extended message length, RFC 8654 */
u8 route_refresh; /* Route refresh capability, RFC 2918 */
u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */
+ u8 role; /* BGP role capability, RFC 9234 */
u8 gr_aware; /* Graceful restart capability, RFC 4724 */
u8 gr_flags; /* Graceful restart flags */
@@ -248,8 +265,8 @@ struct bgp_caps {
struct bgp_socket {
node n; /* Node in global bgp_sockets */
+ list requests; /* Listen requests */
sock *sk; /* Real listening socket */
- u32 uc; /* Use count */
};
struct bgp_stats {
@@ -284,6 +301,16 @@ struct bgp_conn {
uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
+struct bgp_listen_request {
+ node n; /* Node in bgp_socket / pending list */
+ struct bgp_socket *sock; /* Assigned socket */
+ ip_addr addr;
+ struct iface *iface;
+ struct iface *vrf;
+ uint port;
+ uint flags;
+};
+
struct bgp_proto {
struct proto p;
const struct bgp_config *cf; /* Shortcut to BGP configuration */
@@ -313,18 +340,17 @@ struct bgp_proto {
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
- struct linpool *rx_lp; /* Linpool for parsing received updates */
struct object_lock *lock; /* Lock for neighbor connection */
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
- struct bgp_socket *sock; /* Shared listening socket */
+ struct bgp_listen_request listen; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */
+ event *uncork_ev; /* Uncork event in case of congestion */
struct bgp_stats stats; /* BGP statistics */
btime last_established; /* Last time of enter/leave of established state */
btime last_rx_update; /* Last time of RX update */
ip_addr link_addr; /* Link-local version of local_ip */
- event *active_event; /* Event for respawning */
- event *down_event; /* Event to shut down */
+ event *event; /* Event for respawning and shutting process */
timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
int dynamic_name_counter; /* Counter for dynamic BGP names */
@@ -347,15 +373,12 @@ struct bgp_channel {
rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+ rtable *base_table; /* Base table for Flowspec validation */
/* Rest are zeroed when down */
pool *pool;
- HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
- struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
- list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
-
- HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
- slab *prefix_slab; /* Slab holding prefix nodes */
+ struct bgp_pending_tx *ptx; /* Routes waiting to be sent */
+ struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@@ -376,11 +399,16 @@ struct bgp_channel {
u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */
u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */
+
+ u8 feed_out_table; /* Refeed into out_table */
};
struct bgp_prefix {
- node buck_node; /* Node in per-bucket list */
+ node buck_node_xx; /* Node in per-bucket list */
struct bgp_prefix *next; /* Node in prefix hash table */
+ struct bgp_bucket *last; /* Last bucket sent with this prefix */
+ struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */
+ btime lastmod; /* Last modification of this prefix */
u32 hash;
u32 path_id;
net_addr net[0];
@@ -389,11 +417,24 @@ struct bgp_prefix {
struct bgp_bucket {
node send_node; /* Node in send queue */
struct bgp_bucket *next; /* Node in bucket hash table */
- list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */
+ list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */
u32 hash; /* Hash over extended attributes */
+ u32 px_uc; /* How many prefixes are linking this bucket */
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
+struct bgp_pending_tx {
+ resource r;
+ pool *pool;
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
+
+ HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+};
+
struct bgp_export_state {
struct bgp_proto *proto;
struct bgp_channel *channel;
@@ -404,7 +445,7 @@ struct bgp_export_state {
int mpls;
u32 attrs_seen[1];
- uint err_withdraw;
+ uint err_reject;
uint local_next_hop;
};
@@ -430,6 +471,7 @@ struct bgp_parse_state {
int as4_session;
int add_path;
int mpls;
+ int reach_nlri_step;
u32 attrs_seen[256/32];
@@ -456,13 +498,12 @@ struct bgp_parse_state {
uint err_subcode;
jmp_buf err_jmpbuf;
- struct hostentry *hostentry;
adata *mpls_labels;
/* Cached state for bgp_rte_update() */
u32 last_id;
struct rte_src *last_src;
- rta *cached_rta;
+ ea_list *cached_ea;
};
#define BGP_PORT 179
@@ -475,6 +516,9 @@ struct bgp_parse_state {
#define BGP_RX_BUFFER_EXT_SIZE 65535
#define BGP_TX_BUFFER_EXT_SIZE 65535
+#define BGP_CF_WALK_CHANNELS(P,C) WALK_LIST(C, P->c.channels) if (C->c.channel == &channel_bgp)
+#define BGP_WALK_CHANNELS(P,C) WALK_LIST(C, P->p.channels) if (C->c.channel == &channel_bgp)
+
static inline int bgp_channel_is_ipv4(struct bgp_channel *c)
{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; }
@@ -487,6 +531,12 @@ static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c)
static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c)
{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
+static inline int bgp_channel_is_role_applicable(struct bgp_channel *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
+static inline int bgp_cc_is_role_applicable(struct bgp_channel_config *c)
+{ return (c->afi == BGP_AF_IPV4 || c->afi == BGP_AF_IPV6); }
+
static inline uint bgp_max_packet_length(struct bgp_conn *conn)
{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
@@ -513,14 +563,17 @@ void bgp_refresh_begin(struct bgp_channel *c);
void bgp_refresh_end(struct bgp_channel *c);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len);
-
-struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
-struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
+const char *bgp_format_role_name(u8 role);
static inline int
-rta_resolvable(rta *a)
+rte_resolvable(const rte *rt)
{
- return a->dest == RTD_UNICAST;
+ eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop);
+ if (!nhea)
+ return 0;
+
+ struct nexthop_adata *nhad = (void *) nhea->u.ptr;
+ return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE);
}
extern struct rte_owner_class bgp_rte_owner_class;
@@ -539,63 +592,41 @@ extern struct rte_owner_class bgp_rte_owner_class;
/* attrs.c */
-static inline eattr *
-bgp_find_attr(ea_list *attrs, uint code)
-{
- return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code));
-}
-
eattr *
-bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val);
-
-static inline void
-bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
-
-static inline void
-bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
+bgp_find_attr(ea_list *attrs, uint code);
-static inline void
-bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len)
-{
- struct adata *a = lp_alloc_adata(pool, len);
- bmemcpy(a->data, data, len);
- bgp_set_attr(to, pool, code, flags, (uintptr_t) a);
-}
-
-static inline void
-bgp_unset_attr(ea_list **to, struct linpool *pool, uint code)
-{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; }
+void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val);
+void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad);
+void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len);
+void bgp_unset_attr(ea_list **to, uint code);
int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end);
ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
-void bgp_finish_attrs(struct bgp_parse_state *s, rta *a);
+void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
+
+void bgp_setup_out_table(struct bgp_channel *c);
+
+void bgp_init_pending_tx(struct bgp_channel *c);
+void bgp_free_pending_tx(struct bgp_channel *c);
-void bgp_init_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b);
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_init_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp);
+void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
-int bgp_rte_better(struct rte *, struct rte *);
-int bgp_rte_mergable(rte *pri, rte *sec);
-int bgp_rte_recalculate(rtable_private *table, net *net, rte *new, rte *old, rte *old_best);
-void bgp_rte_modify_stale(struct rt_export_request *, const net_addr *, struct rt_pending_export *, rte **, uint);
-u32 bgp_rte_igp_metric(struct rte *);
+int bgp_rte_better(const rte *, const rte *);
+int bgp_rte_mergable(const rte *pri, const rte *sec);
+int bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best);
+void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count);
+u32 bgp_rte_igp_metric(const rte *);
void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old);
int bgp_preexport(struct channel *, struct rte *);
-int bgp_get_attr(const struct eattr *e, byte *buf, int buflen);
-void bgp_get_route_info(struct rte *, byte *);
-int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad);
+void bgp_get_route_info(const rte *, byte *);
+int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad);
-static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
+static inline struct bgp_proto *bgp_rte_proto(const rte *rte)
{
return (rte->src->owner->class == &bgp_rte_owner_class) ?
SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL;
@@ -605,15 +636,17 @@ static inline struct bgp_proto *bgp_rte_proto(struct rte *rte)
#define BGP_AIGP_MAX U64(0xffffffffffffffff)
static inline u64
-bgp_total_aigp_metric(rta *a)
+bgp_total_aigp_metric(const rte *e)
{
u64 metric = BGP_AIGP_MAX;
const struct adata *ad;
- bgp_total_aigp_metric_(a, &metric, &ad);
+ bgp_total_aigp_metric_(e, &metric, &ad);
return metric;
}
+void bgp_register_attrs(void);
+
/* packets.c */
@@ -625,6 +658,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
void bgp_kick_tx(void *vconn);
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, uint size);
+void bgp_uncork(void *vp);
const char * bgp_error_dsc(unsigned code, unsigned subcode);
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
@@ -650,26 +684,32 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */
-#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */
-#define BA_AS_PATH 0x02 /* WM */
-#define BA_NEXT_HOP 0x03 /* WM */
-#define BA_MULTI_EXIT_DISC 0x04 /* ON */
-#define BA_LOCAL_PREF 0x05 /* WD */
-#define BA_ATOMIC_AGGR 0x06 /* WD */
-#define BA_AGGREGATOR 0x07 /* OT */
-#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */
-#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */
-#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */
-#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */
-#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */
-#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */
-#define BA_AS4_PATH 0x11 /* RFC 6793 */
-#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
-#define BA_AIGP 0x1a /* RFC 7311 */
-#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
+enum bgp_attr_id {
+ BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */
+ BA_AS_PATH = 0x02, /* WM */
+ BA_NEXT_HOP = 0x03, /* WM */
+ BA_MULTI_EXIT_DISC = 0x04, /* ON */
+ BA_LOCAL_PREF = 0x05, /* WD */
+ BA_ATOMIC_AGGR = 0x06, /* WD */
+ BA_AGGREGATOR = 0x07, /* OT */
+ BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */
+ BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */
+ BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */
+ BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */
+ BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */
+ BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */
+ BA_AS4_PATH = 0x11, /* RFC 6793 */
+ BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */
+ BA_AIGP = 0x1a, /* RFC 7311 */
+ BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */
+#define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */
/* Bird's private internal BGP attributes */
-#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */
+ BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */
+
+/* Maximum */
+ BGP_ATTR_MAX,
+};
/* BGP connection states */
@@ -762,10 +802,5 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define ORIGIN_EGP 1
#define ORIGIN_INCOMPLETE 2
-/* Loop */
-
-#define BGP_ENTER(bgp) birdloop_enter(bgp->p.loop)
-#define BGP_LEAVE(bgp) birdloop_leave(bgp->p.loop)
-#define BGP_ASSERT_INSIDE(bgp) ASSERT_DIE((bgp->p.loop != &main_birdloop) && birdloop_inside(bgp->p.loop))
#endif
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 7c0d8d65..a37bb27a 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -19,19 +19,19 @@ CF_DECLS
CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR,
- START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH,
- BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR,
- BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY,
+ START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER,
+ BGP_LOCAL_PREF, BGP_MED,
SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE,
IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR,
- DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID,
- BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
+ DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES,
+ IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
- DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE,
- FIRST, FREE)
+ DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE,
+ FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER,
+ RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC, GLOBAL)
%type <i> bgp_nh
%type <i32> bgp_afi
@@ -40,10 +40,14 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER,
CONFIGURATION, CHANGE, DECONFIGURED, CONNECTION, REJECTED, COLLISION,
OUT, OF, RESOURCES)
-%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag
+%type<i> bgp_cease_mask bgp_cease_list bgp_cease_flag bgp_role_name
CF_GRAMMAR
+/* Workaround for collisions between keywords and symbols */
+toksym: ROLE | PEER | PROVIDER | CUSTOMER | RS_SERVER | RS_CLIENT ;
+toksym: BGP_MED | BGP_LOCAL_PREF | SOURCE ;
+
proto: bgp_proto '}' ;
bgp_proto_start: proto_start BGP {
@@ -73,6 +77,7 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->llgr_mode = -1;
BGP_CFG->llgr_time = 3600;
BGP_CFG->setkey = 1;
+ BGP_CFG->local_role = BGP_ROLE_UNDEFINED;
BGP_CFG->dynamic_name = "dynbgp";
BGP_CFG->check_link = -1;
}
@@ -115,6 +120,14 @@ bgp_cease_flag:
| OUT OF RESOURCES { $$ = 1 << 8; }
;
+bgp_role_name:
+ PEER { $$ = BGP_ROLE_PEER; }
+ | PROVIDER { $$ = BGP_ROLE_PROVIDER; }
+ | CUSTOMER { $$ = BGP_ROLE_CUSTOMER; }
+ | RS_SERVER { $$ = BGP_ROLE_RS_SERVER; }
+ | RS_CLIENT { $$ = BGP_ROLE_RS_CLIENT; }
+ ;
+
bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
@@ -144,7 +157,8 @@ bgp_proto:
| bgp_proto RS CLIENT bool ';' { BGP_CFG->rs_client = $4; }
| bgp_proto CONFEDERATION expr ';' { BGP_CFG->confederation = $3; }
| bgp_proto CONFEDERATION MEMBER bool ';' { BGP_CFG->confederation_member = $4; }
- | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; }
+ | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; if (($4 && $4<3) || ($4>65535)) cf_error("Hold time must be in range 3-65535 or zero"); }
+ | bgp_proto MIN HOLD TIME expr ';' { BGP_CFG->min_hold_time = $5; }
| bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; }
| bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; }
| bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; }
@@ -168,7 +182,8 @@ bgp_proto:
| bgp_proto START DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; log(L_WARN "%s: Start delay time option is deprecated, use connect delay time", this_proto->name); }
| bgp_proto CONNECT DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; }
| bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; }
- | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; }
+ | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; if (($4<1) || ($4>65535)) cf_error("Keepalive time must be in range 1-65535"); }
+ | bgp_proto MIN KEEPALIVE TIME expr ';' { BGP_CFG->min_keepalive_time = $5; }
| bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; }
| bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; }
| bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; }
@@ -185,6 +200,7 @@ bgp_proto:
| bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
| bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; }
+ | bgp_proto ALLOW BGP_MED bool ';' { BGP_CFG->allow_med = $4; }
| bgp_proto ALLOW AS SETS bool ';' { BGP_CFG->allow_as_sets = $5; }
| bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; }
| bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; }
@@ -198,6 +214,8 @@ bgp_proto:
| bgp_proto BFD GRACEFUL ';' { init_bfd_opts(&BGP_CFG->bfd); BGP_CFG->bfd->mode = BGP_BFD_GRACEFUL; }
| bgp_proto BFD { open_bfd_opts(&BGP_CFG->bfd); } bfd_opts { close_bfd_opts(); } ';'
| bgp_proto ENFORCE FIRST AS bool ';' { BGP_CFG->enforce_first_as = $5; }
+ | bgp_proto LOCAL ROLE bgp_role_name ';' { BGP_CFG->local_role = $4; }
+ | bgp_proto REQUIRE ROLES bool ';' { BGP_CFG->require_roles = $4; }
;
bgp_afi:
@@ -252,11 +270,17 @@ bgp_channel_item:
| NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; }
| NEXT HOP SELF bgp_nh { BGP_CC->next_hop_self = $4; }
| NEXT HOP KEEP bgp_nh { BGP_CC->next_hop_keep = $4; }
+ | NEXT HOP PREFER GLOBAL { BGP_CC->next_hop_prefer = NHP_GLOBAL; }
| MANDATORY bool { BGP_CC->mandatory = $2; }
| MISSING LLADDR bgp_lladdr { log(L_WARN "%s.%s: Missing lladdr option is deprecated and ignored, remove it", this_proto->name, this_channel->name); }
| GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; }
| GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; }
| SECONDARY bool { BGP_CC->secondary = $2; }
+ | VALIDATE bool {
+ BGP_CC->validate = $2;
+ if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW)
+ cf_error("Validate option limited to flowspec channels");
+ }
| GRACEFUL RESTART bool { BGP_CC->gr_able = $3; }
| LONG LIVED GRACEFUL RESTART bool { BGP_CC->llgr_able = $5; }
| LONG LIVED STALE TIME expr { BGP_CC->llgr_time = $5; }
@@ -280,6 +304,16 @@ bgp_channel_item:
else
cf_error("Mismatched IGP table type");
}
+ | BASE TABLE rtable {
+ if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW)
+ cf_error("Base table option limited to flowspec channels");
+
+ if (((BGP_CC->afi == BGP_AF_FLOW4) && ($3->addr_type == NET_IP4)) ||
+ ((BGP_CC->afi == BGP_AF_FLOW6) && ($3->addr_type == NET_IP6)))
+ BGP_CC->base_table = $3;
+ else
+ cf_error("Mismatched base table type");
+ }
;
bgp_channel_opts:
@@ -297,41 +331,14 @@ bgp_channel_end:
if (!this_channel->table)
cf_error("Routing table not specified");
+ if (BGP_CC->import_table)
+ this_channel->in_keep |= RIK_PREFILTER;
+
this_channel = NULL;
};
bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end;
-
-dynamic_attr: BGP_ORIGIN
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); } ;
-dynamic_attr: BGP_PATH
- { $$ = f_new_dynamic_attr(EAF_TYPE_AS_PATH, T_PATH, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); } ;
-dynamic_attr: BGP_NEXT_HOP
- { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)); } ;
-dynamic_attr: BGP_MED
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); } ;
-dynamic_attr: BGP_LOCAL_PREF
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); } ;
-dynamic_attr: BGP_ATOMIC_AGGR
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_ATOMIC_AGGR)); } ;
-dynamic_attr: BGP_AGGREGATOR
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AGGREGATOR)); } ;
-dynamic_attr: BGP_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); } ;
-dynamic_attr: BGP_ORIGINATOR_ID
- { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); } ;
-dynamic_attr: BGP_CLUSTER_LIST
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ;
-dynamic_attr: BGP_EXT_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ;
-dynamic_attr: BGP_AIGP
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ;
-dynamic_attr: BGP_LARGE_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ;
-
-
-
CF_ENUM(T_ENUM_BGP_ORIGIN, ORIGIN_, IGP, EGP, INCOMPLETE)
CF_CODE
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 88b66040..978a1891 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -15,8 +15,8 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
-#include "nest/attrs.h"
+#include "nest/rt.h"
+#include "lib/attrs.h"
#include "proto/mrt/mrt.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
@@ -238,6 +238,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn)
caps->ext_messages = p->cf->enable_extended_messages;
caps->route_refresh = p->cf->enable_refresh;
caps->enhanced_refresh = p->cf->enable_refresh;
+ caps->role = p->cf->local_role;
if (caps->as4_support)
caps->as4_number = p->public_as;
@@ -261,7 +262,7 @@ bgp_prepare_capabilities(struct bgp_conn *conn)
}
/* Allocate and fill per-AF fields */
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
ac = &caps->af_data[caps->af_count++];
ac->afi = c->afi;
@@ -350,6 +351,13 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
*buf++ = 0; /* Capability data length */
}
+ if (caps->role != BGP_ROLE_UNDEFINED)
+ {
+ *buf++ = 9; /* Capability 9: Announce chosen BGP role */
+ *buf++ = 1; /* Capability data length */
+ *buf++ = caps->role;
+ }
+
if (caps->gr_aware)
{
*buf++ = 64; /* Capability 64: Support for graceful restart */
@@ -449,11 +457,15 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
struct bgp_proto *p = conn->bgp;
struct bgp_caps *caps;
struct bgp_af_caps *ac;
+ uint err_subcode = 0;
int i, cl;
u32 af;
if (!conn->remote_caps)
+ {
caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps));
+ caps->role = BGP_ROLE_UNDEFINED;
+ }
else
{
caps = conn->remote_caps;
@@ -506,13 +518,28 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
}
break;
- case 6: /* Extended message length capability, RFC draft */
+ case 6: /* Extended message length capability, RFC 8654 */
if (cl != 0)
goto err;
caps->ext_messages = 1;
break;
+ case 9: /* BGP role capability, RFC 9234 */
+ if (cl != 1)
+ goto err;
+
+ /* Reserved value */
+ if (pos[2] == BGP_ROLE_UNDEFINED)
+ { err_subcode = 11; goto err; }
+
+ /* Multiple inconsistent values */
+ if ((caps->role != BGP_ROLE_UNDEFINED) && (caps->role != pos[2]))
+ { err_subcode = 11; goto err; }
+
+ caps->role = pos[2];
+ break;
+
case 64: /* Graceful restart capability, RFC 4724 */
if (cl % 4 != 2)
goto err;
@@ -638,7 +665,7 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
err:
mb_free(caps);
- bgp_error(conn, 2, 0, NULL, 0);
+ bgp_error(conn, 2, err_subcode, NULL, 0);
return -1;
}
@@ -654,7 +681,7 @@ bgp_check_capabilities(struct bgp_conn *conn)
/* This is partially overlapping with bgp_conn_enter_established_state(),
but we need to run this just after we receive OPEN message */
- WALK_LIST(c, p->p.channels)
+ BGP_WALK_CHANNELS(p, c)
{
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi);
@@ -684,7 +711,7 @@ bgp_read_options(struct bgp_conn *conn, byte *pos, uint len, uint rest)
struct bgp_proto *p = conn->bgp;
int ext = 0;
- /* Handle extended length (draft-ietf-idr-ext-opt-param-07) */
+ /* Handle extended length, RFC 9072 */
if ((len > 0) && (rest > 0) && (pos[0] == 255))
{
if (rest < 3)
@@ -769,7 +796,7 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
buf[10] = 2; /* Option 2: Capability list */
buf[11] = len; /* Option data length */
}
- else /* draft-ietf-idr-ext-opt-param-07 */
+ else /* Extended length, RFC 9072 */
{
/* Move capabilities 4 B forward */
memmove(buf + 16, pos, len);
@@ -820,9 +847,25 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
if (bgp_read_options(conn, pkt+29, pkt[28], len-29) < 0)
return;
+ /* RFC 4271 4.2 - hold time must be either 0 or at least 3 */
if (hold > 0 && hold < 3)
{ bgp_error(conn, 2, 6, pkt+22, 2); return; }
+ /* Compute effective hold and keepalive times */
+ uint hold_time = MIN(hold, p->cf->hold_time);
+ uint keepalive_time = p->cf->keepalive_time ?
+ (p->cf->keepalive_time * hold_time / p->cf->hold_time) :
+ hold_time / 3;
+
+ /* Keepalive time might be rounded down to zero */
+ if (hold_time && !keepalive_time)
+ keepalive_time = 1;
+
+ /* Check effective values against configured minimums */
+ if ((hold_time < p->cf->min_hold_time) ||
+ (keepalive_time < p->cf->min_keepalive_time))
+ { bgp_error(conn, 2, 6, pkt+22, 2); return; }
+
/* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
if (!id || (p->is_internal && id == p->local_id))
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
@@ -854,6 +897,22 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
conn->received_as = asn;
}
+ /* RFC 9234 4.2 - check role agreement */
+ u8 local_role = p->cf->local_role;
+ u8 neigh_role = caps->role;
+
+ if ((local_role != BGP_ROLE_UNDEFINED) &&
+ (neigh_role != BGP_ROLE_UNDEFINED) &&
+ !((local_role == BGP_ROLE_PEER && neigh_role == BGP_ROLE_PEER) ||
+ (local_role == BGP_ROLE_CUSTOMER && neigh_role == BGP_ROLE_PROVIDER) ||
+ (local_role == BGP_ROLE_PROVIDER && neigh_role == BGP_ROLE_CUSTOMER) ||
+ (local_role == BGP_ROLE_RS_CLIENT && neigh_role == BGP_ROLE_RS_SERVER) ||
+ (local_role == BGP_ROLE_RS_SERVER && neigh_role == BGP_ROLE_RS_CLIENT)))
+ { bgp_error(conn, 2, 11, &neigh_role, -1); return; }
+
+ if ((p->cf->require_roles) && (neigh_role == BGP_ROLE_UNDEFINED))
+ { bgp_error(conn, 2, 11, &neigh_role, -1); return; }
+
/* Check the other connection */
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
switch (other->state)
@@ -904,8 +963,8 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
}
/* Update our local variables */
- conn->hold_time = MIN(hold, p->cf->hold_time);
- conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
+ conn->hold_time = hold_time;
+ conn->keepalive_time = keepalive_time;
conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
p->remote_id = id;
@@ -932,14 +991,18 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
#define WITHDRAW(msg, args...) \
({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
+#define REJECT(msg, args...) \
+ ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; })
+
#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP "Missing NEXT_HOP attribute"
#define NO_LABEL_STACK "Missing MPLS stack"
+#define MISMATCHED_AF " - mismatched address family (%I for %s)"
static void
-bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
+bgp_apply_next_hop(struct bgp_parse_state *s, ea_list **to, ip_addr gw, ip_addr ll)
{
struct bgp_proto *p = s->proto;
struct bgp_channel *c = s->channel;
@@ -949,67 +1012,88 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
neighbor *nbr = NULL;
/* GW_DIRECT -> single_hop -> p->neigh != NULL */
- if (ipa_nonzero(gw))
+ if (ipa_nonzero2(gw))
nbr = neigh_find(&p->p, gw, NULL, 0);
else if (ipa_nonzero(ll))
nbr = neigh_find(&p->p, ll, p->neigh->iface, 0);
+ else
+ WITHDRAW(BAD_NEXT_HOP " - zero address");
- if (!nbr || (nbr->scope == SCOPE_HOST))
- WITHDRAW(BAD_NEXT_HOP);
+ if (!nbr)
+ WITHDRAW(BAD_NEXT_HOP " - address %I not directly reachable", ipa_nonzero(gw) ? gw : ll);
- a->dest = RTD_UNICAST;
- a->nh.gw = nbr->addr;
- a->nh.iface = nbr->iface;
- a->igp_metric = c->cf->cost;
+ if (nbr->scope == SCOPE_HOST)
+ WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr);
+
+ ea_set_attr_u32(to, &ea_gen_igp_metric, 0, c->cf->cost);
+
+ struct nexthop_adata nhad = {
+ .nh = {
+ .gw = nbr->addr,
+ .iface = nbr->iface,
+ },
+ .ad = {
+ .length = sizeof nhad - sizeof nhad.ad,
+ },
+ };
+ ea_set_attr_data(to, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length);
}
else /* GW_RECURSIVE */
{
- if (ipa_zero(gw))
- WITHDRAW(BAD_NEXT_HOP);
+ if (ipa_zero2(gw))
+ WITHDRAW(BAD_NEXT_HOP " - zero address");
rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
- s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
-
- if (!s->mpls)
- rta_apply_hostentry(a, s->hostentry, NULL, s->pool);
+ ip_addr lla = (c->cf->next_hop_prefer == NHP_LOCAL) ? ll : IPA_NONE;
- /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
+ if (s->mpls)
+ {
+ u32 labels[BGP_MPLS_MAX];
+ ea_set_hostentry(to, c->c.table, tab, gw, lla, BGP_MPLS_MAX, labels);
+ }
+ else
+ ea_set_hostentry(to, c->c.table, tab, gw, lla, 0, NULL);
}
}
static void
-bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
+bgp_apply_mpls_labels(struct bgp_parse_state *s, ea_list **to, u32 lnum, u32 labels[lnum])
{
if (lnum > MPLS_MAX_LABEL_STACK)
{
REPORT("Too many MPLS labels ($u)", lnum);
- a->dest = RTD_UNREACHABLE;
- a->hostentry = NULL;
- a->nh = (struct nexthop) { };
+ ea_set_dest(to, 0, RTD_UNREACHABLE);
return;
}
/* Handle implicit NULL as empty MPLS stack */
if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
- lnum = 0;
+ lnum = s->mpls_labels->length = 0;
if (s->channel->cf->gw_mode == GW_DIRECT)
{
- a->nh.labels = lnum;
- memcpy(a->nh.label, labels, 4*lnum);
+ eattr *e = ea_find(*to, &ea_gen_nexthop);
+ struct {
+ struct nexthop_adata nhad;
+ u32 labels[MPLS_MAX_LABEL_STACK];
+ } nh;
+
+ memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length);
+ nh.nhad.nh.labels = lnum;
+ memcpy(nh.labels, labels, lnum * sizeof(u32));
+ nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32);
}
else /* GW_RECURSIVE */
{
- mpls_label_stack ms;
-
- ms.len = lnum;
- memcpy(ms.stack, labels, 4*lnum);
- rta_apply_hostentry(a, s->hostentry, &ms, s->pool);
+ eattr *e = ea_find(*to, &ea_gen_hostentry);
+ ASSERT_DIE(e);
+ struct hostentry_adata *head = (void *) e->u.ptr;
+ memcpy(&head->labels, labels, lnum * sizeof(u32));
+ head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data;
}
}
-
static int
bgp_match_src(struct bgp_export_state *s, int mode)
{
@@ -1039,13 +1123,17 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
return 1;
/* Keep it when explicitly set in export filter */
- if (a->type & EAF_FRESH)
+ if (a->fresh)
return 1;
/* Check for non-matching AF */
if ((ipa_is_ip4(*nh) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
return 0;
+ /* Do not pass NEXT_HOP between different VRFs */
+ if (p->p.vrf && s->src && s->src->p.vrf && (p->p.vrf != s->src->p.vrf))
+ return 0;
+
/* Keep it when exported to internal peers */
if (p->is_interior && ipa_nonzero(*nh))
return 1;
@@ -1056,31 +1144,45 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
return p->neigh && (p->neigh->iface == ifa);
}
-static inline int
+static inline struct nexthop *
bgp_use_gateway(struct bgp_export_state *s)
{
struct bgp_proto *p = s->proto;
struct bgp_channel *c = s->channel;
- rta *ra = s->route->attrs;
+ ea_list *ra = s->route->attrs;
/* Handle next hop self option - also applies to gateway */
if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
- return 0;
+ return NULL;
+
+ eattr *nhea = ea_find(ra, &ea_gen_nexthop);
+ if (!nhea)
+ return NULL;
/* We need one valid global gateway */
- if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
- return 0;
+ struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ if (!NEXTHOP_IS_REACHABLE(nhad) ||
+ !NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) ||
+ ipa_is_link_local(nhad->nh.gw))
+ return NULL;
/* Check for non-matching AF */
- if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
+ if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
+ return NULL;
+
+ /* Do not use gateway from different VRF */
+ if (p->p.vrf && nhad->nh.iface && (p->p.vrf != nhad->nh.iface->master))
return 0;
/* Use it when exported to internal peers */
if (p->is_interior)
- return 1;
+ return &nhad->nh;
/* Use it when forwarded to single-hop BGP peer on on the same iface */
- return p->neigh && (p->neigh->iface == ra->nh.iface);
+ if (p->neigh && (p->neigh->iface == nhad->nh.iface))
+ return &nhad->nh;
+
+ return NULL;
}
static void
@@ -1088,60 +1190,64 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
{
if (!a || !bgp_use_next_hop(s, a))
{
- if (bgp_use_gateway(s))
+ struct nexthop *nhloc;
+ if (nhloc = bgp_use_gateway(s))
{
- rta *ra = s->route->attrs;
- ip_addr nh[1] = { ra->nh.gw };
- bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
+ ip_addr nh[1] = { nhloc->gw };
+ bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16);
if (s->mpls)
{
u32 implicit_null = BGP_MPLS_NULL;
- u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
- uint lnum = ra->nh.labels ? ra->nh.labels : 1;
- bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
+ u32 *labels = nhloc->labels ? nhloc->label : &implicit_null;
+ uint lnum = nhloc->labels ? nhloc->labels : 1;
+ bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
}
+ else
+ bgp_unset_attr(to, BA_MPLS_LABEL_STACK);
}
else
{
ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
- bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
+ bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
s->local_next_hop = 1;
/* TODO: Use local MPLS assigned label */
if (s->mpls)
{
u32 implicit_null = BGP_MPLS_NULL;
- bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
+ bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
}
+ else
+ bgp_unset_attr(to, BA_MPLS_LABEL_STACK);
}
}
/* Check if next hop is valid */
a = bgp_find_attr(*to, BA_NEXT_HOP);
if (!a)
- WITHDRAW(NO_NEXT_HOP);
+ REJECT(NO_NEXT_HOP);
ip_addr *nh = (void *) a->u.ptr->data;
ip_addr peer = s->proto->remote_ip;
uint len = a->u.ptr->length;
/* Forbid zero next hop */
- if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
- WITHDRAW(BAD_NEXT_HOP);
+ if (ipa_zero2(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
+ REJECT(BAD_NEXT_HOP " - zero address");
/* Forbid next hop equal to neighbor IP */
if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
- WITHDRAW(BAD_NEXT_HOP);
+ REJECT(BAD_NEXT_HOP " - neighbor address %I", peer);
/* Forbid next hop with non-matching AF */
if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
!s->channel->ext_next_hop)
- WITHDRAW(BAD_NEXT_HOP);
+ REJECT(BAD_NEXT_HOP MISMATCHED_AF, nh[0], s->channel->desc->name);
/* Just check if MPLS stack */
if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
- WITHDRAW(NO_LABEL_STACK);
+ REJECT(NO_LABEL_STACK);
}
static uint
@@ -1175,7 +1281,7 @@ bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size
}
static void
-bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, ea_list **to)
{
struct bgp_channel *c = s->channel;
struct adata *ad = lp_alloc_adata(s->pool, 32);
@@ -1212,12 +1318,12 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
ad->length = 16;
if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
- WITHDRAW(BAD_NEXT_HOP);
+ WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name);
// XXXX validate next hop
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
- bgp_apply_next_hop(s, a, nh[0], nh[1]);
+ bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, to, nh[0], nh[1]);
}
static uint
@@ -1255,7 +1361,7 @@ bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint siz
}
static void
-bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, ea_list **to)
{
struct bgp_channel *c = s->channel;
struct adata *ad = lp_alloc_adata(s->pool, 32);
@@ -1293,12 +1399,12 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
bgp_parse_error(s, 9);
if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
- WITHDRAW(BAD_NEXT_HOP);
+ WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name);
// XXXX validate next hop
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
- bgp_apply_next_hop(s, a, nh[0], nh[1]);
+ bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, to, nh[0], nh[1]);
}
@@ -1310,7 +1416,7 @@ bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte
}
static void
-bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
+bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
{
/*
* Although we expect no next hop and RFC 7606 7.11 states that attribute
@@ -1322,11 +1428,11 @@ bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, ui
}
static void
-bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
+bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a, ea_list **to)
{
/* NEXT_HOP shall not pass */
if (a)
- bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
+ bgp_unset_attr(to, BA_NEXT_HOP);
}
@@ -1335,7 +1441,7 @@ bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
*/
static void
-bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
+bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_list *a0)
{
if (path_id != s->last_id)
{
@@ -1344,28 +1450,27 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
s->last_src = rt_get_source(&s->proto->p, path_id);
s->last_id = path_id;
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ ea_free(s->cached_ea);
+ s->cached_ea = NULL;
}
if (!a0)
{
+ /* Route update was changed to withdraw */
+ if (s->err_withdraw && s->reach_nlri_step)
+ REPORT("Invalid route %N withdrawn", n);
+
/* Route withdraw */
rte_update(&s->channel->c, n, NULL, s->last_src);
return;
}
/* Prepare cached route attributes */
- if (s->cached_rta == NULL)
- {
- /* Workaround for rta_lookup() breaking eattrs */
- ea_list *ea = a0->eattrs;
- s->cached_rta = rta_lookup(a0);
- a0->eattrs = ea;
- }
+ if (s->cached_ea == NULL)
+ s->cached_ea = ea_lookup(a0, 0);
rte e0 = {
- .attrs = s->cached_rta,
+ .attrs = s->cached_ea,
.src = s->last_src,
};
@@ -1392,9 +1497,10 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte
}
static void
-bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
+bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, ea_list **to)
{
- u32 labels[BGP_MPLS_MAX], label;
+ u32 labels[BGP_MPLS_MAX];
+ u32 label;
uint lnum = 0;
do {
@@ -1408,31 +1514,20 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p
/* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but
fixed-size 24-bit Compatibility field, which MUST be ignored */
- if (!a && !s->err_withdraw)
+ if (!s->reach_nlri_step)
return;
}
while (!(label & BGP_MPLS_BOS));
- if (!a)
+ if (!*to)
return;
- /* Attach MPLS attribute unless we already have one */
- if (!s->mpls_labels)
- {
- s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
- }
-
- /* Overwrite data in the attribute */
- s->mpls_labels->length = 4*lnum;
- memcpy(s->mpls_labels->data, labels, 4*lnum);
-
/* Update next hop entry in rta */
- bgp_apply_mpls_labels(s, a, labels, lnum);
+ bgp_apply_mpls_labels(s, to, lnum, labels);
/* Attributes were changed, invalidate cached entry */
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ rta_free(s->cached_ea);
+ s->cached_ea = NULL;
return;
}
@@ -1468,14 +1563,14 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1501,7 +1596,7 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
if (l > IP4_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10);
@@ -1553,14 +1648,14 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1586,7 +1681,7 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
if (l > IP6_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10);
@@ -1641,14 +1736,14 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1674,7 +1769,7 @@ bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
/* Decode route distinguisher */
if (l < 64)
@@ -1738,14 +1833,14 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1771,7 +1866,7 @@ bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
/* Decode route distinguisher */
if (l < 64)
@@ -1825,14 +1920,14 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1913,14 +2008,14 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -2147,6 +2242,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
* var IPv4 Network Layer Reachability Information
*/
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
+
int lr, la;
la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
@@ -2168,6 +2265,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
static byte *
bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
+
/*
* 2 B IPv4 Withdrawn Routes Length (zero)
* --- IPv4 Withdrawn Routes NLRI (unused)
@@ -2291,11 +2390,14 @@ bgp_create_update(struct bgp_channel *c, byte *buf)
again: ;
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
+
/* Initialize write state */
struct bgp_write_state s = {
.proto = p,
.channel = c,
- .pool = c->c.rte_update_pool,
+ .pool = tmp_linpool,
.mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop,
.as4_session = p->as4_session,
.add_path = c->add_path_tx,
@@ -2303,7 +2405,7 @@ again: ;
};
/* Try unreachable bucket */
- if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
bgp_create_ip_unreach(&s, buck, buf, end):
@@ -2313,14 +2415,14 @@ again: ;
}
/* Try reachable buckets */
- if (!EMPTY_LIST(c->bucket_queue))
+ if (!EMPTY_LIST(c->ptx->bucket_queue))
{
- buck = HEAD(c->bucket_queue);
+ buck = HEAD(c->ptx->bucket_queue);
/* Cleanup empty buckets */
- if (EMPTY_LIST(buck->prefixes))
+ if (bgp_done_bucket(c, buck))
{
- bgp_free_bucket(c, buck);
+ lp_restore(tmp_linpool, &tmpp);
goto again;
}
@@ -2328,13 +2430,13 @@ again: ;
bgp_create_ip_reach(&s, buck, buf, end):
bgp_create_mp_reach(&s, buck, buf, end);
- if (EMPTY_LIST(buck->prefixes))
- bgp_free_bucket(c, buck);
- else
- bgp_defer_bucket(c, buck);
+ bgp_done_bucket(c, buck);
if (!res)
+ {
+ lp_restore(tmp_linpool, &tmpp);
goto again;
+ }
goto done;
}
@@ -2345,7 +2447,7 @@ again: ;
done:
BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
p->stats.tx_updates++;
- lp_flush(s.pool);
+ lp_restore(tmp_linpool, &tmpp);
return res;
}
@@ -2412,7 +2514,6 @@ static inline void
bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
{
struct bgp_channel *c = bgp_get_channel(s->proto, afi);
- rta *a = NULL;
if (!c)
DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
@@ -2434,26 +2535,22 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
if (ea)
{
- a = allocz(RTA_MAX_SIZE);
-
- a->source = RTS_BGP;
- a->scope = SCOPE_UNIVERSE;
- a->from = s->proto->remote_ip;
- a->eattrs = ea;
- a->pref = c->c.preference;
+ ea_set_attr_data(&ea, &ea_gen_from, 0, &s->proto->remote_ip, sizeof(ip_addr));
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->c.preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BGP);
- c->desc->decode_next_hop(s, nh, nh_len, a);
- bgp_finish_attrs(s, a);
+ c->desc->decode_next_hop(s, nh, nh_len, &ea);
+ bgp_finish_attrs(s, &ea);
/* Handle withdraw during next hop decoding */
if (s->err_withdraw)
- a = NULL;
+ ea = NULL;
}
- c->desc->decode_nlri(s, nlri, len, a);
+ c->desc->decode_nlri(s, nlri, len, ea);
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ rta_free(s->cached_ea);
+ s->cached_ea = NULL;
rt_unlock_source(s->last_src);
}
@@ -2477,10 +2574,13 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
bgp_start_timer(p, conn->hold_timer, conn->hold_time);
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
+
/* Initialize parse state */
struct bgp_parse_state s = {
.proto = p,
- .pool = p->rx_lp,
+ .pool = tmp_linpool,
.as4_session = p->as4_session,
};
@@ -2546,6 +2646,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
if (s.mp_unreach_len)
bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
+ s.reach_nlri_step = 1;
+
if (s.ip_reach_len)
bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
ea, s.ip_next_hop_data, s.ip_next_hop_len);
@@ -2555,8 +2657,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
ea, s.mp_next_hop_data, s.mp_next_hop_len);
done:
- rta_free(s.cached_rta);
- lp_flush(s.pool);
+ rta_free(s.cached_ea);
+ lp_restore(tmp_linpool, &tmpp);
return;
}
@@ -2879,7 +2981,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
{
ASSERT(conn->sk);
- DBG("BGP: Scheduling packet type %d\n", type);
+ struct bgp_proto *p = conn->bgp;
+ if (c)
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name);
+ else
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type);
if (c)
{
@@ -2896,7 +3002,7 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
conn->packets_to_send |= 1 << type;
if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
- ev_send_loop(conn->bgp->p.loop, conn->tx_ev);
+ proto_send_event(&p->p, conn->tx_ev);
}
void
bgp_kick_tx(void *vconn)
@@ -2909,7 +3015,7 @@ bgp_kick_tx(void *vconn)
;
if (!max && !ev_active(conn->tx_ev))
- ev_send_loop(conn->bgp->p.loop, conn->tx_ev);
+ proto_send_event(&conn->bgp->p, conn->tx_ev);
}
void
@@ -2917,13 +3023,14 @@ bgp_tx(sock *sk)
{
struct bgp_conn *conn = sk->data;
+ ASSERT_DIE(birdloop_inside(conn->bgp->p.loop));
DBG("BGP: TX hook\n");
uint max = 1024;
while (--max && (bgp_fire_tx(conn) > 0))
;
if (!max && !ev_active(conn->tx_ev))
- ev_send_loop(conn->bgp->p.loop, conn->tx_ev);
+ proto_send_event(&conn->bgp->p, conn->tx_ev);
}
@@ -2944,6 +3051,7 @@ static struct {
{ 2, 6, "Unacceptable hold time" },
{ 2, 7, "Required capability missing" }, /* [RFC5492] */
{ 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
+ { 2,11, "Role mismatch" }, /* From Open Policy, RFC 9234 */
{ 3, 0, "Invalid UPDATE message" },
{ 3, 1, "Malformed attribute list" },
{ 3, 2, "Unrecognized well-known attribute" },
@@ -3038,13 +3146,19 @@ bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode,
if (len)
{
- /* Bad peer AS - we would like to print the AS */
- if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
+ /* Bad peer AS / unacceptable hold time - print the value as decimal number */
+ if ((code == 2) && ((subcode == 2) || (subcode == 6)) && ((len == 2) || (len == 4)))
{
t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
goto done;
}
+ if ((code == 2) && (subcode == 11) && (len == 1))
+ {
+ t += bsprintf(t, " (%s)", bgp_format_role_name(get_u8(data)));
+ goto done;
+ }
+
/* RFC 8203 - shutdown communication */
if (((code == 6) && ((subcode == 2) || (subcode == 4))))
if (bgp_handle_message(p, data, len, &t))
@@ -3147,6 +3261,30 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
}
}
+void
+bgp_uncork(void *vp)
+{
+ /* The uncork event is run from &main_birdloop and there is no useful way how
+ * to assign the target loop to it, thus we have to lock it ourselves. */
+
+ struct bgp_proto *p = vp;
+ if (!p)
+ return;
+
+ birdloop_enter(p->p.loop);
+
+ if (p && p->conn && (p->conn->state == BS_ESTABLISHED) && !p->conn->sk->rx_hook)
+ {
+ struct birdsock *sk = p->conn->sk;
+ ASSERT_DIE(sk->rpos > sk->rbuf);
+ sk_resume_rx(p->p.loop, sk, bgp_rx);
+ bgp_rx(sk, sk->rpos - sk->rbuf);
+ BGP_TRACE(D_PACKETS, "Uncorked");
+ }
+
+ birdloop_leave(p->p.loop);
+}
+
/**
* bgp_rx - handle received data
* @sk: socket
@@ -3161,6 +3299,7 @@ int
bgp_rx(sock *sk, uint size)
{
struct bgp_conn *conn = sk->data;
+ struct bgp_proto *p = conn->bgp;
byte *pkt_start = sk->rbuf;
byte *end = pkt_start + size;
uint i, len;
@@ -3170,6 +3309,12 @@ bgp_rx(sock *sk, uint size)
{
if ((conn->state == BS_CLOSE) || (conn->sk != sk))
return 0;
+ if ((conn->state == BS_ESTABLISHED) && rt_cork_check(conn->bgp->uncork_ev))
+ {
+ sk_pause_rx(p->p.loop, sk);
+ BGP_TRACE(D_PACKETS, "Corked");
+ return 0;
+ }
for(i=0; i<16; i++)
if (pkt_start[i] != 0xff)
{
diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile
index 925fb102..8cd44ac1 100644
--- a/proto/mrt/Makefile
+++ b/proto/mrt/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c
index e12f7743..82fd426a 100644
--- a/proto/mrt/mrt.c
+++ b/proto/mrt/mrt.c
@@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b)
}
#define MRT_DEFINE_TYPE(S, T) \
- UNUSED static inline void mrt_put_##S##_(buffer *b, T x) \
+ static inline void UNUSED mrt_put_##S##_(buffer *b, T x) \
{ \
put_##S(b->pos, x); \
b->pos += sizeof(T); \
} \
\
- UNUSED static inline void mrt_put_##S(buffer *b, T x) \
+ static inline void UNUSED mrt_put_##S(buffer *b, T x) \
{ \
mrt_buffer_need(b, sizeof(T)); \
put_##S(b->pos, x); \
@@ -243,21 +243,15 @@ mrt_next_table(struct mrt_table_dump_state *s)
rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr);
if (s->table)
- {
- RT_LOCK(s->table);
- rt_unlock_table(RT_PRIV(s->table));
- RT_UNLOCK(s->table);
- }
+ RT_LOCKED(s->table, tab)
+ rt_unlock_table(tab);
s->table = tab;
s->ipv4 = tab ? (tab->addr_type == NET_IP4) : 0;
if (s->table)
- {
- RT_LOCK(s->table);
- rt_lock_table(RT_PRIV(s->table));
- RT_UNLOCK(s->table);
- }
+ RT_LOCKED(s->table, tab)
+ rt_lock_table(tab);
return s->table;
}
@@ -431,7 +425,7 @@ mrt_rib_table_header(struct mrt_table_dump_state *s, net_addr *n)
static void
mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r)
{
- struct ea_list *eattrs = r->attrs->eattrs;
+ struct ea_list *eattrs = r->attrs;
buffer *b = &s->buf;
if (!eattrs)
@@ -439,7 +433,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r)
/* Attribute list must be normalized for bgp_encode_attrs() */
if (!rta_is_cached(r->attrs))
- ea_normalize(eattrs);
+ eattrs = ea_normalize(eattrs, 0);
mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE);
byte *pos = b->pos;
@@ -533,7 +527,7 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path)
}
rte e = rt->rte;
- if (f_run(s->filter, &e, s->linpool, 0) <= F_ACCEPT)
+ if (f_run(s->filter, &e, 0) <= F_ACCEPT)
mrt_rib_table_entry(s, &e);
lp_flush(s->linpool);
@@ -561,13 +555,12 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path)
static struct mrt_table_dump_state *
mrt_table_dump_init(pool *pp)
{
- pool *pool = rp_new(pp, &main_birdloop, "MRT Table Dump");
+ pool *pool = rp_new(pp, "MRT Table Dump");
struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state));
s->pool = pool;
- s->parent = pp;
- s->linpool = lp_new(pool, 4080);
- s->peer_lp = lp_new(pool, 4080);
+ s->linpool = lp_new(pool);
+ s->peer_lp = lp_new(pool);
mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE);
/* We lock the current config as we may reference it indirectly by filter */
@@ -583,26 +576,21 @@ static void
mrt_table_dump_free(struct mrt_table_dump_state *s)
{
if (s->table)
- {
- RT_LOCK(s->table);
-
- if (s->table_open)
- FIB_ITERATE_UNLINK(&s->fit, &RT_PRIV(s->table)->fib);
+ RT_LOCKED(s->table, tab)
+ {
+ if (s->table_open)
+ FIB_ITERATE_UNLINK(&s->fit, &tab->fib);
- rt_unlock_table(RT_PRIV(s->table));
- RT_UNLOCK(s->table);
- }
+ rt_unlock_table(tab);
+ }
if (s->table_ptr)
- {
- RT_LOCK(s->table_ptr);
- rt_unlock_table(RT_PRIV(s->table_ptr));
- RT_UNLOCK(s->table_ptr);
- }
+ RT_LOCKED(s->table_ptr, tab)
+ rt_unlock_table(tab);
config_del_obstacle(s->config);
- rp_free(s->pool, s->parent);
+ rfree(s->pool);
}
@@ -614,14 +602,8 @@ mrt_table_dump_step(struct mrt_table_dump_state *s)
s->max = 2048;
s->bws = &bws;
- rtable_private *tab;
-
if (s->table_open)
- {
- RT_LOCK(s->table);
- tab = RT_PRIV(s->table);
goto step;
- }
while (mrt_next_table(s))
{
@@ -630,8 +612,9 @@ mrt_table_dump_step(struct mrt_table_dump_state *s)
mrt_peer_table_dump(s);
- RT_LOCK(s->table);
- tab = RT_PRIV(s->table);
+ RT_LOCKED(s->table, tab)
+ {
+
FIB_ITERATE_INIT(&s->fit, &tab->fib);
s->table_open = 1;
@@ -641,8 +624,7 @@ mrt_table_dump_step(struct mrt_table_dump_state *s)
if (s->max < 0)
{
FIB_ITERATE_PUT(&s->fit);
- RT_UNLOCK(s->table);
- return 0;
+ RT_RETURN(tab, 0);
}
/* With Always ADD_PATH option, we jump directly to second phase */
@@ -657,11 +639,12 @@ mrt_table_dump_step(struct mrt_table_dump_state *s)
FIB_ITERATE_END;
s->table_open = 0;
+ }
+
mrt_close_file(s);
mrt_peer_table_flush(s);
}
- RT_UNLOCK(s->table);
return 1;
}
@@ -689,11 +672,8 @@ mrt_timer(timer *t)
s->always_add_path = cf->always_add_path;
if (s->table_ptr)
- {
- RT_LOCK(s->table_ptr);
- rt_lock_table(RT_PRIV(s->table_ptr));
- RT_UNLOCK(s->table_ptr);
- }
+ RT_LOCKED(s->table_ptr, tab)
+ rt_lock_table(tab);
p->table_dump = s;
ev_schedule(p->event);
@@ -735,14 +715,17 @@ mrt_dump_cont(struct cli *c)
cli_printf(c, 0, "");
mrt_table_dump_free(c->rover);
- c->cont = c->cleanup = c->rover = NULL;
+ c->cont = NULL;
+ c->cleanup = NULL;
+ c->rover = NULL;
}
-static void
+static int
mrt_dump_cleanup(struct cli *c)
{
mrt_table_dump_free(c->rover);
c->rover = NULL;
+ return 0;
}
void
@@ -766,11 +749,8 @@ mrt_dump_cmd(struct mrt_dump_data *d)
s->filename = d->filename;
if (s->table_ptr)
- {
- RT_LOCK(s->table_ptr);
- rt_lock_table(RT_PRIV(s->table_ptr));
- RT_UNLOCK(s->table_ptr);
- }
+ RT_LOCKED(s->table_ptr, tab)
+ rt_lock_table(tab);
this_cli->cont = mrt_dump_cont;
this_cli->cleanup = mrt_dump_cleanup;
@@ -940,7 +920,6 @@ mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSE
struct protocol proto_mrt = {
.name = "MRT",
.template = "mrt%d",
- .class = PROTOCOL_MRT,
.proto_size = sizeof(struct mrt_proto),
.config_size = sizeof(struct mrt_config),
.init = mrt_init,
@@ -949,3 +928,9 @@ struct protocol proto_mrt = {
.reconfigure = mrt_reconfigure,
.copy_config = mrt_copy_config,
};
+
+void
+mrt_build(void)
+{
+ proto_build(&proto_mrt);
+}
diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h
index 2e616f6f..f535a391 100644
--- a/proto/mrt/mrt.h
+++ b/proto/mrt/mrt.h
@@ -13,7 +13,7 @@
#include "nest/bird.h"
#include "nest/protocol.h"
#include "lib/lists.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "lib/event.h"
#include "lib/hash.h"
@@ -67,7 +67,6 @@ struct mrt_table_dump_state {
/* Allocated by mrt_table_dump_init() */
pool *pool; /* Pool for table dump */
- pool *parent; /* Parent pool for cleanup */
linpool *linpool; /* Temporary linear pool */
linpool *peer_lp; /* Linear pool for peer entries in peer_hash */
buffer buf; /* Buffer for MRT messages */
diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile
index 39e74f71..015f394a 100644
--- a/proto/ospf/Makefile
+++ b/proto/ospf/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y
index 4b7d5a36..bc3df8db 100644
--- a/proto/ospf/config.Y
+++ b/proto/ospf/config.Y
@@ -190,7 +190,7 @@ ospf_check_auth(void)
CF_DECLS
-CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID)
+CF_KEYWORDS(OSPF, V2, V3)
CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT)
CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT)
CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP)
@@ -505,11 +505,6 @@ ospf_iface:
ospf_iface_start ospf_iface_patt_list ospf_iface_opt_list { ospf_iface_finish(); }
;
-dynamic_attr: OSPF_METRIC1 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC1); } ;
-dynamic_attr: OSPF_METRIC2 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC2); } ;
-dynamic_attr: OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_TAG); } ;
-dynamic_attr: OSPF_ROUTER_ID { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_OSPF_ROUTER_ID); } ;
-
CF_CLI_HELP(SHOW OSPF, ..., [[Show information about OSPF protocol]]);
CF_CLI(SHOW OSPF, optproto, [<name>], [[Show information about OSPF protocol]])
{ PROTO_WALK_CMD($3, &proto_ospf, p) ospf_sh(p); };
diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c
index 049030ac..0aa7fa00 100644
--- a/proto/ospf/iface.c
+++ b/proto/ospf/iface.c
@@ -136,7 +136,7 @@ ospf_sk_open(struct ospf_iface *ifa)
sk->flags = SKF_LADDR_RX | (ifa->check_ttl ? SKF_TTL_RX : 0);
sk->ttl = ifa->cf->ttl_security ? 255 : 1;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
/* 12 is an offset of the checksum in an OSPFv3 packet */
@@ -220,7 +220,7 @@ ospf_open_vlink_sk(struct ospf_proto *p)
sk->data = (void *) p;
sk->flags = 0;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
/* 12 is an offset of the checksum in an OSPFv3 packet */
@@ -311,7 +311,7 @@ ospf_iface_remove(struct ospf_iface *ifa)
ospf_iface_sm(ifa, ISM_DOWN);
rem_node(NODE ifa);
- rp_free(ifa->pool, p->p.pool);
+ rfree(ifa->pool);
}
void
@@ -484,9 +484,9 @@ ospf_iface_find(struct ospf_proto *p, struct iface *what)
}
static void
-ospf_iface_add(struct object_lock *lock)
+ospf_iface_add(void *_ifa)
{
- struct ospf_iface *ifa = lock->data;
+ struct ospf_iface *ifa = _ifa;
struct ospf_proto *p = ifa->oa->po;
/* Open socket if interface is not stub */
@@ -567,7 +567,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
OSPF_TRACE(D_EVENTS, "Adding interface %s (%N) to area %R",
iface->name, &addr->prefix, oa->areaid);
- pool = rp_new(p->p.pool, p->p.loop, "OSPF Interface");
+ pool = rp_new(p->p.pool, "OSPF Interface");
ifa = mb_allocz(pool, sizeof(struct ospf_iface));
ifa->iface = iface;
ifa->addr = addr;
@@ -668,8 +668,11 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
lock->port = OSPF_PROTO;
lock->inst = ifa->instance_id;
lock->iface = iface;
- lock->data = ifa;
- lock->hook = ospf_iface_add;
+ lock->event = (event) {
+ .hook = ospf_iface_add,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
olock_acquire(lock);
}
@@ -687,7 +690,7 @@ ospf_iface_new_vlink(struct ospf_proto *p, struct ospf_iface_patt *ip)
/* Vlink ifname is stored just after the ospf_iface structure */
- pool = rp_new(p->p.pool, p->p.loop, "OSPF Vlink");
+ pool = rp_new(p->p.pool, "OSPF Vlink");
ifa = mb_allocz(pool, sizeof(struct ospf_iface) + 16);
ifa->oa = p->backbone;
ifa->cf = ip;
@@ -1222,12 +1225,13 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a)
static void
ospf_reconfigure_ifaces2(struct ospf_proto *p)
{
- struct iface *iface;
struct ifa *a;
- IFACE_LEGACY_ACCESS;
- WALK_LIST(iface, global_iface_list)
+ IFACE_WALK(iface)
{
+ if (p->p.vrf && p->p.vrf != iface->master)
+ continue;
+
if (! (iface->flags & IF_UP))
continue;
@@ -1269,12 +1273,13 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p)
static void
ospf_reconfigure_ifaces3(struct ospf_proto *p)
{
- struct iface *iface;
struct ifa *a;
- IFACE_LEGACY_ACCESS;
- WALK_LIST(iface, global_iface_list)
+ IFACE_WALK(iface)
{
+ if (p->p.vrf && p->p.vrf != iface->master)
+ continue;
+
if (! (iface->flags & IF_UP))
continue;
diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c
index 4ae0d3fa..b0fdc42f 100644
--- a/proto/ospf/neighbor.c
+++ b/proto/ospf/neighbor.c
@@ -80,7 +80,7 @@ struct ospf_neighbor *
ospf_neighbor_new(struct ospf_iface *ifa)
{
struct ospf_proto *p = ifa->oa->po;
- struct pool *pool = rp_new(p->p.pool, p->p.loop, "OSPF Neighbor");
+ struct pool *pool = rp_new(p->p.pool, "OSPF Neighbor");
struct ospf_neighbor *n = mb_allocz(pool, sizeof(struct ospf_neighbor));
n->pool = pool;
@@ -120,7 +120,7 @@ ospf_neigh_down(struct ospf_neighbor *n)
s_get(&(n->dbsi));
release_lsrtl(p, n);
rem_node(NODE n);
- rp_free(n->pool, p->p.pool);
+ rfree(n->pool);
OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", rid, ifa->ifname);
}
@@ -777,7 +777,7 @@ ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd)
if (use_bfd && !n->bfd_req)
n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip,
n->ifa->iface, p->p.vrf,
- ospf_neigh_bfd_hook, n, birdloop_event_list(p->p.loop), NULL);
+ ospf_neigh_bfd_hook, n, p->p.loop, NULL);
if (!use_bfd && n->bfd_req)
{
diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c
index 16774df6..896bf5a3 100644
--- a/proto/ospf/ospf.c
+++ b/proto/ospf/ospf.c
@@ -106,11 +106,12 @@
#include <stdlib.h>
#include "ospf.h"
+#include "lib/macro.h"
static int ospf_preexport(struct channel *C, rte *new);
static void ospf_reload_routes(struct channel *C);
-static int ospf_rte_better(struct rte *new, struct rte *old);
-static u32 ospf_rte_igp_metric(struct rte *rt);
+static int ospf_rte_better(const rte *new, const rte *old);
+static u32 ospf_rte_igp_metric(const rte *rt);
static void ospf_disp(timer *timer);
@@ -299,7 +300,7 @@ ospf_start(struct proto *P)
p->lsab_size = 256;
p->lsab_used = 0;
p->lsab = mb_alloc(P->pool, p->lsab_size);
- p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop));
+ p->nhpool = lp_new(P->pool);
init_list(&(p->iface_list));
init_list(&(p->area_list));
fib_init(&p->rtf, P->pool, ospf_get_af(p), sizeof(ort), OFFSETOF(ort, fn), 0, NULL);
@@ -370,8 +371,8 @@ ospf_init(struct proto_config *CF)
P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
P->rt_notify = ospf_rt_notify;
- P->if_notify = ospf_if_notify;
- P->ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3;
+ P->iface_sub.if_notify = ospf_if_notify;
+ P->iface_sub.ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3;
P->preexport = ospf_preexport;
P->reload_routes = ospf_reload_routes;
P->feed_begin = ospf_feed_begin;
@@ -384,25 +385,28 @@ ospf_init(struct proto_config *CF)
/* If new is better return 1 */
static int
-ospf_rte_better(struct rte *new, struct rte *old)
+ospf_rte_better(const rte *new, const rte *old)
{
- u32 new_metric1 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY);
+ u32 new_metric1 = ea_get_int(new->attrs, &ea_ospf_metric1, LSINFINITY);
if (new_metric1 == LSINFINITY)
return 0;
- if(new->attrs->source < old->attrs->source) return 1;
- if(new->attrs->source > old->attrs->source) return 0;
+ u32 ns = rt_get_source_attr(new);
+ u32 os = rt_get_source_attr(old);
- if(new->attrs->source == RTS_OSPF_EXT2)
+ if (ns < os) return 1;
+ if (ns > os) return 0;
+
+ if (ns == RTS_OSPF_EXT2)
{
- u32 old_metric2 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY);
- u32 new_metric2 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY);
- if(new_metric2 < old_metric2) return 1;
- if(new_metric2 > old_metric2) return 0;
+ u32 old_metric2 = ea_get_int(old->attrs, &ea_ospf_metric2, LSINFINITY);
+ u32 new_metric2 = ea_get_int(new->attrs, &ea_ospf_metric2, LSINFINITY);
+ if (new_metric2 < old_metric2) return 1;
+ if (new_metric2 > old_metric2) return 0;
}
- u32 old_metric1 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY);
+ u32 old_metric1 = ea_get_int(old->attrs, &ea_ospf_metric1, LSINFINITY);
if (new_metric1 < old_metric1)
return 1;
@@ -410,12 +414,12 @@ ospf_rte_better(struct rte *new, struct rte *old)
}
static u32
-ospf_rte_igp_metric(struct rte *rt)
+ospf_rte_igp_metric(const rte *rt)
{
- if (rt->attrs->source == RTS_OSPF_EXT2)
+ if (rt_get_source_attr(rt) == RTS_OSPF_EXT2)
return IGP_METRIC_UNKNOWN;
- return ea_get_int(rt->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY);
+ return ea_get_int(rt->attrs, &ea_ospf_metric1, LSINFINITY);
}
void
@@ -482,13 +486,13 @@ ospf_disp(timer * timer)
* import to the filters.
*/
static int
-ospf_preexport(struct channel *c, rte *e)
+ospf_preexport(struct channel *C, rte *e)
{
- struct ospf_proto *p = (struct ospf_proto *) c->proto;
+ struct ospf_proto *p = (struct ospf_proto *) C->proto;
struct ospf_area *oa = ospf_main_area(p);
/* Reject our own routes */
- if (e->sender == c->in_req.hook)
+ if (e->sender == C->in_req.hook)
return -1;
/* Do not export routes to stub areas */
@@ -531,7 +535,7 @@ ospf_shutdown(struct proto *P)
/* Cleanup locked rta entries */
FIB_WALK(&p->rtf, ort, nf)
{
- rta_free(nf->old_rta);
+ ea_free(nf->old_ea);
}
FIB_WALK_END;
@@ -566,11 +570,12 @@ ospf_get_status(struct proto *P, byte * buf)
}
static void
-ospf_get_route_info(rte * rte, byte * buf)
+ospf_get_route_info(const rte * rte, byte * buf)
{
char *type = "<bug>";
- switch (rte->attrs->source)
+ uint source = rt_get_source_attr(rte);
+ switch (source)
{
case RTS_OSPF:
type = "I";
@@ -587,42 +592,26 @@ ospf_get_route_info(rte * rte, byte * buf)
}
buf += bsprintf(buf, " %s", type);
- buf += bsprintf(buf, " (%d/%d", rte->attrs->pref, ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY));
- if (rte->attrs->source == RTS_OSPF_EXT2)
- buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY));
+ buf += bsprintf(buf, " (%d/%d", rt_get_preference(rte), ea_get_int(rte->attrs, &ea_ospf_metric1, LSINFINITY));
+ if (source == RTS_OSPF_EXT2)
+ buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs, &ea_ospf_metric2, LSINFINITY));
buf += bsprintf(buf, ")");
- if (rte->attrs->source == RTS_OSPF_EXT1 || rte->attrs->source == RTS_OSPF_EXT2)
+ if (source == RTS_OSPF_EXT1 || source == RTS_OSPF_EXT2)
{
- eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_TAG);
+ eattr *ea = ea_find(rte->attrs, &ea_ospf_tag);
if (ea && (ea->u.data > 0))
buf += bsprintf(buf, " [%x]", ea->u.data);
}
- eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_ROUTER_ID);
+ eattr *ea = ea_find(rte->attrs, &ea_ospf_router_id);
if (ea)
buf += bsprintf(buf, " [%R]", ea->u.data);
}
-static int
-ospf_get_attr(const eattr * a, byte * buf, int buflen UNUSED)
+static void
+ospf_tag_format(const eattr * a, byte * buf, uint buflen)
{
- switch (a->id)
- {
- case EA_OSPF_METRIC1:
- bsprintf(buf, "metric1");
- return GA_NAME;
- case EA_OSPF_METRIC2:
- bsprintf(buf, "metric2");
- return GA_NAME;
- case EA_OSPF_TAG:
- bsprintf(buf, "tag: 0x%08x", a->u.data);
- return GA_FULL;
- case EA_OSPF_ROUTER_ID:
- bsprintf(buf, "router_id");
- return GA_NAME;
- default:
- return GA_UNKNOWN;
- }
+ bsnprintf(buf, buflen, "0x%08x", a->u.data);
}
static void
@@ -1526,7 +1515,6 @@ struct rte_owner_class ospf_rte_owner_class = {
struct protocol proto_ospf = {
.name = "OSPF",
.template = "ospf%d",
- .class = PROTOCOL_OSPF,
.preference = DEF_PREF_OSPF,
.channel_mask = NB_IP,
.proto_size = sizeof(struct ospf_proto),
@@ -1537,5 +1525,38 @@ struct protocol proto_ospf = {
.shutdown = ospf_shutdown,
.reconfigure = ospf_reconfigure,
.get_status = ospf_get_status,
- .get_attr = ospf_get_attr,
};
+
+struct ea_class ea_ospf_metric1 = {
+ .name = "ospf_metric1",
+ .type = T_INT,
+};
+
+struct ea_class ea_ospf_metric2 = {
+ .name = "ospf_metric2",
+ .type = T_INT,
+};
+
+struct ea_class ea_ospf_tag = {
+ .name = "ospf_tag",
+ .type = T_INT,
+ .format = ospf_tag_format,
+};
+
+struct ea_class ea_ospf_router_id = {
+ .name = "ospf_router_id",
+ .type = T_QUAD,
+};
+
+void
+ospf_build(void)
+{
+ proto_build(&proto_ospf);
+
+ EA_REGISTER_ALL(
+ &ea_ospf_metric1,
+ &ea_ospf_metric2,
+ &ea_ospf_tag,
+ &ea_ospf_router_id
+ );
+}
diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h
index a5f83e79..3477ba5a 100644
--- a/proto/ospf/ospf.h
+++ b/proto/ospf/ospf.h
@@ -22,7 +22,7 @@
#include "lib/resource.h"
#include "nest/protocol.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "nest/locks.h"
#include "nest/bfd.h"
@@ -939,12 +939,7 @@ struct lsadb_show_data {
u32 router; /* Advertising router, 0 -> all */
};
-
-#define EA_OSPF_METRIC1 EA_CODE(PROTOCOL_OSPF, 0)
-#define EA_OSPF_METRIC2 EA_CODE(PROTOCOL_OSPF, 1)
-#define EA_OSPF_TAG EA_CODE(PROTOCOL_OSPF, 2)
-#define EA_OSPF_ROUTER_ID EA_CODE(PROTOCOL_OSPF, 3)
-
+extern struct ea_class ea_ospf_metric1, ea_ospf_metric2, ea_ospf_tag, ea_ospf_router_id;
/*
* For regular networks, neighbor address must match network prefix.
diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c
index 3e208023..69c2907d 100644
--- a/proto/ospf/rt.c
+++ b/proto/ospf/rt.c
@@ -28,24 +28,30 @@ nh_is_vlink(struct nexthop *nhs)
static inline int
unresolved_vlink(ort *ort)
{
- return ort->n.nhs && nh_is_vlink(ort->n.nhs);
+ return ort->n.nhs && nh_is_vlink(&ort->n.nhs->nh);
}
-static inline struct nexthop *
+static inline struct nexthop_adata *
new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight)
{
- struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop));
- nh->gw = gw;
- nh->iface = iface;
- nh->weight = weight;
- return nh;
+ struct nexthop_adata *nhad = lp_alloc(p->nhpool, sizeof(struct nexthop_adata));
+ *nhad = (struct nexthop_adata) {
+ .ad = { .length = sizeof *nhad - sizeof nhad->ad, },
+ .nh = {
+ .gw = gw,
+ .iface = iface,
+ .weight = weight,
+ },
+ };
+
+ return nhad;
}
/* Returns true if there are device nexthops in n */
static inline int
-has_device_nexthops(const struct nexthop *n)
+has_device_nexthops(struct nexthop_adata *nhad)
{
- for (; n; n = n->next)
+ NEXTHOP_WALK(n, nhad)
if (ipa_zero(n->gw))
return 1;
@@ -53,38 +59,22 @@ has_device_nexthops(const struct nexthop *n)
}
/* Replace device nexthops with nexthops to gw */
-static struct nexthop *
-fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw)
+static struct nexthop_adata *
+fix_device_nexthops(struct ospf_proto *p, struct nexthop_adata *old, ip_addr gw)
{
- struct nexthop *root1 = NULL;
- struct nexthop *root2 = NULL;
- struct nexthop **nn1 = &root1;
- struct nexthop **nn2 = &root2;
-
if (!p->ecmp)
- return new_nexthop(p, gw, n->iface, n->weight);
-
- /* This is a bit tricky. We cannot just copy the list and update n->gw,
- because the list should stay sorted, so we create two lists, one with new
- gateways and one with old ones, and then merge them. */
-
- for (; n; n = n->next)
{
- struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight);
+ struct nexthop_adata *new = (struct nexthop_adata *) lp_store_adata(p->nhpool, old->ad.data, old->ad.length);
+ new->nh.gw = gw;
+ return new;
+ }
+ struct nexthop_adata *tmp = (struct nexthop_adata *) tmp_copy_adata(&old->ad);
+ NEXTHOP_WALK(n, tmp)
if (ipa_zero(n->gw))
- {
- *nn1 = nn;
- nn1 = &(nn->next);
- }
- else
- {
- *nn2 = nn;
- nn2 = &(nn->next);
- }
- }
+ n->gw = gw;
- return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool);
+ return nexthop_sort(tmp, p->nhpool);
}
@@ -169,9 +159,9 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old)
return -1;
if (!new->nhs)
return 1;
- if (nh_is_vlink(new->nhs))
+ if (nh_is_vlink(&new->nhs->nh))
return -1;
- if (nh_is_vlink(old->nhs))
+ if (nh_is_vlink(&old->nhs->nh))
return 1;
@@ -279,11 +269,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new)
orta *old = &o->n;
if (old->nhs != new->nhs)
- {
- old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
- p->ecmp, p->nhpool);
- old->nhs_reuse = 1;
- }
+ old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool);
if (old->rid < new->rid)
old->rid = new->rid;
@@ -295,11 +281,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new)
orta *old = &o->n;
if (old->nhs != new->nhs)
- {
- old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
- p->ecmp, p->nhpool);
- old->nhs_reuse = 1;
- }
+ old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool);
if (old->tag != new->tag)
old->tag = 0;
@@ -1165,7 +1147,7 @@ ospf_check_vlinks(struct ospf_proto *p)
if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs)
{
- struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->iface);
+ struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->nh.iface);
if ((ifa->state != OSPF_IS_PTP)
|| (ifa->vifa != nhi)
@@ -1579,10 +1561,7 @@ ospf_ext_spf(struct ospf_proto *p)
/* Replace device nexthops with nexthops to forwarding address from LSA */
if (has_device_nexthops(nfa.nhs))
- {
nfa.nhs = fix_device_nexthops(p, nfa.nhs, rt.fwaddr);
- nfa.nhs_reuse = 1;
- }
}
if (rt.ebit)
@@ -1726,10 +1705,10 @@ ospf_rt_spf(struct ospf_proto *p)
static inline int
-inherit_nexthops(struct nexthop *pn)
+inherit_nexthops(struct nexthop_adata *pn)
{
/* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */
- return pn && (ipa_nonzero(pn->gw) || !pn->iface);
+ return pn && (ipa_nonzero(pn->nh.gw) || !pn->nh.iface);
}
static inline ip_addr
@@ -1744,12 +1723,12 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en)
return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll);
}
-static struct nexthop *
+static struct nexthop_adata *
calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
struct top_hash_entry *par, int pos, uint data, uint lif, uint nif)
{
struct ospf_proto *p = oa->po;
- struct nexthop *pn = par->nhs;
+ struct nexthop_adata *pn = par->nhs;
struct top_hash_entry *link = NULL;
struct ospf_iface *ifa = NULL;
ip_addr nh = IPA_NONE;
@@ -1827,10 +1806,10 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
return NULL;
}
- struct nexthop *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight);
+ struct nexthop_adata *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight);
if (ifa->addr->flags & IA_HOST)
- nhs->flags = RNF_ONLINK;
+ nhs->nh.flags = RNF_ONLINK;
return nhs;
}
@@ -1851,7 +1830,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
if (ipa_zero(en->lb))
goto bad;
- return new_nexthop(p, en->lb, pn->iface, pn->weight);
+ return new_nexthop(p, en->lb, pn->nh.iface, pn->nh.weight);
}
else /* OSPFv3 */
{
@@ -1859,7 +1838,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
* Next-hop is taken from lladdr field of Link-LSA, en->lb_id
* is computed in link_back().
*/
- link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK);
+ link = ospf_hash_find(p->gr, pn->nh.iface->index, en->lb_id, rid, LSA_T_LINK);
if (!link)
return NULL;
@@ -1867,7 +1846,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
if (ipa_zero(nh))
return NULL;
- return new_nexthop(p, nh, pn->iface, pn->weight);
+ return new_nexthop(p, nh, pn->nh.iface, pn->nh.weight);
}
}
@@ -1914,7 +1893,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
if (!link_back(oa, en, par, lif, nif))
return;
- struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif);
+ struct nexthop_adata *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif);
if (!nhs)
{
log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)",
@@ -1923,7 +1902,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
}
/* If en->dist > 0, we know that en->color == CANDIDATE and en->nhs is defined. */
- if ((dist == en->dist) && !nh_is_vlink(en->nhs))
+ if ((dist == en->dist) && !nh_is_vlink(&en->nhs->nh))
{
/*
* For multipath, we should merge nexthops. We merge regular nexthops only.
@@ -1947,13 +1926,11 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
*/
/* Keep old ones */
- if (!p->ecmp || nh_is_vlink(nhs) || (nhs == en->nhs))
+ if (!p->ecmp || nh_is_vlink(&nhs->nh) || (nhs == en->nhs))
return;
/* Merge old and new */
- int new_reuse = (par->nhs != nhs);
- en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool);
- en->nhs_reuse = 1;
+ en->nhs = nexthop_merge(en->nhs, nhs, p->ecmp, p->nhpool);
return;
}
@@ -1967,7 +1944,6 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
en->nhs = nhs;
en->dist = dist;
en->color = CANDIDATE;
- en->nhs_reuse = (par->nhs != nhs);
prev = NULL;
@@ -2001,14 +1977,34 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
}
static inline int
-ort_changed(ort *nf, rta *nr)
+ort_changed(ort *nf, ea_list *nr)
{
- rta *or = nf->old_rta;
- return !or ||
+ ea_list *or = nf->old_ea;
+
+ if (!or ||
(nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) ||
- (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) ||
- (nr->source != or->source) || (nr->dest != or->dest) ||
- !nexthop_same(&(nr->nh), &(or->nh));
+ (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid))
+ return 1;
+
+ eattr *nhea_n = ea_find(nr, &ea_gen_nexthop);
+ eattr *nhea_o = ea_find(or, &ea_gen_nexthop);
+ if (!nhea_n != !nhea_o)
+ return 1;
+
+ if (nhea_n && nhea_o)
+ {
+ struct nexthop_adata *nhad_n = (struct nexthop_adata *) nhea_n->u.ptr;
+ struct nexthop_adata *nhad_o = (struct nexthop_adata *) nhea_o->u.ptr;
+
+ if (!nexthop_same(nhad_n, nhad_o))
+ return 1;
+ }
+
+ if ( ea_get_int(nr, &ea_gen_source, 0)
+ != ea_get_int(or, &ea_gen_source, 0))
+ return 1;
+
+ return 0;
}
static void
@@ -2030,10 +2026,9 @@ again1:
FIB_ITERATE_START(fib, &fit, ort, nf)
{
/* Sanity check of next-hop addresses, failure should not happen */
- if (nf->n.type)
+ if (nf->n.type && nf->n.nhs)
{
- struct nexthop *nh;
- for (nh = nf->n.nhs; nh; nh = nh->next)
+ NEXTHOP_WALK(nh, nf->n.nhs)
if (ipa_nonzero(nh->gw))
{
neighbor *nbr = neigh_find(&p->p, nh->gw, nh->iface,
@@ -2052,69 +2047,67 @@ again1:
if (nf->n.type) /* Add the route */
{
- rta a0 = {
- .source = nf->n.type,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
- .nh = *(nf->n.nhs),
- .pref = p->p.main_channel->preference,
- };
-
- if (reload || ort_changed(nf, &a0))
- {
- a0.eattrs = alloca(sizeof(ea_list) + 4 * sizeof(eattr));
- memset(a0.eattrs, 0, sizeof(ea_list));
+ struct {
+ ea_list l;
+ eattr a[7];
+ } eattrs;
+ eattrs.l = (ea_list) {};
+
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference);
+
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_gen_source, 0, nf->n.type);
+
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nf->n.nhs->ad);
+
+ if (reload || ort_changed(nf, &eattrs.l))
+ {
nf->old_metric1 = nf->n.metric1;
nf->old_metric2 = nf->n.metric2;
nf->old_tag = nf->n.tag;
nf->old_rid = nf->n.rid;
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_METRIC1,
- .type = EAF_TYPE_INT,
- .u.data = nf->n.metric1,
- };
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_metric1, 0, nf->n.metric1);
if (nf->n.type == RTS_OSPF_EXT2)
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_METRIC2,
- .type = EAF_TYPE_INT,
- .u.data = nf->n.metric2,
- };
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_metric2, 0, nf->n.metric2);
if ((nf->n.type == RTS_OSPF_EXT1) || (nf->n.type == RTS_OSPF_EXT2))
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_TAG,
- .type = EAF_TYPE_INT,
- .u.data = nf->n.tag,
- };
-
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_ROUTER_ID,
- .type = EAF_TYPE_ROUTER_ID,
- .u.data = nf->n.rid,
- };
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_tag, 0, nf->n.tag);
+
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_router_id, 0, nf->n.rid);
- rta_free(nf->old_rta);
- nf->old_rta = rta_lookup(&a0);
+ ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count);
+
+ ea_list *eal = ea_lookup(&eattrs.l, 0);
+ ea_free(nf->old_ea);
+ nf->old_ea = eal;
rte e0 = {
- .attrs = nf->old_rta,
+ .attrs = eal,
.src = p->p.main_source,
};
+ /*
DBG("Mod rte type %d - %N via %I on iface %s, met %d\n",
a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1);
+ */
rte_update(p->p.main_channel, nf->fn.addr, &e0, p->p.main_source);
}
}
- else if (nf->old_rta)
+ else if (nf->old_ea)
{
/* Remove the route */
- rta_free(nf->old_rta);
- nf->old_rta = NULL;
+ rta_free(nf->old_ea);
+ nf->old_ea = NULL;
rte_update(p->p.main_channel, nf->fn.addr, NULL, p->p.main_source);
}
diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h
index 094e125b..88eefef9 100644
--- a/proto/ospf/rt.h
+++ b/proto/ospf/rt.h
@@ -18,8 +18,6 @@
typedef struct orta
{
u8 type; /* RTS_OSPF_* */
- u8 nhs_reuse; /* Whether nhs nodes can be reused during merging.
- See a note in rt.c:add_cand() */
u32 options;
/*
* For ORT_ROUTER routes, options field are router-LSA style
@@ -53,7 +51,7 @@ typedef struct orta
struct ospf_area *oa;
struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(),
NULL otherwise */
- struct nexthop *nhs; /* Next hops computed during SPF */
+ struct nexthop_adata *nhs; /* Next hops computed during SPF */
struct top_hash_entry *en; /* LSA responsible for this orta */
}
orta;
@@ -80,7 +78,7 @@ typedef struct ort
*/
orta n;
u32 old_metric1, old_metric2, old_tag, old_rid;
- rta *old_rta;
+ ea_list *old_ea;
u32 lsa_id;
u8 external_rte;
u8 area_net;
diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c
index bb88d20a..85bce03d 100644
--- a/proto/ospf/topology.c
+++ b/proto/ospf/topology.c
@@ -1337,9 +1337,9 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt
ASSERT(p->asbr);
/* Get route attributes */
- rta *a = new->attrs;
- eattr *m1a = ea_find(a->eattrs, EA_OSPF_METRIC1);
- eattr *m2a = ea_find(a->eattrs, EA_OSPF_METRIC2);
+ ea_list *a = new->attrs;
+ eattr *m1a = ea_find(a, &ea_ospf_metric1);
+ eattr *m2a = ea_find(a, &ea_ospf_metric2);
uint m1 = m1a ? m1a->u.data : 0;
uint m2 = m2a ? m2a->u.data : 10000;
@@ -1363,11 +1363,14 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt
uint ebit = m2a || !m1a;
uint metric = ebit ? m2 : m1;
- uint tag = ea_get_int(a->eattrs, EA_OSPF_TAG, 0);
+ uint tag = ea_get_int(a, &ea_ospf_tag, 0);
ip_addr fwd = IPA_NONE;
- if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface))
- fwd = a->nh.gw;
+ eattr *nhea = ea_find(a, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ if (NEXTHOP_IS_REACHABLE(nhad))
+ if (use_gw_for_fwaddr(p, nhad->nh.gw, nhad->nh.iface))
+ fwd = nhad->nh.gw;
/* NSSA-LSA with P-bit set must have non-zero forwarding address */
if (oa && ipa_zero(fwd))
@@ -2135,7 +2138,7 @@ ospf_hash_delete(struct top_graph *f, struct top_hash_entry *e)
if (*ee == e)
{
*ee = e->next;
- sl_free(f->hash_slab, e);
+ sl_free(e);
if (f->hash_entries-- < f->hash_entries_min)
ospf_top_rehash(f, -HASH_LO_STEP);
return;
diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h
index c36d0b50..3c92b431 100644
--- a/proto/ospf/topology.h
+++ b/proto/ospf/topology.h
@@ -28,7 +28,7 @@ struct top_hash_entry
u16 next_lsa_opts; /* For postponed LSA origination */
btime inst_time; /* Time of installation into DB */
struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */
- struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */
+ struct nexthop_adata *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */
ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */
u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */
u32 dist; /* Distance from the root */
@@ -39,8 +39,6 @@ struct top_hash_entry
#define CANDIDATE 1
#define INSPF 2
u8 mode; /* LSA generated during RT calculation (LSA_RTCALC or LSA_STALE)*/
- u8 nhs_reuse; /* Whether nhs nodes can be reused during merging.
- See a note in rt.c:add_cand() */
};
diff --git a/proto/perf/perf.c b/proto/perf/perf.c
index aa688d88..dc5bbf2f 100644
--- a/proto/perf/perf.c
+++ b/proto/perf/perf.c
@@ -18,7 +18,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "conf/conf.h"
#include "filter/filter.h"
@@ -85,7 +85,7 @@ random_net_ip4(void)
}
struct perf_random_routes {
- struct rta *a;
+ ea_list *a;
net_addr net;
};
@@ -142,17 +142,21 @@ perf_loop(void *data)
*((net_addr_ip4 *) &(p->data[i].net)) = random_net_ip4();
if (!p->attrs_per_rte || !(i % p->attrs_per_rte)) {
- struct rta a0 = {
- .source = RTS_PERF,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
- .pref = p->p.main_channel->preference,
+ ea_list *ea = NULL;
+
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_PERF);
+
+ struct nexthop_adata nhad = {
.nh.iface = p->ifa->iface,
.nh.gw = gw,
.nh.weight = 1,
};
- p->data[i].a = rta_lookup(&a0);
+ ea_set_attr_data(&ea, &ea_gen_nexthop, 0,
+ &nhad.ad.data, sizeof nhad - sizeof nhad.ad);
+
+ p->data[i].a = rta_lookup(ea, 0);
}
else
p->data[i].a = rta_clone(p->data[i-1].a);
@@ -198,9 +202,9 @@ perf_loop(void *data)
p->exp++;
}
- RT_LOCK(P->main_channel->table);
- rt_schedule_prune(RT_PRIV(P->main_channel->table));
- RT_UNLOCK(P->main_channel->table);
+ RT_LOCKED(P->main_channel->table, tab)
+ rt_schedule_prune(tab);
+
ev_schedule(p->loop);
}
@@ -268,7 +272,7 @@ perf_init(struct proto_config *CF)
switch (p->mode) {
case PERF_MODE_IMPORT:
- P->ifa_notify = perf_ifa_notify;
+ P->iface_sub.ifa_notify = perf_ifa_notify;
break;
case PERF_MODE_EXPORT:
P->rt_notify = perf_rt_notify;
@@ -307,7 +311,6 @@ perf_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS
struct protocol proto_perf = {
.name = "Perf",
.template = "perf%d",
- .class = PROTOCOL_PERF,
.channel_mask = NB_IP,
.proto_size = sizeof(struct perf_proto),
.config_size = sizeof(struct perf_config),
@@ -316,3 +319,9 @@ struct protocol proto_perf = {
.reconfigure = perf_reconfigure,
.copy_config = perf_copy_config,
};
+
+void
+perf_build(void)
+{
+ proto_build(&proto_perf);
+}
diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile
index 5093da98..0d68db4c 100644
--- a/proto/pipe/Makefile
+++ b/proto/pipe/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y
index fc08445f..444de127 100644
--- a/proto/pipe/config.Y
+++ b/proto/pipe/config.Y
@@ -42,6 +42,12 @@ pipe_proto:
pipe_proto_start '{'
| pipe_proto proto_item ';'
| pipe_proto channel_item_ ';'
+ | pipe_proto IMPORT IN net_any imexport ';' {
+ if (this_channel->net_type && ($4->type != this_channel->net_type))
+ cf_error("Incompatible export prefilter type");
+ PIPE_CFG->in_subprefix = $4;
+ this_channel->in_filter = $5;
+ }
| pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; }
| pipe_proto MAX GENERATION expr ';' {
if (($4 < 1) || ($4 > 254)) cf_error("Max generation must be in range 1..254, got %u", $4);
diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c
index 270f7b92..b2083010 100644
--- a/proto/pipe/pipe.c
+++ b/proto/pipe/pipe.c
@@ -35,7 +35,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "conf/conf.h"
#include "filter/filter.h"
@@ -43,32 +43,29 @@
#include "pipe.h"
-#ifdef CONFIG_BGP
-#include "proto/bgp/bgp.h"
-#endif
-
static void
pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte *new, const rte *old)
{
struct pipe_proto *p = (void *) P;
struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri;
+ uint *flags = (src_ch == p->pri) ? &p->sec_flags : &p->pri_flags;
if (!new && !old)
return;
+ /* Start the route refresh if requested to */
+ if (*flags & PIPE_FL_RR_BEGIN_PENDING)
+ {
+ *flags &= ~PIPE_FL_RR_BEGIN_PENDING;
+ rt_refresh_begin(&dst->in_req);
+ }
+
if (new)
{
- rta *a = alloca(rta_size(new->attrs));
- memcpy(a, new->attrs, rta_size(new->attrs));
+ rte e0 = rte_init_from(new);
- a->cached = 0;
- a->hostentry = NULL;
-
- rte e0 = {
- .attrs = a,
- .src = new->src,
- .generation = new->generation + 1,
- };
+ e0.generation = new->generation + 1;
+ ea_unset_attr(&e0.attrs, 0, &ea_gen_hostentry);
rte_update(dst, n, &e0, new->src);
}
@@ -77,12 +74,12 @@ pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte *
}
static int
-pipe_preexport(struct channel *c, rte *e)
+pipe_preexport(struct channel *C, rte *e)
{
- struct pipe_proto *p = (void *) c->proto;
+ struct pipe_proto *p = (void *) C->proto;
/* Avoid direct loopbacks */
- if (e->sender == c->in_req.hook)
+ if (e->sender == C->in_req.hook)
return -1;
/* Indirection check */
@@ -90,8 +87,8 @@ pipe_preexport(struct channel *c, rte *e)
if (e->generation >= max_generation)
{
log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u",
- e->generation, max_generation, c->proto->name,
- c->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id);
+ e->generation, max_generation, C->proto->name,
+ C->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id);
return -1;
}
@@ -109,12 +106,12 @@ pipe_reload_routes(struct channel *C)
}
static void
-pipe_feed_begin(struct channel *C, int refeeding UNUSED)
+pipe_feed_begin(struct channel *C, int initial UNUSED)
{
struct pipe_proto *p = (void *) C->proto;
- struct channel *dst = (C == p->pri) ? p->sec : p->pri;
+ uint *flags = (C == p->pri) ? &p->sec_flags : &p->pri_flags;
- channel_refresh_begin(dst);
+ *flags |= PIPE_FL_RR_BEGIN_PENDING;
}
static void
@@ -122,8 +119,17 @@ pipe_feed_end(struct channel *C)
{
struct pipe_proto *p = (void *) C->proto;
struct channel *dst = (C == p->pri) ? p->sec : p->pri;
+ uint *flags = (C == p->pri) ? &p->sec_flags : &p->pri_flags;
+
+ /* If not even started, start the RR now */
+ if (*flags & PIPE_FL_RR_BEGIN_PENDING)
+ {
+ *flags &= ~PIPE_FL_RR_BEGIN_PENDING;
+ rt_refresh_begin(&dst->in_req);
+ }
- channel_refresh_end(dst);
+ /* Finish RR always */
+ rt_refresh_end(&dst->in_req);
}
static void
@@ -144,10 +150,16 @@ pipe_postconfig(struct proto_config *CF)
if (cc->table->addr_type != cf->peer->addr_type)
cf_error("Primary table and peer table must have the same type");
+ if (cc->out_subprefix && (cc->table->addr_type != cc->out_subprefix->type))
+ cf_error("Export subprefix must match table type");
+
+ if (cf->in_subprefix && (cc->table->addr_type != cf->in_subprefix->type))
+ cf_error("Import subprefix must match table type");
+
if (cc->rx_limit.action)
cf_error("Pipe protocol does not support receive limits");
- if (cc->in_keep_filtered)
+ if (cc->in_keep)
cf_error("Pipe protocol prohibits keeping filtered routes");
cc->debug = cf->c.debug;
@@ -163,6 +175,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf)
.channel = cc->channel,
.table = cc->table,
.out_filter = cc->out_filter,
+ .out_subprefix = cc->out_subprefix,
.in_limit = cc->in_limit,
.ra_mode = RA_ANY,
.debug = cc->debug,
@@ -174,6 +187,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf)
.channel = cc->channel,
.table = cf->peer,
.out_filter = cc->in_filter,
+ .out_subprefix = cf->in_subprefix,
.in_limit = cc->out_limit,
.ra_mode = RA_ANY,
.debug = cc->debug,
@@ -318,7 +332,6 @@ pipe_update_debug(struct proto *P)
struct protocol proto_pipe = {
.name = "Pipe",
.template = "pipe%d",
- .class = PROTOCOL_PIPE,
.proto_size = sizeof(struct pipe_proto),
.config_size = sizeof(struct pipe_config),
.postconfig = pipe_postconfig,
@@ -328,3 +341,9 @@ struct protocol proto_pipe = {
.get_status = pipe_get_status,
.show_proto_info = pipe_show_proto_info
};
+
+void
+pipe_build(void)
+{
+ proto_build(&proto_pipe);
+}
diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h
index 60c857eb..501b8565 100644
--- a/proto/pipe/pipe.h
+++ b/proto/pipe/pipe.h
@@ -12,6 +12,7 @@
struct pipe_config {
struct proto_config c;
struct rtable_config *peer; /* Table we're connected to */
+ const net_addr *in_subprefix;
u8 max_generation;
};
@@ -19,7 +20,11 @@ struct pipe_proto {
struct proto p;
struct channel *pri;
struct channel *sec;
+ uint pri_flags;
+ uint sec_flags;
struct tbf rl_gen;
};
+#define PIPE_FL_RR_BEGIN_PENDING 1 /* Route refresh should start with the first route notified */
+
#endif
diff --git a/proto/radv/Makefile b/proto/radv/Makefile
index 05317eff..5c56fbf3 100644
--- a/proto/radv/Makefile
+++ b/proto/radv/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/radv/config.Y b/proto/radv/config.Y
index 8d4a3ab9..fb68d2e5 100644
--- a/proto/radv/config.Y
+++ b/proto/radv/config.Y
@@ -33,7 +33,7 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, SOLICITED,
RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT,
LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, LOCAL,
TRIGGER, SENSITIVE, PREFERENCE, LOW, MEDIUM, HIGH, PROPAGATE, ROUTE,
- ROUTES, RA_PREFERENCE, RA_LIFETIME)
+ ROUTES)
CF_ENUM(T_ENUM_RA_PREFERENCE, RA_PREF_, LOW, MEDIUM, HIGH)
@@ -336,9 +336,6 @@ radv_sensitive:
| SENSITIVE bool { $$ = $2; }
;
-dynamic_attr: RA_PREFERENCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_RA_PREFERENCE, EA_RA_PREFERENCE); } ;
-dynamic_attr: RA_LIFETIME { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RA_LIFETIME); } ;
-
CF_CODE
CF_END
diff --git a/proto/radv/packets.c b/proto/radv/packets.c
index 5cd8b2de..c6b565d2 100644
--- a/proto/radv/packets.c
+++ b/proto/radv/packets.c
@@ -493,7 +493,7 @@ radv_sk_open(struct radv_iface *ifa)
sk->data = ifa;
sk->flags = SKF_LADDR_RX;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, ifa->ra->p.loop) < 0)
goto err;
/* We want listen just to ICMPv6 messages of type RS and RA */
diff --git a/proto/radv/radv.c b/proto/radv/radv.c
index 15673555..434155dc 100644
--- a/proto/radv/radv.c
+++ b/proto/radv/radv.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include "radv.h"
+#include "lib/macro.h"
/**
* DOC: Router Advertisements
@@ -42,6 +43,8 @@
* RFC 6106 - DNS extensions (RDDNS, DNSSL)
*/
+static struct ea_class ea_radv_preference, ea_radv_lifetime;
+
static void radv_prune_prefixes(struct radv_iface *ifa);
static void radv_prune_routes(struct radv_proto *p);
@@ -263,9 +266,9 @@ radv_iface_find(struct radv_proto *p, struct iface *what)
}
static void
-radv_iface_add(struct object_lock *lock)
+radv_iface_add(void *_ifa)
{
- struct radv_iface *ifa = lock->data;
+ struct radv_iface *ifa = _ifa;
struct radv_proto *p = ifa->ra;
if (! radv_sk_open(ifa))
@@ -284,7 +287,7 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf
RADV_TRACE(D_EVENTS, "Adding interface %s", iface->name);
- pool *pool = rp_new(p->p.pool, p->p.loop, iface->name);
+ pool *pool = rp_new(p->p.pool, iface->name);
ifa = mb_allocz(pool, sizeof(struct radv_iface));
ifa->pool = pool;
ifa->ra = p;
@@ -302,8 +305,11 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf
lock->type = OBJLOCK_IP;
lock->port = ICMPV6_PROTO;
lock->iface = iface;
- lock->data = ifa;
- lock->hook = radv_iface_add;
+ lock->event = (event) {
+ .hook = radv_iface_add,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
ifa->lock = lock;
olock_acquire(lock);
@@ -317,7 +323,7 @@ radv_iface_remove(struct radv_iface *ifa)
rem_node(NODE ifa);
- rp_free(ifa->pool, p->p.pool);
+ rfree(ifa->pool);
}
static void
@@ -391,10 +397,10 @@ radv_net_match_trigger(struct radv_config *cf, const net_addr *n)
}
int
-radv_preexport(struct channel *c, rte *new)
+radv_preexport(struct channel *C, rte *new)
{
// struct radv_proto *p = (struct radv_proto *) P;
- struct radv_config *cf = (struct radv_config *) (c->proto->cf);
+ struct radv_config *cf = (struct radv_config *) (C->proto->cf);
if (radv_net_match_trigger(cf, new->net))
return RIC_PROCESS;
@@ -444,11 +450,11 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rt
{
/* Update */
- ea = ea_find(new->attrs->eattrs, EA_RA_PREFERENCE);
+ ea = ea_find(new->attrs, &ea_radv_preference);
uint preference = ea ? ea->u.data : RA_PREF_MEDIUM;
uint preference_set = !!ea;
- ea = ea_find(new->attrs->eattrs, EA_RA_LIFETIME);
+ ea = ea_find(new->attrs, &ea_radv_lifetime);
uint lifetime = ea ? ea->u.data : 0;
uint lifetime_set = !!ea;
@@ -555,10 +561,7 @@ radv_check_active(struct radv_proto *p)
return 1;
struct channel *c = p->p.main_channel;
- RT_LOCK(c->table);
- int active = rt_examine(RT_PRIV(c->table), &cf->trigger, c, c->out_filter);
- RT_UNLOCK(c->table);
- return active;
+ return rt_examine(c->table, &cf->trigger, c, c->out_filter);
}
static void
@@ -580,8 +583,8 @@ radv_init(struct proto_config *CF)
P->preexport = radv_preexport;
P->rt_notify = radv_rt_notify;
- P->if_notify = radv_if_notify;
- P->ifa_notify = radv_ifa_notify;
+ P->iface_sub.if_notify = radv_if_notify;
+ P->iface_sub.ifa_notify = radv_ifa_notify;
return P;
}
@@ -663,10 +666,11 @@ radv_reconfigure(struct proto *P, struct proto_config *CF)
if (!old->propagate_routes && new->propagate_routes)
channel_request_feeding(p->p.main_channel);
- IFACE_LEGACY_ACCESS;
- struct iface *iface;
- WALK_LIST(iface, global_iface_list)
+ IFACE_WALK(iface)
{
+ if (p->p.vrf && p->p.vrf != iface->master)
+ continue;
+
if (!(iface->flags & IF_UP))
continue;
@@ -742,27 +746,26 @@ radv_pref_str(u32 pref)
}
}
-/* The buffer has some minimal size */
-static int
-radv_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
+static void
+radv_preference_format(const eattr *a, byte *buf, uint buflen)
{
- switch (a->id)
- {
- case EA_RA_PREFERENCE:
- bsprintf(buf, "preference: %s", radv_pref_str(a->u.data));
- return GA_FULL;
- case EA_RA_LIFETIME:
- bsprintf(buf, "lifetime");
- return GA_NAME;
- default:
- return GA_UNKNOWN;
- }
+ bsnprintf(buf, buflen, "%s", radv_pref_str(a->u.data));
}
+static struct ea_class ea_radv_preference = {
+ .name = "radv_preference",
+ .type = T_ENUM_RA_PREFERENCE,
+ .format = radv_preference_format,
+};
+
+static struct ea_class ea_radv_lifetime = {
+ .name = "radv_lifetime",
+ .type = T_INT,
+};
+
struct protocol proto_radv = {
.name = "RAdv",
.template = "radv%d",
- .class = PROTOCOL_RADV,
.channel_mask = NB_IP6,
.proto_size = sizeof(struct radv_proto),
.config_size = sizeof(struct radv_config),
@@ -773,5 +776,15 @@ struct protocol proto_radv = {
.reconfigure = radv_reconfigure,
.copy_config = radv_copy_config,
.get_status = radv_get_status,
- .get_attr = radv_get_attr
};
+
+void
+radv_build(void)
+{
+ proto_build(&proto_radv);
+
+ EA_REGISTER_ALL(
+ &ea_radv_preference,
+ &ea_radv_lifetime
+ );
+}
diff --git a/proto/radv/radv.h b/proto/radv/radv.h
index 14d40f8a..c9219bda 100644
--- a/proto/radv/radv.h
+++ b/proto/radv/radv.h
@@ -19,7 +19,7 @@
#include "lib/resource.h"
#include "nest/protocol.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "nest/locks.h"
#include "conf/conf.h"
@@ -195,10 +195,6 @@ struct radv_iface
#define RA_PREF_HIGH 0x08
#define RA_PREF_MASK 0x18
-/* Attributes */
-#define EA_RA_PREFERENCE EA_CODE(PROTOCOL_RADV, 0)
-#define EA_RA_LIFETIME EA_CODE(PROTOCOL_RADV, 1)
-
#ifdef LOCAL_DEBUG
#define RADV_FORCE_DEBUG 1
#else
diff --git a/proto/rip/Makefile b/proto/rip/Makefile
index 7feabcd8..f4a6fa72 100644
--- a/proto/rip/Makefile
+++ b/proto/rip/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/rip/config.Y b/proto/rip/config.Y
index 28ee9609..3c0973b1 100644
--- a/proto/rip/config.Y
+++ b/proto/rip/config.Y
@@ -37,7 +37,7 @@ CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT,
PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO,
TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5,
TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK,
- DEMAND, CIRCUIT, RIP_METRIC, RIP_TAG)
+ DEMAND, CIRCUIT)
%type <i> rip_variant rip_auth
@@ -190,9 +190,6 @@ rip_iface:
rip_iface_start iface_patt_list_nopx rip_iface_opt_list rip_iface_finish;
-dynamic_attr: RIP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_METRIC); } ;
-dynamic_attr: RIP_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_TAG); } ;
-
CF_CLI_HELP(SHOW RIP, ..., [[Show information about RIP protocol]]);
CF_CLI(SHOW RIP INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about RIP interfaces]])
diff --git a/proto/rip/packets.c b/proto/rip/packets.c
index 9c3bd7a3..fecdf896 100644
--- a/proto/rip/packets.c
+++ b/proto/rip/packets.c
@@ -1012,7 +1012,7 @@ rip_open_socket(struct rip_iface *ifa)
/* sk->rbsize and sk->tbsize are handled in rip_iface_update_buffers() */
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
if (ifa->cf->mode == RIP_IM_MULTICAST)
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 0f8b10ad..d15177da 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -78,6 +78,7 @@
#include <stdlib.h>
#include "rip.h"
+#include "lib/macro.h"
static inline void rip_lock_neighbor(struct rip_neighbor *n);
@@ -88,6 +89,7 @@ static inline void rip_iface_kick_timer(struct rip_iface *ifa);
static void rip_iface_timer(timer *timer);
static void rip_trigger_update(struct rip_proto *p);
+static struct ea_class ea_rip_metric, ea_rip_tag, ea_rip_from;
/*
* RIP routes
@@ -108,14 +110,14 @@ rip_add_rte(struct rip_proto *p, struct rip_rte **rp, struct rip_rte *src)
}
static inline void
-rip_remove_rte(struct rip_proto *p, struct rip_rte **rp)
+rip_remove_rte(struct rip_proto *p UNUSED, struct rip_rte **rp)
{
struct rip_rte *rt = *rp;
rip_unlock_neighbor(rt->from);
*rp = rt->next;
- sl_free(p->rte_slab, rt);
+ sl_free(rt);
}
static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b)
@@ -124,6 +126,11 @@ static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b)
static inline int rip_valid_rte(struct rip_rte *rt)
{ return rt->from->ifa != NULL; }
+struct rip_iface_adata {
+ struct adata ad;
+ struct iface *iface;
+};
+
/**
* rip_announce_rte - announce route from RIP routing table to the core
* @p: RIP instance
@@ -144,71 +151,87 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en)
if (rt)
{
/* Update */
- rta a0 = {
- .pref = p->p.main_channel->preference,
- .source = RTS_RIP,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
+ struct {
+ ea_list l;
+ eattr a[3];
+ } ea_block = {
+ .l.count = ARRAY_SIZE(ea_block.a),
+ .a = {
+ EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference),
+ EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_RIP),
+ EA_LITERAL_EMBEDDED(&ea_rip_metric, 0, rt->metric),
+ },
};
- u8 rt_metric = rt->metric;
+ ea_list *ea = &ea_block.l;
+
u16 rt_tag = rt->tag;
+ struct iface *rt_from = NULL;
if (p->ecmp)
{
/* ECMP route */
- struct nexthop *nhs = NULL;
int num = 0;
for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next)
+ if (rip_valid_rte(rt))
+ num++;
+
+ struct nexthop_adata *nhad = (struct nexthop_adata *) tmp_alloc_adata((num+1) * sizeof(struct nexthop));
+ struct nexthop *nh = &nhad->nh;
+
+ for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next)
{
if (!rip_valid_rte(rt))
- continue;
+ continue;
- struct nexthop *nh = allocz(sizeof(struct nexthop));
+ *nh = (struct nexthop) {
+ .gw = rt->next_hop,
+ .iface = rt->from->ifa->iface,
+ .weight = rt->from->ifa->cf->ecmp_weight,
+ };
- nh->gw = rt->next_hop;
- nh->iface = rt->from->ifa->iface;
- nh->weight = rt->from->ifa->cf->ecmp_weight;
+ if (!rt_from)
+ rt_from = rt->from->ifa->iface;
- nexthop_insert(&nhs, nh);
- num++;
+ nh = NEXTHOP_NEXT(nh);
if (rt->tag != rt_tag)
rt_tag = 0;
}
- a0.nh = *nhs;
+ nhad->ad.length = ((void *) nh - (void *) nhad->ad.data);
+
+ ea_set_attr(&ea,
+ EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0,
+ &(nexthop_sort(nhad, tmp_linpool)->ad)));
}
else
{
/* Unipath route */
- a0.from = rt->from->nbr->addr;
- a0.nh.gw = rt->next_hop;
- a0.nh.iface = rt->from->ifa->iface;
+ rt_from = rt->from->ifa->iface;
+
+ struct nexthop_adata nhad = {
+ .nh.gw = rt->next_hop,
+ .nh.iface = rt->from->ifa->iface,
+ };
+
+ ea_set_attr_data(&ea, &ea_gen_nexthop, 0,
+ &nhad.ad.data, sizeof nhad - sizeof nhad.ad);
+ ea_set_attr_data(&ea, &ea_gen_from, 0, &rt->from->nbr->addr, sizeof(ip_addr));
}
- a0.eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr));
- memset(a0.eattrs, 0, sizeof(ea_list)); /* Zero-ing only the ea_list header */
- a0.eattrs->count = 3;
- a0.eattrs->attrs[0] = (eattr) {
- .id = EA_RIP_METRIC,
- .type = EAF_TYPE_INT,
- .u.data = rt_metric,
- };
- a0.eattrs->attrs[1] = (eattr) {
- .id = EA_RIP_TAG,
- .type = EAF_TYPE_INT,
- .u.data = rt_tag,
- };
- a0.eattrs->attrs[2] = (eattr) {
- .id = EA_RIP_FROM,
- .type = EAF_TYPE_PTR,
- .u.data = (uintptr_t) a0.nh.iface,
+ ea_set_attr_u32(&ea, &ea_rip_tag, 0, rt_tag);
+
+ struct rip_iface_adata riad = {
+ .ad = { .length = sizeof(struct rip_iface_adata) - sizeof(struct adata) },
+ .iface = rt_from,
};
+ ea_set_attr(&ea,
+ EA_LITERAL_DIRECT_ADATA(&ea_rip_from, 0, &riad.ad));
rte e0 = {
- .attrs = &a0,
+ .attrs = ea,
.src = p->p.main_source,
};
@@ -320,9 +343,10 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s
if (new)
{
/* Update */
- u32 rt_tag = ea_get_int(new->attrs->eattrs, EA_RIP_TAG, 0);
- u32 rt_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, 1);
- struct iface *rt_from = (struct iface *) ea_get_int(new->attrs->eattrs, EA_RIP_FROM, 0);
+ u32 rt_tag = ea_get_int(new->attrs, &ea_rip_tag, 0);
+ u32 rt_metric = ea_get_int(new->attrs, &ea_rip_metric, 1);
+ const eattr *rie = ea_find(new->attrs, &ea_rip_from);
+ struct iface *rt_from = rie ? ((struct rip_iface_adata *) rie->u.ptr)->iface : NULL;
if (rt_metric > p->infinity)
{
@@ -354,8 +378,14 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, s
en->metric = rt_metric;
en->tag = rt_tag;
en->from = (new->src->owner == &P->sources) ? rt_from : NULL;
- en->iface = new->attrs->nh.iface;
- en->next_hop = new->attrs->nh.gw;
+
+ eattr *nhea = ea_find(new->attrs, &ea_gen_nexthop);
+ if (nhea)
+ {
+ struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ en->iface = nhad->nh.iface;
+ en->next_hop = nhad->nh.gw;
+ }
}
else
{
@@ -513,7 +543,7 @@ rip_update_bfd(struct rip_proto *p, struct rip_neighbor *n)
ip_addr saddr = rip_is_v2(p) ? n->ifa->sk->saddr : n->nbr->ifa->ip;
n->bfd_req = bfd_request_session(p->p.pool, n->nbr->addr, saddr,
n->nbr->iface, p->p.vrf,
- rip_bfd_notify, n, birdloop_event_list(p->p.loop), NULL);
+ rip_bfd_notify, n, p->p.loop, NULL);
}
if (!use_bfd && n->bfd_req)
@@ -637,9 +667,9 @@ rip_iface_update_bfd(struct rip_iface *ifa)
static void
-rip_iface_locked(struct object_lock *lock)
+rip_iface_locked(void *_ifa)
{
- struct rip_iface *ifa = lock->data;
+ struct rip_iface *ifa = _ifa;
struct rip_proto *p = ifa->rip;
if (!rip_open_socket(ifa))
@@ -701,8 +731,11 @@ rip_add_iface(struct rip_proto *p, struct iface *iface, struct rip_iface_config
lock->type = OBJLOCK_UDP;
lock->port = ic->port;
lock->iface = iface;
- lock->data = ifa;
- lock->hook = rip_iface_locked;
+ lock->event = (event) {
+ .hook = rip_iface_locked,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
ifa->lock = lock;
olock_acquire(lock);
@@ -774,11 +807,11 @@ rip_reconfigure_iface(struct rip_proto *p, struct rip_iface *ifa, struct rip_ifa
static void
rip_reconfigure_ifaces(struct rip_proto *p, struct rip_config *cf)
{
- struct iface *iface;
-
- IFACE_LEGACY_ACCESS;
- WALK_LIST(iface, global_iface_list)
+ IFACE_WALK(iface)
{
+ if (p->p.vrf && p->p.vrf != iface->master)
+ continue;
+
if (!(iface->flags & IF_UP))
continue;
@@ -1086,28 +1119,22 @@ rip_reload_routes(struct channel *C)
static struct rte_owner_class rip_rte_owner_class;
static inline struct rip_proto *
-rip_rte_proto(struct rte *rte)
+rip_rte_proto(const rte *rte)
{
return (rte->src->owner->class == &rip_rte_owner_class) ?
SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL;
}
-static int
-rip_rte_better(struct rte *new, struct rte *old)
+static u32
+rip_rte_igp_metric(const rte *rt)
{
- ASSERT_DIE(new->src == old->src);
- struct rip_proto *p = rip_rte_proto(new);
-
- u32 new_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, p->infinity);
- u32 old_metric = ea_get_int(old->attrs->eattrs, EA_RIP_METRIC, p->infinity);
-
- return new_metric < old_metric;
+ return ea_get_int(rt->attrs, &ea_rip_metric, IGP_METRIC_UNKNOWN);
}
-static u32
-rip_rte_igp_metric(struct rte *rt)
+static int
+rip_rte_better(const rte *new, const rte *old)
{
- return ea_get_int(rt->attrs->eattrs, EA_RIP_METRIC, IGP_METRIC_UNKNOWN);
+ return rip_rte_igp_metric(new) < rip_rte_igp_metric(old);
}
static void
@@ -1127,9 +1154,9 @@ rip_init(struct proto_config *CF)
P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- P->if_notify = rip_if_notify;
+ P->iface_sub.if_notify = rip_if_notify;
P->rt_notify = rip_rt_notify;
- P->neigh_notify = rip_neigh_notify;
+ P->iface_sub.neigh_notify = rip_neigh_notify;
P->reload_routes = rip_reload_routes;
P->sources.class = &rip_rte_owner_class;
@@ -1204,35 +1231,41 @@ rip_reconfigure(struct proto *P, struct proto_config *CF)
}
static void
-rip_get_route_info(rte *rte, byte *buf)
+rip_get_route_info(const rte *rte, byte *buf)
{
struct rip_proto *p = rip_rte_proto(rte);
- u32 rt_metric = ea_get_int(rte->attrs->eattrs, EA_RIP_METRIC, p->infinity);
- u32 rt_tag = ea_get_int(rte->attrs->eattrs, EA_RIP_TAG, 0);
+ u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity);
+ u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0);
- buf += bsprintf(buf, " (%d/%d)", rte->attrs->pref, rt_metric);
+ buf += bsprintf(buf, " (%d/%d)", rt_get_preference(rte), rt_metric);
if (rt_tag)
bsprintf(buf, " [%04x]", rt_tag);
}
-static int
-rip_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
+static void
+rip_tag_format(const eattr *a, byte *buf, uint buflen)
{
- switch (a->id)
- {
- case EA_RIP_METRIC:
- bsprintf(buf, "metric: %d", a->u.data);
- return GA_FULL;
+ bsnprintf(buf, buflen, "%04x", a->u.data);
+}
+
+static struct ea_class ea_rip_metric = {
+ .name = "rip_metric",
+ .type = T_INT,
+};
- case EA_RIP_TAG:
- bsprintf(buf, "tag: %04x", a->u.data);
- return GA_FULL;
+static struct ea_class ea_rip_tag = {
+ .name = "rip_tag",
+ .type = T_INT,
+ .format = rip_tag_format,
+};
- default:
- return GA_UNKNOWN;
- }
-}
+static struct ea_class ea_rip_from = {
+ .name = "rip_from",
+ .type = T_IFACE,
+ .readonly = 1,
+ .hidden = 1,
+};
void
rip_show_interfaces(struct proto *P, const char *iff)
@@ -1342,7 +1375,6 @@ static struct rte_owner_class rip_rte_owner_class = {
struct protocol proto_rip = {
.name = "RIP",
.template = "rip%d",
- .class = PROTOCOL_RIP,
.preference = DEF_PREF_RIP,
.channel_mask = NB_IP,
.proto_size = sizeof(struct rip_proto),
@@ -1353,5 +1385,16 @@ struct protocol proto_rip = {
.start = rip_start,
.shutdown = rip_shutdown,
.reconfigure = rip_reconfigure,
- .get_attr = rip_get_attr
};
+
+void
+rip_build(void)
+{
+ proto_build(&proto_rip);
+
+ EA_REGISTER_ALL(
+ &ea_rip_metric,
+ &ea_rip_tag,
+ &ea_rip_from
+ );
+}
diff --git a/proto/rip/rip.h b/proto/rip/rip.h
index f8713c4a..a01f8d3b 100644
--- a/proto/rip/rip.h
+++ b/proto/rip/rip.h
@@ -16,7 +16,7 @@
#include "nest/cli.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/password.h"
#include "nest/locks.h"
#include "nest/bfd.h"
@@ -195,10 +195,6 @@ struct rip_rte
#define RIP_ENTRY_VALID 1 /* Valid outgoing route */
#define RIP_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */
-#define EA_RIP_METRIC EA_CODE(PROTOCOL_RIP, 0)
-#define EA_RIP_TAG EA_CODE(PROTOCOL_RIP, 1)
-#define EA_RIP_FROM EA_CODE(PROTOCOL_RIP, 2)
-
static inline int rip_is_v2(struct rip_proto *p)
{ return p->rip2; }
diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile
index eb09b7df..0f60b2a0 100644
--- a/proto/rpki/Makefile
+++ b/proto/rpki/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c
index afba2216..e5638aff 100644
--- a/proto/rpki/rpki.c
+++ b/proto/rpki/rpki.c
@@ -121,14 +121,11 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_
{
struct rpki_proto *p = cache->p;
- rta a0 = {
- .pref = channel->preference,
- .source = RTS_RPKI,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_NONE,
- };
+ ea_list *ea = NULL;
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, channel->preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_RPKI);
- rte e0 = { .attrs = &a0, .src = p->p.main_source, };
+ rte e0 = { .attrs = ea, .src = p->p.main_source, };
rte_update(channel, &pfxr->n, &e0, p->p.main_source);
}
@@ -302,12 +299,13 @@ rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state ne
case RPKI_CS_NO_INCR_UPDATE_AVAIL:
/* Server was unable to answer the last Serial Query and sent Cache Reset. */
- rpki_cache_change_state(cache, RPKI_CS_RESET);
- break;
-
case RPKI_CS_ERROR_NO_DATA_AVAIL:
/* No validation records are available on the cache server. */
- rpki_cache_change_state(cache, RPKI_CS_RESET);
+
+ if (old_state == RPKI_CS_ESTABLISHED)
+ rpki_cache_change_state(cache, RPKI_CS_RESET);
+ else
+ rpki_schedule_next_retry(cache);
break;
case RPKI_CS_ERROR_FATAL:
@@ -491,6 +489,11 @@ rpki_retry_hook(timer *tm)
}
break;
+ case RPKI_CS_NO_INCR_UPDATE_AVAIL:
+ case RPKI_CS_ERROR_NO_DATA_AVAIL:
+ rpki_cache_change_state(cache, RPKI_CS_RESET);
+ break;
+
default:
rpki_cache_change_state(cache, RPKI_CS_CONNECTING);
break;
@@ -596,7 +599,7 @@ rpki_check_expire_interval(uint seconds)
static struct rpki_cache *
rpki_init_cache(struct rpki_proto *p, struct rpki_config *cf)
{
- pool *pool = rp_new(p->p.pool, p->p.loop, cf->hostname);
+ pool *pool = rp_new(p->p.pool, cf->hostname);
struct rpki_cache *cache = mb_allocz(pool, sizeof(struct rpki_cache));
@@ -870,16 +873,27 @@ rpki_show_proto_info(struct proto *P)
if (cache)
{
const char *transport_name = "---";
+ uint default_port = 0;
switch (cf->tr_config.type)
{
#if HAVE_LIBSSH
- case RPKI_TR_SSH: transport_name = "SSHv2"; break;
+ case RPKI_TR_SSH:
+ transport_name = "SSHv2";
+ default_port = RPKI_SSH_PORT;
+ break;
#endif
- case RPKI_TR_TCP: transport_name = "Unprotected over TCP"; break;
+ case RPKI_TR_TCP:
+ transport_name = "Unprotected over TCP";
+ default_port = RPKI_TCP_PORT;
+ break;
};
cli_msg(-1006, " Cache server: %s", cf->hostname);
+
+ if (cf->port != default_port)
+ cli_msg(-1006, " Cache port: %u", cf->port);
+
cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state));
cli_msg(-1006, " Transport: %s", transport_name);
cli_msg(-1006, " Protocol version: %u", cache->version);
@@ -977,7 +991,6 @@ rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS
struct protocol proto_rpki = {
.name = "RPKI",
.template = "rpki%d",
- .class = PROTOCOL_RPKI,
.preference = DEF_PREF_RPKI,
.proto_size = sizeof(struct rpki_proto),
.config_size = sizeof(struct rpki_config),
@@ -991,3 +1004,9 @@ struct protocol proto_rpki = {
.reconfigure = rpki_reconfigure,
.get_status = rpki_get_status,
};
+
+void
+rpki_build(void)
+{
+ proto_build(&proto_rpki);
+}
diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h
index a70a2027..20253844 100644
--- a/proto/rpki/rpki.h
+++ b/proto/rpki/rpki.h
@@ -13,7 +13,7 @@
#define _BIRD_RPKI_H_
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "lib/socket.h"
#include "lib/ip.h"
diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c
index 223afa80..425ad460 100644
--- a/proto/rpki/ssh_transport.c
+++ b/proto/rpki/ssh_transport.c
@@ -35,11 +35,9 @@ rpki_tr_ssh_open(struct rpki_tr_sock *tr)
sk->ssh->subsystem = "rpki-rtr";
sk->ssh->state = SK_SSH_CONNECT;
- if (sk_open(sk) != 0)
+ if (sk_open(sk, cache->p->p.loop) != 0)
return RPKI_TR_ERROR;
- sk_start(sk);
-
return RPKI_TR_SUCCESS;
}
diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c
index 4e850c44..ebb8030f 100644
--- a/proto/rpki/tcp_transport.c
+++ b/proto/rpki/tcp_transport.c
@@ -28,11 +28,9 @@ rpki_tr_tcp_open(struct rpki_tr_sock *tr)
sk->type = SK_TCP_ACTIVE;
- if (sk_open(sk) != 0)
+ if (sk_open(sk, tr->cache->p->p.loop) != 0)
return RPKI_TR_ERROR;
- sk_start(sk);
-
return RPKI_TR_SUCCESS;
}
diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c
index 26609764..81bd6dd8 100644
--- a/proto/rpki/transport.c
+++ b/proto/rpki/transport.c
@@ -85,8 +85,7 @@ rpki_tr_open(struct rpki_tr_sock *tr)
sk->rbsize = RPKI_RX_BUFFER_SIZE;
sk->tbsize = RPKI_TX_BUFFER_SIZE;
sk->tos = IP_PREC_INTERNET_CONTROL;
- sk->flags |= SKF_THREAD;
- sk->loop = cache->p->p.loop;
+ sk->vrf = cache->p->p.vrf;
if (ipa_zero(sk->daddr) && sk->host)
{
@@ -121,7 +120,6 @@ rpki_tr_close(struct rpki_tr_sock *tr)
if (tr->sk)
{
- sk_stop(tr->sk);
rfree(tr->sk);
tr->sk = NULL;
}
diff --git a/proto/static/Makefile b/proto/static/Makefile
index e38f9b74..de6e819b 100644
--- a/proto/static/Makefile
+++ b/proto/static/Makefile
@@ -3,4 +3,4 @@ obj := $(src-o-files)
$(all-daemon)
$(cf-local)
-tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
+tests_objs := $(tests_objs) $(src-o-files)
diff --git a/proto/static/config.Y b/proto/static/config.Y
index 41e10dbf..9d26ee82 100644
--- a/proto/static/config.Y
+++ b/proto/static/config.Y
@@ -40,7 +40,7 @@ static_route_finish(void)
if (net_type_match(this_srt->net, NB_DEST) == !this_srt->dest)
cf_error("Unexpected or missing nexthop/type");
- this_srt->cmds = f_linearize(this_srt_cmds);
+ this_srt->cmds = f_linearize(this_srt_cmds, 0);
}
CF_DECLS
diff --git a/proto/static/static.c b/proto/static/static.c
index d89ca8b0..6bae827b 100644
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@ -38,7 +38,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "conf/conf.h"
#include "filter/filter.h"
@@ -47,8 +47,6 @@
#include "static.h"
-static linpool *static_lp;
-
static inline struct rte_src * static_get_source(struct static_proto *p, uint i)
{ return i ? rt_get_source(&p->p, i) : p->p.main_source; }
@@ -59,68 +57,77 @@ static void
static_announce_rte(struct static_proto *p, struct static_route *r)
{
struct rte_src *src;
- rta *a = allocz(RTA_MAX_SIZE);
- a->source = RTS_STATIC;
- a->scope = SCOPE_UNIVERSE;
- a->dest = r->dest;
- a->pref = p->p.main_channel->preference;
+ ea_list *ea = NULL;
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_STATIC);
if (r->dest == RTD_UNICAST)
{
- struct static_route *r2;
- struct nexthop *nhs = NULL;
+ uint sz = 0;
+ for (struct static_route *r2 = r; r2; r2 = r2->mp_next)
+ if (r2->active)
+ sz += NEXTHOP_SIZE_CNT(r2->mls ? r2->mls->length / sizeof(u32) : 0);
- for (r2 = r; r2; r2 = r2->mp_next)
+ if (!sz)
+ goto withdraw;
+
+ struct nexthop_adata *nhad = allocz(sz + sizeof *nhad);
+ struct nexthop *nh = &nhad->nh;
+
+ for (struct static_route *r2 = r; r2; r2 = r2->mp_next)
{
if (!r2->active)
continue;
- struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE);
- nh->gw = r2->via;
- nh->iface = r2->neigh->iface;
- nh->flags = r2->onlink ? RNF_ONLINK : 0;
- nh->weight = r2->weight;
+ *nh = (struct nexthop) {
+ .gw = r2->via,
+ .iface = r2->neigh->iface,
+ .flags = r2->onlink ? RNF_ONLINK : 0,
+ .weight = r2->weight,
+ };
+
if (r2->mls)
{
- nh->labels = r2->mls->len;
- memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32));
+ nh->labels = r2->mls->length / sizeof(u32);
+ memcpy(nh->label, r2->mls->data, r2->mls->length);
}
- nexthop_insert(&nhs, nh);
+ nh = NEXTHOP_NEXT(nh);
}
- if (!nhs)
- goto withdraw;
-
- nexthop_link(a, nhs);
+ ea_set_attr_data(&ea, &ea_gen_nexthop, 0,
+ nhad->ad.data, (void *) nh - (void *) nhad->ad.data);
}
- if (r->dest == RTDX_RECURSIVE)
+ else if (r->dest == RTDX_RECURSIVE)
{
rtable *tab = ipa_is_ip4(r->via) ? p->igp_table_ip4 : p->igp_table_ip6;
- rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls, static_lp);
+ u32 *labels = r->mls ? (void *) r->mls->data : NULL;
+ u32 lnum = r->mls ? r->mls->length / sizeof(u32) : 0;
+
+ ea_set_hostentry(&ea, p->p.main_channel->table, tab,
+ r->via, IPA_NONE, lnum, labels);
}
+ else if (r->dest)
+ ea_set_dest(&ea, 0, r->dest);
+
/* Already announced */
if (r->state == SRS_CLEAN)
return;
/* We skip rta_lookup() here */
src = static_get_source(p, r->index);
- rte e0 = { .attrs = a, .src = src, .net = r->net, }, *e = &e0;
+ rte e0 = { .attrs = ea, .src = src, .net = r->net, }, *e = &e0;
/* Evaluate the filter */
if (r->cmds)
- f_eval_rte(r->cmds, e, static_lp);
+ f_eval_rte(r->cmds, e);
rte_update(p->p.main_channel, r->net, e, src);
static_free_source(src, r->index);
r->state = SRS_CLEAN;
-
- if (r->cmds)
- lp_flush(static_lp);
-
return;
withdraw:
@@ -206,7 +213,7 @@ static_update_bfd(struct static_proto *p, struct static_route *r)
// ip_addr local = ipa_nonzero(r->local) ? r->local : nb->ifa->ip;
r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip,
nb->iface, p->p.vrf,
- static_bfd_notify, r, birdloop_event_list(p->p.loop), NULL);
+ static_bfd_notify, r, p->p.loop, NULL);
}
if (!bfd_up && r->bfd_req)
@@ -322,31 +329,17 @@ static_same_dest(struct static_route *x, struct static_route *y)
(x->weight != y->weight) ||
(x->use_bfd != y->use_bfd) ||
(!x->mls != !y->mls) ||
- ((x->mls) && (y->mls) && (x->mls->len != y->mls->len)))
+ ((x->mls) && (y->mls) && adata_same(x->mls, y->mls)))
return 0;
-
- if (!x->mls)
- continue;
-
- for (uint i = 0; i < x->mls->len; i++)
- if (x->mls->stack[i] != y->mls->stack[i])
- return 0;
}
return !x && !y;
case RTDX_RECURSIVE:
if (!ipa_equal(x->via, y->via) ||
(!x->mls != !y->mls) ||
- ((x->mls) && (y->mls) && (x->mls->len != y->mls->len)))
+ ((x->mls) && (y->mls) && adata_same(x->mls, y->mls)))
return 0;
- if (!x->mls)
- return 1;
-
- for (uint i = 0; i < x->mls->len; i++)
- if (x->mls->stack[i] != y->mls->stack[i])
- return 0;
-
return 1;
default:
@@ -413,18 +406,18 @@ static_reload_routes(struct channel *C)
}
static int
-static_rte_better(rte *new, rte *old)
+static_rte_better(const rte *new, const rte *old)
{
- u32 n = ea_get_int(new->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
- u32 o = ea_get_int(old->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
+ u32 n = ea_get_int(new->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
+ u32 o = ea_get_int(old->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
return n < o;
}
static int
-static_rte_mergable(rte *pri, rte *sec)
+static_rte_mergable(const rte *pri, const rte *sec)
{
- u32 a = ea_get_int(pri->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
- u32 b = ea_get_int(sec->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
+ u32 a = ea_get_int(pri->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
+ u32 b = ea_get_int(sec->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
return a == b;
}
@@ -443,11 +436,11 @@ static_postconfig(struct proto_config *CF)
if (!cf->igp_table_ip4)
cf->igp_table_ip4 = (cc->table->addr_type == NET_IP4) ?
- cc->table : cf->c.global->def_tables[NET_IP4];
+ cc->table : rt_get_default_table(cf->c.global, NET_IP4);
if (!cf->igp_table_ip6)
cf->igp_table_ip6 = (cc->table->addr_type == NET_IP6) ?
- cc->table : cf->c.global->def_tables[NET_IP6];
+ cc->table : rt_get_default_table(cf->c.global, NET_IP6);
WALK_LIST(r, cf->routes)
if (r->net && (r->net->type != CF->net_type))
@@ -467,7 +460,7 @@ static_init(struct proto_config *CF)
P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- P->neigh_notify = static_neigh_notify;
+ P->iface_sub.neigh_notify = static_neigh_notify;
P->reload_routes = static_reload_routes;
P->sources.class = &static_rte_owner_class;
@@ -487,16 +480,11 @@ static_start(struct proto *P)
struct static_config *cf = (void *) P->cf;
struct static_route *r;
- if (!static_lp)
- static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024));
-
if (p->igp_table_ip4)
- RT_LOCKED(p->igp_table_ip4, t)
- rt_lock_table(t);
+ rt_lock_table(p->igp_table_ip4);
if (p->igp_table_ip6)
- RT_LOCKED(p->igp_table_ip6, t)
- rt_lock_table(t);
+ rt_lock_table(p->igp_table_ip6);
p->event = ev_new_init(p->p.pool, static_announce_marked, p);
@@ -506,7 +494,12 @@ static_start(struct proto *P)
proto_notify_state(P, PS_UP);
WALK_LIST(r, cf->routes)
+ {
+ struct lp_state lps;
+ lp_save(tmp_linpool, &lps);
static_add_rte(p, r);
+ lp_restore(tmp_linpool, &lps);
+ }
return PS_UP;
}
@@ -523,12 +516,10 @@ static_shutdown(struct proto *P)
static_reset_rte(p, r);
if (p->igp_table_ip4)
- RT_LOCKED(p->igp_table_ip4, t)
- rt_unlock_table(t);
+ rt_unlock_table(p->igp_table_ip4);
if (p->igp_table_ip6)
- RT_LOCKED(p->igp_table_ip6, t)
- rt_unlock_table(t);
+ rt_unlock_table(p->igp_table_ip6);
return PS_DOWN;
}
@@ -718,13 +709,14 @@ static_copy_config(struct proto_config *dest, struct proto_config *src)
}
static void
-static_get_route_info(rte *rte, byte *buf)
+static_get_route_info(const rte *rte, byte *buf)
{
- eattr *a = ea_find(rte->attrs->eattrs, EA_GEN_IGP_METRIC);
- if (a)
- buf += bsprintf(buf, " (%d/%u)", rte->attrs->pref, a->u.data);
+ eattr *a = ea_find(rte->attrs, &ea_gen_igp_metric);
+ u32 pref = rt_get_preference(rte);
+ if (a && (a->u.data < IGP_METRIC_UNKNOWN))
+ buf += bsprintf(buf, " (%d/%u)", pref, a->u.data);
else
- buf += bsprintf(buf, " (%d)", rte->attrs->pref);
+ buf += bsprintf(buf, " (%d)", pref);
}
static void
@@ -783,7 +775,6 @@ static struct rte_owner_class static_rte_owner_class = {
struct protocol proto_static = {
.name = "Static",
.template = "static%d",
- .class = PROTOCOL_STATIC,
.preference = DEF_PREF_STATIC,
.channel_mask = NB_ANY,
.proto_size = sizeof(struct static_proto),
@@ -796,3 +787,9 @@ struct protocol proto_static = {
.reconfigure = static_reconfigure,
.copy_config = static_copy_config,
};
+
+void
+static_build(void)
+{
+ proto_build(&proto_static);
+}
diff --git a/proto/static/static.h b/proto/static/static.h
index fc91f71c..ea7ca33b 100644
--- a/proto/static/static.h
+++ b/proto/static/static.h
@@ -9,7 +9,7 @@
#ifndef _BIRD_STATIC_H_
#define _BIRD_STATIC_H_
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/bfd.h"
#include "lib/buffer.h"
@@ -49,7 +49,7 @@ struct static_route {
byte weight; /* Multipath next hop weight */
byte use_bfd; /* Configured to use BFD */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
- mpls_label_stack *mls; /* MPLS label stack; may be NULL */
+ struct adata *mls; /* MPLS label stack; may be NULL */
};
/*