summaryrefslogtreecommitdiff
path: root/proto
diff options
context:
space:
mode:
Diffstat (limited to 'proto')
-rw-r--r--proto/babel/babel.c218
-rw-r--r--proto/babel/babel.h6
-rw-r--r--proto/babel/config.Y4
-rw-r--r--proto/babel/packets.c2
-rw-r--r--proto/bfd/Makefile2
-rw-r--r--proto/bfd/bfd.c347
-rw-r--r--proto/bfd/bfd.h14
-rw-r--r--proto/bfd/config.Y1
-rw-r--r--proto/bfd/io.c537
-rw-r--r--proto/bfd/io.h34
-rw-r--r--proto/bfd/packets.c19
-rw-r--r--proto/bgp/attrs.c1121
-rw-r--r--proto/bgp/bgp.c541
-rw-r--r--proto/bgp/bgp.h187
-rw-r--r--proto/bgp/config.Y50
-rw-r--r--proto/bgp/packets.c340
-rw-r--r--proto/mrt/mrt.c72
-rw-r--r--proto/mrt/mrt.h8
-rw-r--r--proto/ospf/config.Y7
-rw-r--r--proto/ospf/iface.c25
-rw-r--r--proto/ospf/neighbor.c2
-rw-r--r--proto/ospf/ospf.c124
-rw-r--r--proto/ospf/ospf.h11
-rw-r--r--proto/ospf/rt.c233
-rw-r--r--proto/ospf/rt.h6
-rw-r--r--proto/ospf/topology.c27
-rw-r--r--proto/ospf/topology.h6
-rw-r--r--proto/perf/perf.c39
-rw-r--r--proto/pipe/config.Y14
-rw-r--r--proto/pipe/pipe.c145
-rw-r--r--proto/pipe/pipe.h7
-rw-r--r--proto/radv/config.Y5
-rw-r--r--proto/radv/packets.c2
-rw-r--r--proto/radv/radv.c74
-rw-r--r--proto/radv/radv.h6
-rw-r--r--proto/rip/config.Y5
-rw-r--r--proto/rip/packets.c2
-rw-r--r--proto/rip/rip.c244
-rw-r--r--proto/rip/rip.h6
-rw-r--r--proto/rpki/config.Y1
-rw-r--r--proto/rpki/packets.c46
-rw-r--r--proto/rpki/rpki.c70
-rw-r--r--proto/rpki/rpki.h4
-rw-r--r--proto/rpki/ssh_transport.c2
-rw-r--r--proto/rpki/tcp_transport.c2
-rw-r--r--proto/static/static.c179
-rw-r--r--proto/static/static.h4
47 files changed, 2476 insertions, 2325 deletions
diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index 9f33dd34..d398da8e 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -42,6 +42,7 @@
#include <stdlib.h>
#include "babel.h"
+#include "lib/macro.h"
#define LOG_PKT_AUTH(msg, args...) \
log_rl(&p->log_pkt_tbf, L_AUTH "%s: " msg, p->p.name, args)
@@ -67,6 +68,8 @@ static void babel_update_cost(struct babel_neighbor *n);
static inline void babel_kick_timer(struct babel_proto *p);
static inline void babel_iface_kick_timer(struct babel_iface *ifa);
+static struct ea_class ea_babel_metric, ea_babel_router_id, ea_babel_seqno;
+
/*
* Functions to maintain data structures
*/
@@ -666,37 +669,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
if (r)
{
- rta a0 = {
- .source = RTS_BABEL,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
- .pref = c->preference,
- .from = r->neigh->addr,
- .nh.gw = r->next_hop,
- .nh.iface = r->neigh->ifa->iface,
- .eattrs = alloca(sizeof(ea_list) + 3*sizeof(eattr)),
- };
-
- *a0.eattrs = (ea_list) { .count = 3 };
- a0.eattrs->attrs[0] = (eattr) {
- .id = EA_BABEL_METRIC,
- .type = EAF_TYPE_INT,
- .u.data = r->metric,
- };
-
- struct adata *ad = alloca(sizeof(struct adata) + sizeof(u64));
- ad->length = sizeof(u64);
- memcpy(ad->data, &(r->router_id), sizeof(u64));
- a0.eattrs->attrs[1] = (eattr) {
- .id = EA_BABEL_ROUTER_ID,
- .type = EAF_TYPE_OPAQUE,
- .u.ptr = ad,
- };
-
- a0.eattrs->attrs[2] = (eattr) {
- .id = EA_BABEL_SEQNO,
- .type = EAF_TYPE_INT,
- .u.data = r->seqno,
+ struct nexthop_adata nhad = {
+ .nh = {
+ .gw = r->next_hop,
+ .iface = r->neigh->ifa->iface,
+ },
+ .ad = {
+ .length = sizeof nhad - sizeof nhad.ad,
+ },
};
/*
@@ -705,35 +685,54 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
* have routing work.
*/
if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0))
- a0.nh.flags = RNF_ONLINK;
+ nhad.nh.flags = RNF_ONLINK;
+
+ struct {
+ ea_list l;
+ eattr a[7];
+ } eattrs = {
+ .l.count = ARRAY_SIZE(eattrs.a),
+ .a = {
+ EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, c->preference),
+ EA_LITERAL_STORE_ADATA(&ea_gen_from, 0, &r->neigh->addr, sizeof(r->neigh->addr)),
+ EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_BABEL),
+ EA_LITERAL_STORE_ADATA(&ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length),
+ EA_LITERAL_EMBEDDED(&ea_babel_metric, 0, r->metric),
+ EA_LITERAL_STORE_ADATA(&ea_babel_router_id, 0, &r->router_id, sizeof(r->router_id)),
+ EA_LITERAL_EMBEDDED(&ea_babel_seqno, 0, r->seqno),
+ }
+ };
- rta *a = rta_lookup(&a0);
- rte *rte = rte_get_temp(a, p->p.main_source);
+ rte e0 = {
+ .attrs = &eattrs.l,
+ .src = p->p.main_source,
+ };
e->unreachable = 0;
- rte_update2(c, e->n.addr, rte, p->p.main_source);
+ rte_update(c, e->n.addr, &e0, p->p.main_source);
}
else if (e->valid && (e->router_id != p->router_id))
{
/* Unreachable */
- rta a0 = {
- .source = RTS_BABEL,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNREACHABLE,
- .pref = 1,
- };
+ ea_list *ea = NULL;
+
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, 1);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BABEL);
+ ea_set_dest(&ea, 0, RTD_UNREACHABLE);
- rta *a = rta_lookup(&a0);
- rte *rte = rte_get_temp(a, p->p.main_source);
+ rte e0 = {
+ .attrs = ea,
+ .src = p->p.main_source,
+ };
e->unreachable = 1;
- rte_update2(c, e->n.addr, rte, p->p.main_source);
+ rte_update(c, e->n.addr, &e0, p->p.main_source);
}
else
{
/* Retraction */
e->unreachable = 0;
- rte_update2(c, e->n.addr, NULL, p->p.main_source);
+ rte_update(c, e->n.addr, NULL, p->p.main_source);
}
}
@@ -743,7 +742,7 @@ babel_announce_retraction(struct babel_proto *p, struct babel_entry *e)
{
struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
e->unreachable = 0;
- rte_update2(c, e->n.addr, NULL, p->p.main_source);
+ rte_update(c, e->n.addr, NULL, p->p.main_source);
}
@@ -1805,9 +1804,9 @@ babel_find_iface(struct babel_proto *p, struct iface *what)
}
static void
-babel_iface_locked(struct object_lock *lock)
+babel_iface_locked(void *_ifa)
{
- struct babel_iface *ifa = lock->data;
+ struct babel_iface *ifa = _ifa;
struct babel_proto *p = ifa->proto;
if (!babel_open_socket(ifa))
@@ -1862,8 +1861,11 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con
lock->addr = IP6_BABEL_ROUTERS;
lock->port = ifa->cf->port;
lock->iface = ifa->iface;
- lock->hook = babel_iface_locked;
- lock->data = ifa;
+ lock->event = (event) {
+ .hook = babel_iface_locked,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
olock_acquire(lock);
}
@@ -1985,11 +1987,9 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b
static void
babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf)
{
- struct iface *iface;
-
- WALK_LIST(iface, iface_list)
+ IFACE_WALK(iface)
{
- if (p->p.vrf_set && p->p.vrf != iface->master)
+ if (p->p.vrf && p->p.vrf != iface->master)
continue;
if (!(iface->flags & IF_UP))
@@ -2111,41 +2111,45 @@ babel_dump(struct proto *P)
}
static void
-babel_get_route_info(rte *rte, byte *buf)
+babel_get_route_info(const rte *rte, byte *buf)
{
u64 rid = 0;
- eattr *e = ea_find(rte->attrs->eattrs, EA_BABEL_ROUTER_ID);
+ eattr *e = ea_find(rte->attrs, &ea_babel_router_id);
if (e)
memcpy(&rid, e->u.ptr->data, sizeof(u64));
- buf += bsprintf(buf, " (%d/%d) [%lR]", rte->attrs->pref,
- ea_get_int(rte->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY), rid);
+ buf += bsprintf(buf, " (%d/%d) [%lR]",
+ rt_get_preference(rte),
+ ea_get_int(rte->attrs, &ea_babel_metric, BABEL_INFINITY), rid);
}
-static int
-babel_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
+static void
+babel_router_id_format(const eattr *a, byte *buf, uint len)
{
- switch (a->id)
- {
- case EA_BABEL_METRIC:
- bsprintf(buf, "metric: %d", a->u.data);
- return GA_FULL;
+ u64 rid = 0;
+ memcpy(&rid, a->u.ptr->data, sizeof(u64));
+ bsnprintf(buf, len, "%lR", rid);
+}
- case EA_BABEL_ROUTER_ID:
- {
- u64 rid = 0;
- memcpy(&rid, a->u.ptr->data, sizeof(u64));
- bsprintf(buf, "router_id: %lR", rid);
- return GA_FULL;
- }
+static struct ea_class ea_babel_metric = {
+ .name = "babel_metric",
+ .type = T_INT,
+};
- case EA_BABEL_SEQNO:
- return GA_HIDDEN;
+static struct ea_class ea_babel_router_id = {
+ .name = "babel_router_id",
+ .type = T_OPAQUE,
+ .readonly = 1,
+ .format = babel_router_id_format,
+};
+
+static struct ea_class ea_babel_seqno = {
+ .name = "babel_seqno",
+ .type = T_INT,
+ .readonly = 1,
+ .hidden = 1,
+};
- default:
- return GA_UNKNOWN;
- }
-}
void
babel_show_interfaces(struct proto *P, const char *iff)
@@ -2345,9 +2349,13 @@ babel_kick_timer(struct babel_proto *p)
static int
babel_preexport(struct channel *C, struct rte *new)
{
- struct rta *a = new->attrs;
+ if (new->src->owner != &C->proto->sources)
+ return 0;
+
/* Reject our own unreachable routes */
- if ((a->dest == RTD_UNREACHABLE) && (new->src->proto == C->proto))
+ eattr *ea = ea_find(new->attrs, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = (void *) ea->u.ptr;
+ if (!NEXTHOP_IS_REACHABLE(nhad))
return -1;
return 0;
@@ -2358,8 +2366,8 @@ babel_preexport(struct channel *C, struct rte *new)
* so store it into our data structures.
*/
static void
-babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
- struct rte *new, struct rte *old UNUSED)
+babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net,
+ struct rte *new, const struct rte *old UNUSED)
{
struct babel_proto *p = (void *) P;
struct babel_entry *e;
@@ -2368,13 +2376,13 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
{
/* Update */
uint rt_seqno;
- uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0);
+ uint rt_metric = ea_get_int(new->attrs, &ea_babel_metric, 0);
u64 rt_router_id = 0;
- if (new->src->proto == P)
+ if (new->src->owner == &P->sources)
{
- rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data;
- eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID);
+ rt_seqno = ea_get_int(new->attrs, &ea_babel_seqno, 0);
+ eattr *e = ea_find(new->attrs, &ea_babel_router_id);
if (e)
memcpy(&rt_router_id, e->u.ptr->data, sizeof(u64));
}
@@ -2387,11 +2395,11 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
if (rt_metric > BABEL_INFINITY)
{
log(L_WARN "%s: Invalid babel_metric value %u for route %N",
- p->p.name, rt_metric, net->n.addr);
+ p->p.name, rt_metric, net);
rt_metric = BABEL_INFINITY;
}
- e = babel_get_entry(p, net->n.addr);
+ e = babel_get_entry(p, net);
/* Activate triggered updates */
if ((e->valid != BABEL_ENTRY_VALID) ||
@@ -2409,7 +2417,7 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
else
{
/* Withdraw */
- e = babel_find_entry(p, net->n.addr);
+ e = babel_find_entry(p, net);
if (!e || e->valid != BABEL_ENTRY_VALID)
return;
@@ -2423,18 +2431,18 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
}
static int
-babel_rte_better(struct rte *new, struct rte *old)
+babel_rte_better(const rte *new, const rte *old)
{
- uint new_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY);
- uint old_metric = ea_get_int(old->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY);
+ uint new_metric = ea_get_int(new->attrs, &ea_babel_metric, BABEL_INFINITY);
+ uint old_metric = ea_get_int(old->attrs, &ea_babel_metric, BABEL_INFINITY);
return new_metric < old_metric;
}
static u32
-babel_rte_igp_metric(struct rte *rt)
+babel_rte_igp_metric(const rte *rt)
{
- return ea_get_int(rt->attrs->eattrs, EA_BABEL_METRIC, BABEL_INFINITY);
+ return ea_get_int(rt->attrs, &ea_babel_metric, BABEL_INFINITY);
}
@@ -2455,6 +2463,12 @@ babel_postconfig(struct proto_config *CF)
cf->ip6_channel = ip6 ?: ip6_sadr;
}
+static struct rte_owner_class babel_rte_owner_class = {
+ .get_route_info = babel_get_route_info,
+ .rte_better = babel_rte_better,
+ .rte_igp_metric = babel_rte_igp_metric,
+};
+
static struct proto *
babel_init(struct proto_config *CF)
{
@@ -2465,11 +2479,11 @@ babel_init(struct proto_config *CF)
proto_configure_channel(P, &p->ip4_channel, cf->ip4_channel);
proto_configure_channel(P, &p->ip6_channel, cf->ip6_channel);
- P->if_notify = babel_if_notify;
+ P->iface_sub.if_notify = babel_if_notify;
P->rt_notify = babel_rt_notify;
P->preexport = babel_preexport;
- P->rte_better = babel_rte_better;
- P->rte_igp_metric = babel_rte_igp_metric;
+
+ P->sources.class = &babel_rte_owner_class;
return P;
}
@@ -2567,11 +2581,9 @@ babel_reconfigure(struct proto *P, struct proto_config *CF)
return 1;
}
-
struct protocol proto_babel = {
.name = "Babel",
.template = "babel%d",
- .class = PROTOCOL_BABEL,
.preference = DEF_PREF_BABEL,
.channel_mask = NB_IP | NB_IP6_SADR,
.proto_size = sizeof(struct babel_proto),
@@ -2582,12 +2594,16 @@ struct protocol proto_babel = {
.start = babel_start,
.shutdown = babel_shutdown,
.reconfigure = babel_reconfigure,
- .get_route_info = babel_get_route_info,
- .get_attr = babel_get_attr
};
void
babel_build(void)
{
proto_build(&proto_babel);
+
+ EA_REGISTER_ALL(
+ &ea_babel_metric,
+ &ea_babel_router_id,
+ &ea_babel_seqno
+ );
}
diff --git a/proto/babel/babel.h b/proto/babel/babel.h
index dcd303e1..562abac2 100644
--- a/proto/babel/babel.h
+++ b/proto/babel/babel.h
@@ -16,7 +16,7 @@
#include "nest/bird.h"
#include "nest/cli.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "nest/locks.h"
#include "nest/password.h"
@@ -26,10 +26,6 @@
#include "lib/string.h"
#include "lib/timer.h"
-#define EA_BABEL_METRIC EA_CODE(PROTOCOL_BABEL, 0)
-#define EA_BABEL_ROUTER_ID EA_CODE(PROTOCOL_BABEL, 1)
-#define EA_BABEL_SEQNO EA_CODE(PROTOCOL_BABEL, 2)
-
#define BABEL_MAGIC 42
#define BABEL_VERSION 2
#define BABEL_PORT 6696
diff --git a/proto/babel/config.Y b/proto/babel/config.Y
index 1b4dc6f5..6d1ad7d0 100644
--- a/proto/babel/config.Y
+++ b/proto/babel/config.Y
@@ -24,7 +24,7 @@ CF_DECLS
CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT,
TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK,
- NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS,
+ NEXT, HOP, IPV4, IPV6, SHOW, INTERFACES, NEIGHBORS,
ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE,
EXTENDED)
@@ -166,8 +166,6 @@ babel_iface_opt_list:
babel_iface:
babel_iface_start iface_patt_list_nopx babel_iface_opt_list babel_iface_finish;
-dynamic_attr: BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_BABEL_METRIC); } ;
-
CF_CLI_HELP(SHOW BABEL, ..., [[Show information about Babel protocol]]);
CF_CLI(SHOW BABEL INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about Babel interfaces]])
diff --git a/proto/babel/packets.c b/proto/babel/packets.c
index 61c94cc5..d26ee5c6 100644
--- a/proto/babel/packets.c
+++ b/proto/babel/packets.c
@@ -1666,7 +1666,7 @@ babel_open_socket(struct babel_iface *ifa)
sk->ttl = 1;
sk->flags = SKF_LADDR_RX;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
if (sk_setup_multicast(sk) < 0)
diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile
index d9aecfa9..267dff98 100644
--- a/proto/bfd/Makefile
+++ b/proto/bfd/Makefile
@@ -1,4 +1,4 @@
-src := bfd.c io.c packets.c
+src := bfd.c packets.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c
index 873e2ed5..c88c1cb2 100644
--- a/proto/bfd/bfd.c
+++ b/proto/bfd/bfd.c
@@ -82,7 +82,7 @@
* BFD thread to the main thread. This is done in an asynchronous way, sesions
* with pending notifications are linked (in the BFD thread) to @notify_list in
* &bfd_proto, and then bfd_notify_hook() in the main thread is activated using
- * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and
+ * a standard event sending code. The hook then processes scheduled sessions and
* calls hooks from associated BFD requests. This @notify_list (and state fields
* in structure &bfd_session) is protected by a spinlock in &bfd_proto and
* functions bfd_lock_sessions() / bfd_unlock_sessions().
@@ -113,15 +113,22 @@
#define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2
#define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n)
-static list STATIC_LIST_INIT(bfd_proto_list);
-static list STATIC_LIST_INIT(bfd_wait_list);
+#define BFD_LOCK LOCK_DOMAIN(rtable, bfd_global.lock)
+#define BFD_UNLOCK UNLOCK_DOMAIN(rtable, bfd_global.lock)
+
+static struct {
+ DOMAIN(rtable) lock;
+ list wait_list;
+ list pickup_list;
+ list proto_list;
+ uint pickup_reload;
+} bfd_global;
const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" };
static void bfd_session_set_min_tx(struct bfd_session *s, u32 val);
static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface);
static void bfd_free_iface(struct bfd_iface *ifa);
-static inline void bfd_notify_kick(struct bfd_proto *p);
/*
@@ -170,7 +177,7 @@ bfd_session_update_state(struct bfd_session *s, uint state, uint diag)
bfd_session_set_min_tx(s, s->cf.idle_tx_int);
if (notify)
- bfd_notify_kick(p);
+ ev_send(&global_event_list, &p->notify_event);
}
static void
@@ -188,7 +195,7 @@ bfd_session_update_tx_interval(struct bfd_session *s)
return;
/* Set timer relative to last tx_timer event */
- tm_set(s->tx_timer, s->last_tx + tx_int_l);
+ tm_set_in(s->tx_timer, s->last_tx + tx_int_l, s->ifa->bfd->p.loop);
}
static void
@@ -202,7 +209,7 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick)
if (!s->last_rx)
return;
- tm_set(s->hold_timer, s->last_rx + timeout);
+ tm_set_in(s->hold_timer, s->last_rx + timeout, s->ifa->bfd->p.loop);
}
static void
@@ -226,7 +233,7 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset)
if (reset || !tm_active(s->tx_timer))
{
s->last_tx = 0;
- tm_start(s->tx_timer, 0);
+ tm_start_in(s->tx_timer, 0, s->ifa->bfd->p.loop);
}
return;
@@ -419,7 +426,7 @@ bfd_get_free_id(struct bfd_proto *p)
static struct bfd_session *
bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface, struct bfd_options *opts)
{
- birdloop_enter(p->loop);
+ ASSERT_DIE(birdloop_inside(p->p.loop));
struct bfd_iface *ifa = bfd_get_iface(p, local, iface);
@@ -454,8 +461,6 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *
TRACE(D_EVENTS, "Session to %I added", s->addr);
- birdloop_leave(p->loop);
-
return s;
}
@@ -463,38 +468,34 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *
static void
bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa)
{
- birdloop_enter(p->loop);
+ birdloop_enter(p->p.loop);
s->opened = 1;
bfd_session_control_tx_timer(s);
- birdloop_leave(p->loop);
+ birdloop_leave(p->p.loop);
}
static void
bfd_close_session(struct bfd_proto *p, struct bfd_session *s)
{
- birdloop_enter(p->loop);
+ birdloop_enter(p->p.loop);
s->opened = 0;
bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN);
bfd_session_control_tx_timer(s);
- birdloop_leave(p->loop);
+ birdloop_leave(p->p.loop);
}
*/
static void
-bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
+bfd_remove_session_locked(struct bfd_proto *p, struct bfd_session *s)
{
- ip_addr ip = s->addr;
-
/* Caller should ensure that request list is empty */
- birdloop_enter(p->loop);
-
/* Remove session from notify list if scheduled for notification */
/* No need for bfd_lock_sessions(), we are already protected by birdloop_enter() */
if (NODE_VALID(&s->n))
@@ -508,11 +509,17 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
HASH_REMOVE(p->session_hash_id, HASH_ID, s);
HASH_REMOVE(p->session_hash_ip, HASH_IP, s);
- sl_free(s);
+ TRACE(D_EVENTS, "Session to %I removed", s->addr);
- TRACE(D_EVENTS, "Session to %I removed", ip);
+ sl_free(s);
+}
- birdloop_leave(p->loop);
+static void
+bfd_remove_session(struct bfd_proto *p, struct bfd_session *s)
+{
+ birdloop_enter(p->p.loop);
+ bfd_remove_session_locked(p, s);
+ birdloop_leave(p->p.loop);
}
static void
@@ -521,7 +528,7 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
if (EMPTY_LIST(s->request_list))
return;
- birdloop_enter(p->loop);
+ birdloop_enter(p->p.loop);
struct bfd_request *req = SKIP_BACK(struct bfd_request, n, HEAD(s->request_list));
s->cf = bfd_merge_options(s->ifa->cf, &req->opts);
@@ -534,7 +541,7 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s)
bfd_session_control_tx_timer(s, 0);
- birdloop_leave(p->loop);
+ birdloop_leave(p->p.loop);
TRACE(D_EVENTS, "Session to %I reconfigured", s->addr);
}
@@ -597,16 +604,10 @@ bfd_free_iface(struct bfd_iface *ifa)
return;
if (ifa->sk)
- {
- sk_stop(ifa->sk);
rfree(ifa->sk);
- }
if (ifa->rx)
- {
- sk_stop(ifa->rx);
rfree(ifa->rx);
- }
rem_node(&ifa->n);
mb_free(ifa);
@@ -627,9 +628,9 @@ bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_con
(new->passive != old->passive);
/* This should be probably changed to not access ifa->cf from the BFD thread */
- birdloop_enter(p->loop);
+ birdloop_enter(p->p.loop);
ifa->cf = new;
- birdloop_leave(p->loop);
+ birdloop_leave(p->p.loop);
}
@@ -651,7 +652,17 @@ bfd_request_notify(struct bfd_request *req, u8 state, u8 diag)
req->down = (old_state == BFD_STATE_UP) && (state == BFD_STATE_DOWN);
if (req->hook)
+ {
+ struct birdloop *target = !birdloop_inside(req->target) ? req->target : NULL;
+
+ if (target)
+ birdloop_enter(target);
+
req->hook(req);
+
+ if (target)
+ birdloop_leave(target);
+ }
}
static int
@@ -659,20 +670,30 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req)
{
struct bfd_config *cf = (struct bfd_config *) (p->p.cf);
- if (p->p.vrf_set && (p->p.vrf != req->vrf))
+ if (p->p.vrf && (p->p.vrf != req->vrf))
+ {
+ TRACE(D_EVENTS, "Not accepting request to %I with different VRF", req->addr);
return 0;
+ }
if (ipa_is_ip4(req->addr) ? !cf->accept_ipv4 : !cf->accept_ipv6)
+ {
+ TRACE(D_EVENTS, "Not accepting request to %I (AF limit)", req->addr);
return 0;
+ }
if (req->iface ? !cf->accept_direct : !cf->accept_multihop)
+ {
+ TRACE(D_EVENTS, "Not accepting %s request to %I", req->iface ? "direct" : "multihop", req->addr);
return 0;
+ }
uint ifindex = req->iface ? req->iface->index : 0;
struct bfd_session *s = bfd_find_session_by_addr(p, req->addr, ifindex);
- u8 state, diag;
- if (!s)
+ if (s)
+ TRACE(D_EVENTS, "Session to %I reused", s->addr);
+ else
s = bfd_add_session(p, req->addr, req->local, req->iface, &req->opts);
rem_node(&req->n);
@@ -680,51 +701,129 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req)
req->session = s;
bfd_lock_sessions(p);
- state = s->loc_state;
- diag = s->loc_diag;
+
+ int notify = !NODE_VALID(&s->n);
+ if (notify)
+ add_tail(&p->notify_list, &s->n);
+
bfd_unlock_sessions(p);
- bfd_request_notify(req, state, diag);
+ if (notify)
+ ev_send(&global_event_list, &p->notify_event);
return 1;
}
static void
-bfd_submit_request(struct bfd_request *req)
+bfd_pickup_requests(void *_data UNUSED)
{
- node *n;
+ /* NOTE TO MY FUTURE SELF
+ *
+ * Functions bfd_take_requests() and bfd_drop_requests() need to have
+ * consistent &bfd_global.wait_list and this is ensured only by having these
+ * functions called from bfd_start() and bfd_shutdown() which are both called
+ * in PROTO_LOCKED_FROM_MAIN context, i.e. always from &main_birdloop.
+ *
+ * This pickup event is also called in &main_birdloop, therefore we can
+ * freely do BFD_LOCK/BFD_UNLOCK while processing all the requests. All BFD
+ * protocols capable of bfd_add_request() are either started before this code
+ * happens or after that.
+ *
+ * If BFD protocols could start in parallel with this routine, they might
+ * miss some of the waiting requests, thus if anybody tries to start
+ * protocols or run this pickup event outside &main_birdloop in future, they
+ * shall ensure that this race condition is mitigated somehow.
+ *
+ * Thank you, my future self, for understanding. Have a nice day!
+ */
+
+ DBG("BFD pickup loop starting");
+
+ BFD_LOCK;
+ do {
+ bfd_global.pickup_reload = 0;
+ BFD_UNLOCK;
+
+ node *n;
+ WALK_LIST(n, bfd_global.proto_list)
+ {
+ struct bfd_proto *p = SKIP_BACK(struct bfd_proto, bfd_node, n);
+ birdloop_enter(p->p.loop);
+ BFD_LOCK;
+
+ TRACE(D_EVENTS, "Picking up new requests (%d available)", list_length(&bfd_global.pickup_list));
+
+ node *rn, *rnxt;
+ WALK_LIST_DELSAFE(rn, rnxt, bfd_global.pickup_list)
+ bfd_add_request(p, SKIP_BACK(struct bfd_request, n, rn));
+
+ BFD_UNLOCK;
+
+ /* Remove sessions with no requests */
+ HASH_WALK_DELSAFE(p->session_hash_id, next_id, s)
+ {
+ if (EMPTY_LIST(s->request_list))
+ bfd_remove_session_locked(p, s);
+ }
+ HASH_WALK_END;
- WALK_LIST(n, bfd_proto_list)
- if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req))
- return;
+ birdloop_leave(p->p.loop);
+ }
- rem_node(&req->n);
- add_tail(&bfd_wait_list, &req->n);
- req->session = NULL;
- bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0);
+ BFD_LOCK;
+ } while (bfd_global.pickup_reload);
+
+ list tmp_list;
+ init_list(&tmp_list);
+ add_tail_list(&tmp_list, &bfd_global.pickup_list);
+
+ init_list(&bfd_global.pickup_list);
+ BFD_UNLOCK;
+
+ log(L_TRACE "No protocol for %d BFD requests", list_length(&tmp_list));
+
+ node *n;
+ WALK_LIST(n, tmp_list)
+ bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), BFD_STATE_ADMIN_DOWN, 0);
+
+ BFD_LOCK;
+ add_tail_list(&bfd_global.wait_list, &tmp_list);
+ BFD_UNLOCK;
}
+static event bfd_pickup_event = { .hook = bfd_pickup_requests };
+
static void
bfd_take_requests(struct bfd_proto *p)
{
node *n, *nn;
-
- WALK_LIST_DELSAFE(n, nn, bfd_wait_list)
+ BFD_LOCK;
+ WALK_LIST_DELSAFE(n, nn, bfd_global.wait_list)
bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n));
+ BFD_UNLOCK;
}
static void
bfd_drop_requests(struct bfd_proto *p)
{
node *n;
-
- HASH_WALK(p->session_hash_id, next_id, s)
+ BFD_LOCK;
+ HASH_WALK_DELSAFE(p->session_hash_id, next_id, s)
{
- /* We assume that p is not in bfd_proto_list */
WALK_LIST_FIRST(n, s->request_list)
- bfd_submit_request(SKIP_BACK(struct bfd_request, n, n));
+ {
+ struct bfd_request *req = SKIP_BACK(struct bfd_request, n, n);
+ rem_node(&req->n);
+ add_tail(&bfd_global.pickup_list, &req->n);
+ req->session = NULL;
+ }
+
+ ev_send(&global_event_list, &bfd_pickup_event);
+
+ bfd_remove_session_locked(p, s);
}
HASH_WALK_END;
+ BFD_UNLOCK;
}
static struct resclass bfd_request_class;
@@ -733,13 +832,11 @@ struct bfd_request *
bfd_request_session(pool *p, ip_addr addr, ip_addr local,
struct iface *iface, struct iface *vrf,
void (*hook)(struct bfd_request *), void *data,
+ struct birdloop *target,
const struct bfd_options *opts)
{
struct bfd_request *req = ralloc(p, &bfd_request_class);
- /* Hack: self-link req->n, we will call rem_node() on it */
- req->n.prev = req->n.next = &req->n;
-
req->addr = addr;
req->local = local;
req->iface = iface;
@@ -748,10 +845,19 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local,
if (opts)
req->opts = *opts;
- bfd_submit_request(req);
-
+ ASSERT_DIE(target || !hook);
req->hook = hook;
req->data = data;
+ req->target = target;
+
+ req->session = NULL;
+
+ BFD_LOCK;
+ bfd_global.pickup_reload++;
+ add_tail(&bfd_global.pickup_list, &req->n);
+ ev_send(&global_event_list, &bfd_pickup_event);
+ DBG("New BFD request enlisted.\n");
+ BFD_UNLOCK;
return req;
}
@@ -774,19 +880,16 @@ static void
bfd_request_free(resource *r)
{
struct bfd_request *req = (struct bfd_request *) r;
- struct bfd_session *s = req->session;
+ BFD_LOCK;
rem_node(&req->n);
+ BFD_UNLOCK;
- /* Remove the session if there is no request for it. Skip that if
- inside notify hooks, will be handled by bfd_notify_hook() itself */
-
- if (s && EMPTY_LIST(s->request_list) && !s->notify_running)
- bfd_remove_session(s->ifa->bfd, s);
+ ev_send(&global_event_list, &bfd_pickup_event);
}
static void
-bfd_request_dump(resource *r)
+bfd_request_dump(resource *r, unsigned indent UNUSED)
{
struct bfd_request *req = (struct bfd_request *) r;
@@ -819,7 +922,7 @@ bfd_neigh_notify(struct neighbor *nb)
if ((nb->scope > 0) && !n->req)
{
ip_addr local = ipa_nonzero(n->local) ? n->local : nb->ifa->ip;
- n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, NULL);
+ n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, p->p.vrf, NULL, NULL, NULL, NULL);
}
if ((nb->scope <= 0) && n->req)
@@ -836,7 +939,7 @@ bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n)
if (n->multihop)
{
- n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, NULL);
+ n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, p->p.vrf, NULL, NULL, NULL, NULL);
return;
}
@@ -916,21 +1019,15 @@ bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new)
/* This core notify code should be replaced after main loop transition to birdloop */
-int pipe(int pipefd[2]);
-void pipe_drain(int fd);
-void pipe_kick(int fd);
-
-static int
-bfd_notify_hook(sock *sk, uint len UNUSED)
+static void
+bfd_notify_hook(void *data)
{
- struct bfd_proto *p = sk->data;
+ struct bfd_proto *p = data;
struct bfd_session *s;
list tmp_list;
u8 state, diag;
node *n, *nn;
- pipe_drain(sk->fd);
-
bfd_lock_sessions(p);
init_list(&tmp_list);
add_tail_list(&tmp_list, &p->notify_list);
@@ -945,64 +1042,15 @@ bfd_notify_hook(sock *sk, uint len UNUSED)
diag = s->loc_diag;
bfd_unlock_sessions(p);
- s->notify_running = 1;
WALK_LIST_DELSAFE(n, nn, s->request_list)
bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag);
- s->notify_running = 0;
/* Remove the session if all requests were removed in notify hooks */
if (EMPTY_LIST(s->request_list))
bfd_remove_session(p, s);
}
-
- return 0;
}
-static inline void
-bfd_notify_kick(struct bfd_proto *p)
-{
- pipe_kick(p->notify_ws->fd);
-}
-
-static void
-bfd_noterr_hook(sock *sk, int err)
-{
- struct bfd_proto *p = sk->data;
- log(L_ERR "%s: Notify socket error: %m", p->p.name, err);
-}
-
-static void
-bfd_notify_init(struct bfd_proto *p)
-{
- int pfds[2];
- sock *sk;
-
- int rv = pipe(pfds);
- if (rv < 0)
- die("pipe: %m");
-
- sk = sk_new(p->p.pool);
- sk->type = SK_MAGIC;
- sk->rx_hook = bfd_notify_hook;
- sk->err_hook = bfd_noterr_hook;
- sk->fd = pfds[0];
- sk->data = p;
- if (sk_open(sk) < 0)
- die("bfd: sk_open failed");
- p->notify_rs = sk;
-
- /* The write sock is not added to any event loop */
- sk = sk_new(p->p.pool);
- sk->type = SK_MAGIC;
- sk->fd = pfds[1];
- sk->data = p;
- sk->flags = SKF_THREAD;
- if (sk_open(sk) < 0)
- die("bfd: sk_open failed");
- p->notify_ws = sk;
-}
-
-
/*
* BFD protocol glue
*/
@@ -1012,7 +1060,7 @@ bfd_init(struct proto_config *c)
{
struct proto *p = proto_new(c);
- p->neigh_notify = bfd_neigh_notify;
+ p->iface_sub.neigh_notify = bfd_neigh_notify;
return p;
}
@@ -1023,10 +1071,10 @@ bfd_start(struct proto *P)
struct bfd_proto *p = (struct bfd_proto *) P;
struct bfd_config *cf = (struct bfd_config *) (P->cf);
- p->loop = birdloop_new();
- p->tpool = rp_new(NULL, "BFD thread root");
pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE);
+ p->tpool = rp_new(P->pool, "BFD loop pool");
+
p->session_slab = sl_new(P->pool, sizeof(struct bfd_session));
HASH_INIT(p->session_hash_id, P->pool, 8);
HASH_INIT(p->session_hash_ip, P->pool, 8);
@@ -1034,11 +1082,12 @@ bfd_start(struct proto *P)
init_list(&p->iface_list);
init_list(&p->notify_list);
- bfd_notify_init(p);
-
- add_tail(&bfd_proto_list, &p->bfd_node);
+ p->notify_event = (event) {
+ .hook = bfd_notify_hook,
+ .data = p,
+ };
- birdloop_enter(p->loop);
+ add_tail(&bfd_global.proto_list, &p->bfd_node);
if (!cf->strict_bind)
{
@@ -1055,43 +1104,29 @@ bfd_start(struct proto *P)
p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6);
}
- birdloop_leave(p->loop);
-
bfd_take_requests(p);
struct bfd_neighbor *n;
WALK_LIST(n, cf->neigh_list)
bfd_start_neighbor(p, n);
- birdloop_start(p->loop);
-
return PS_UP;
}
-
static int
bfd_shutdown(struct proto *P)
{
struct bfd_proto *p = (struct bfd_proto *) P;
- struct bfd_config *cf = (struct bfd_config *) (P->cf);
+ struct bfd_config *cf = (struct bfd_config *) (p->p.cf);
rem_node(&p->bfd_node);
- birdloop_stop(p->loop);
-
- struct bfd_neighbor *n;
- WALK_LIST(n, cf->neigh_list)
- bfd_stop_neighbor(p, n);
+ struct bfd_neighbor *bn;
+ WALK_LIST(bn, cf->neigh_list)
+ bfd_stop_neighbor(p, bn);
bfd_drop_requests(p);
- /* FIXME: This is hack */
- birdloop_enter(p->loop);
- rfree(p->tpool);
- birdloop_leave(p->loop);
-
- birdloop_free(p->loop);
-
return PS_DOWN;
}
@@ -1111,7 +1146,7 @@ bfd_reconfigure(struct proto *P, struct proto_config *c)
(new->strict_bind != old->strict_bind))
return 0;
- birdloop_mask_wakeups(p->loop);
+ birdloop_mask_wakeups(p->p.loop);
WALK_LIST(ifa, p->iface_list)
bfd_reconfigure_iface(p, ifa, new);
@@ -1125,7 +1160,7 @@ bfd_reconfigure(struct proto *P, struct proto_config *c)
bfd_reconfigure_neighbors(p, new);
- birdloop_unmask_wakeups(p->loop);
+ birdloop_unmask_wakeups(p->p.loop);
return 1;
}
@@ -1183,7 +1218,6 @@ bfd_show_sessions(struct proto *P)
struct protocol proto_bfd = {
.name = "BFD",
.template = "bfd%d",
- .class = PROTOCOL_BFD,
.proto_size = sizeof(struct bfd_proto),
.config_size = sizeof(struct bfd_config),
.init = bfd_init,
@@ -1197,4 +1231,9 @@ void
bfd_build(void)
{
proto_build(&proto_bfd);
+
+ bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global");
+ init_list(&bfd_global.wait_list);
+ init_list(&bfd_global.pickup_list);
+ init_list(&bfd_global.proto_list);
}
diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h
index 7caf9f66..a4b7d63c 100644
--- a/proto/bfd/bfd.h
+++ b/proto/bfd/bfd.h
@@ -13,16 +13,16 @@
#include "nest/cli.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/password.h"
#include "conf/conf.h"
#include "lib/hash.h"
+#include "lib/io-loop.h"
#include "lib/resource.h"
#include "lib/socket.h"
#include "lib/string.h"
#include "nest/bfd.h"
-#include "io.h"
#define BFD_CONTROL_PORT 3784
@@ -88,17 +88,18 @@ struct bfd_neighbor
struct bfd_proto
{
struct proto p;
- struct birdloop *loop;
- pool *tpool;
+
pthread_spinlock_t lock;
+
+ pool *tpool;
+
node bfd_node;
slab *session_slab;
HASH(struct bfd_session) session_hash_id;
HASH(struct bfd_session) session_hash_ip;
- sock *notify_rs;
- sock *notify_ws;
+ event notify_event;
list notify_list;
sock *rx4_1;
@@ -164,7 +165,6 @@ struct bfd_session
list request_list; /* List of client requests (struct bfd_request) */
btime last_state_change; /* Time of last state change */
- u8 notify_running; /* 1 if notify hooks are running */
u8 rx_csn_known; /* Received crypto sequence number is known */
u32 rx_csn; /* Last received crypto sequence number */
diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y
index 70461872..0d6e33fa 100644
--- a/proto/bfd/config.Y
+++ b/proto/bfd/config.Y
@@ -37,6 +37,7 @@ proto: bfd_proto ;
bfd_proto_start: proto_start BFD
{
this_proto = proto_config_new(&proto_bfd, $1);
+ this_proto->loop_order = DOMAIN_ORDER(proto);
init_list(&BFD_CFG->patt_list);
init_list(&BFD_CFG->neigh_list);
BFD_CFG->accept_ipv4 = BFD_CFG->accept_ipv6 = 1;
diff --git a/proto/bfd/io.c b/proto/bfd/io.c
deleted file mode 100644
index e696cc89..00000000
--- a/proto/bfd/io.c
+++ /dev/null
@@ -1,537 +0,0 @@
-/*
- * BIRD -- I/O and event loop
- *
- * Can be freely distributed and used under the terms of the GNU GPL.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <poll.h>
-#include <pthread.h>
-#include <time.h>
-#include <sys/time.h>
-
-#include "nest/bird.h"
-#include "proto/bfd/io.h"
-
-#include "lib/buffer.h"
-#include "lib/lists.h"
-#include "lib/resource.h"
-#include "lib/event.h"
-#include "lib/timer.h"
-#include "lib/socket.h"
-
-
-struct birdloop
-{
- pool *pool;
- pthread_t thread;
- pthread_mutex_t mutex;
-
- u8 stop_called;
- u8 poll_active;
- u8 wakeup_masked;
- int wakeup_fds[2];
-
- struct timeloop time;
- list event_list;
- list sock_list;
- uint sock_num;
-
- BUFFER(sock *) poll_sk;
- BUFFER(struct pollfd) poll_fd;
- u8 poll_changed;
- u8 close_scheduled;
-};
-
-
-/*
- * Current thread context
- */
-
-static pthread_key_t current_loop_key;
-extern pthread_key_t current_time_key;
-
-static inline struct birdloop *
-birdloop_current(void)
-{
- return pthread_getspecific(current_loop_key);
-}
-
-static inline void
-birdloop_set_current(struct birdloop *loop)
-{
- pthread_setspecific(current_loop_key, loop);
- pthread_setspecific(current_time_key, loop ? &loop->time : &main_timeloop);
-}
-
-static inline void
-birdloop_init_current(void)
-{
- pthread_key_create(&current_loop_key, NULL);
-}
-
-
-/*
- * Wakeup code for birdloop
- */
-
-static void
-pipe_new(int *pfds)
-{
- int rv = pipe(pfds);
- if (rv < 0)
- die("pipe: %m");
-
- if (fcntl(pfds[0], F_SETFL, O_NONBLOCK) < 0)
- die("fcntl(O_NONBLOCK): %m");
-
- if (fcntl(pfds[1], F_SETFL, O_NONBLOCK) < 0)
- die("fcntl(O_NONBLOCK): %m");
-}
-
-void
-pipe_drain(int fd)
-{
- char buf[64];
- int rv;
-
- try:
- rv = read(fd, buf, 64);
- if (rv < 0)
- {
- if (errno == EINTR)
- goto try;
- if (errno == EAGAIN)
- return;
- die("wakeup read: %m");
- }
- if (rv == 64)
- goto try;
-}
-
-void
-pipe_kick(int fd)
-{
- u64 v = 1;
- int rv;
-
- try:
- rv = write(fd, &v, sizeof(u64));
- if (rv < 0)
- {
- if (errno == EINTR)
- goto try;
- if (errno == EAGAIN)
- return;
- die("wakeup write: %m");
- }
-}
-
-static inline void
-wakeup_init(struct birdloop *loop)
-{
- pipe_new(loop->wakeup_fds);
-}
-
-static inline void
-wakeup_drain(struct birdloop *loop)
-{
- pipe_drain(loop->wakeup_fds[0]);
-}
-
-static inline void
-wakeup_do_kick(struct birdloop *loop)
-{
- pipe_kick(loop->wakeup_fds[1]);
-}
-
-static inline void
-wakeup_kick(struct birdloop *loop)
-{
- if (!loop->wakeup_masked)
- wakeup_do_kick(loop);
- else
- loop->wakeup_masked = 2;
-}
-
-/* For notifications from outside */
-void
-wakeup_kick_current(void)
-{
- struct birdloop *loop = birdloop_current();
-
- if (loop && loop->poll_active)
- wakeup_kick(loop);
-}
-
-
-/*
- * Events
- */
-
-static inline uint
-events_waiting(struct birdloop *loop)
-{
- return !EMPTY_LIST(loop->event_list);
-}
-
-static inline void
-events_init(struct birdloop *loop)
-{
- init_list(&loop->event_list);
-}
-
-static void
-events_fire(struct birdloop *loop)
-{
- times_update(&loop->time);
- ev_run_list(&loop->event_list);
-}
-
-void
-ev2_schedule(event *e)
-{
- struct birdloop *loop = birdloop_current();
-
- if (loop->poll_active && EMPTY_LIST(loop->event_list))
- wakeup_kick(loop);
-
- if (e->n.next)
- rem_node(&e->n);
-
- add_tail(&loop->event_list, &e->n);
-}
-
-
-/*
- * Sockets
- */
-
-static void
-sockets_init(struct birdloop *loop)
-{
- init_list(&loop->sock_list);
- loop->sock_num = 0;
-
- BUFFER_INIT(loop->poll_sk, loop->pool, 4);
- BUFFER_INIT(loop->poll_fd, loop->pool, 4);
- loop->poll_changed = 1; /* add wakeup fd */
-}
-
-static void
-sockets_add(struct birdloop *loop, sock *s)
-{
- add_tail(&loop->sock_list, &s->n);
- loop->sock_num++;
-
- s->index = -1;
- loop->poll_changed = 1;
-
- if (loop->poll_active)
- wakeup_kick(loop);
-}
-
-void
-sk_start(sock *s)
-{
- struct birdloop *loop = birdloop_current();
-
- sockets_add(loop, s);
-}
-
-static void
-sockets_remove(struct birdloop *loop, sock *s)
-{
- rem_node(&s->n);
- loop->sock_num--;
-
- if (s->index >= 0)
- loop->poll_sk.data[s->index] = NULL;
-
- s->index = -1;
- loop->poll_changed = 1;
-
- /* Wakeup moved to sk_stop() */
-}
-
-void
-sk_stop(sock *s)
-{
- struct birdloop *loop = birdloop_current();
-
- sockets_remove(loop, s);
-
- if (loop->poll_active)
- {
- loop->close_scheduled = 1;
- wakeup_kick(loop);
- }
- else
- close(s->fd);
-
- s->fd = -1;
-}
-
-static inline uint sk_want_events(sock *s)
-{ return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); }
-
-/*
-FIXME: this should be called from sock code
-
-static void
-sockets_update(struct birdloop *loop, sock *s)
-{
- if (s->index >= 0)
- loop->poll_fd.data[s->index].events = sk_want_events(s);
-}
-*/
-
-static void
-sockets_prepare(struct birdloop *loop)
-{
- BUFFER_SET(loop->poll_sk, loop->sock_num + 1);
- BUFFER_SET(loop->poll_fd, loop->sock_num + 1);
-
- struct pollfd *pfd = loop->poll_fd.data;
- sock **psk = loop->poll_sk.data;
- uint i = 0;
- node *n;
-
- WALK_LIST(n, loop->sock_list)
- {
- sock *s = SKIP_BACK(sock, n, n);
-
- ASSERT(i < loop->sock_num);
-
- s->index = i;
- *psk = s;
- pfd->fd = s->fd;
- pfd->events = sk_want_events(s);
- pfd->revents = 0;
-
- pfd++;
- psk++;
- i++;
- }
-
- ASSERT(i == loop->sock_num);
-
- /* Add internal wakeup fd */
- *psk = NULL;
- pfd->fd = loop->wakeup_fds[0];
- pfd->events = POLLIN;
- pfd->revents = 0;
-
- loop->poll_changed = 0;
-}
-
-static void
-sockets_close_fds(struct birdloop *loop)
-{
- struct pollfd *pfd = loop->poll_fd.data;
- sock **psk = loop->poll_sk.data;
- int poll_num = loop->poll_fd.used - 1;
-
- int i;
- for (i = 0; i < poll_num; i++)
- if (psk[i] == NULL)
- close(pfd[i].fd);
-
- loop->close_scheduled = 0;
-}
-
-int sk_read(sock *s, int revents);
-int sk_write(sock *s);
-
-static void
-sockets_fire(struct birdloop *loop)
-{
- struct pollfd *pfd = loop->poll_fd.data;
- sock **psk = loop->poll_sk.data;
- int poll_num = loop->poll_fd.used - 1;
-
- times_update(&loop->time);
-
- /* Last fd is internal wakeup fd */
- if (pfd[poll_num].revents & POLLIN)
- wakeup_drain(loop);
-
- int i;
- for (i = 0; i < poll_num; pfd++, psk++, i++)
- {
- int e = 1;
-
- if (! pfd->revents)
- continue;
-
- if (pfd->revents & POLLNVAL)
- die("poll: invalid fd %d", pfd->fd);
-
- if (pfd->revents & POLLIN)
- while (e && *psk && (*psk)->rx_hook)
- e = sk_read(*psk, 0);
-
- e = 1;
- if (pfd->revents & POLLOUT)
- while (e && *psk)
- e = sk_write(*psk);
- }
-}
-
-
-/*
- * Birdloop
- */
-
-static void * birdloop_main(void *arg);
-
-struct birdloop *
-birdloop_new(void)
-{
- /* FIXME: this init should be elsewhere and thread-safe */
- static int init = 0;
- if (!init)
- { birdloop_init_current(); init = 1; }
-
- pool *p = rp_new(NULL, "Birdloop root");
- struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop));
- loop->pool = p;
- pthread_mutex_init(&loop->mutex, NULL);
-
- wakeup_init(loop);
-
- events_init(loop);
- timers_init(&loop->time, p);
- sockets_init(loop);
-
- return loop;
-}
-
-void
-birdloop_start(struct birdloop *loop)
-{
- int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop);
- if (rv)
- die("pthread_create(): %M", rv);
-}
-
-void
-birdloop_stop(struct birdloop *loop)
-{
- pthread_mutex_lock(&loop->mutex);
- loop->stop_called = 1;
- wakeup_do_kick(loop);
- pthread_mutex_unlock(&loop->mutex);
-
- int rv = pthread_join(loop->thread, NULL);
- if (rv)
- die("pthread_join(): %M", rv);
-}
-
-void
-birdloop_free(struct birdloop *loop)
-{
- rfree(loop->pool);
-}
-
-
-void
-birdloop_enter(struct birdloop *loop)
-{
- /* TODO: these functions could save and restore old context */
- pthread_mutex_lock(&loop->mutex);
- birdloop_set_current(loop);
-}
-
-void
-birdloop_leave(struct birdloop *loop)
-{
- /* TODO: these functions could save and restore old context */
- birdloop_set_current(NULL);
- pthread_mutex_unlock(&loop->mutex);
-}
-
-void
-birdloop_mask_wakeups(struct birdloop *loop)
-{
- pthread_mutex_lock(&loop->mutex);
- loop->wakeup_masked = 1;
- pthread_mutex_unlock(&loop->mutex);
-}
-
-void
-birdloop_unmask_wakeups(struct birdloop *loop)
-{
- pthread_mutex_lock(&loop->mutex);
- if (loop->wakeup_masked == 2)
- wakeup_do_kick(loop);
- loop->wakeup_masked = 0;
- pthread_mutex_unlock(&loop->mutex);
-}
-
-static void *
-birdloop_main(void *arg)
-{
- struct birdloop *loop = arg;
- timer *t;
- int rv, timeout;
-
- birdloop_set_current(loop);
-
- tmp_init(loop->pool);
-
- pthread_mutex_lock(&loop->mutex);
- while (1)
- {
- events_fire(loop);
- timers_fire(&loop->time);
-
- times_update(&loop->time);
- if (events_waiting(loop))
- timeout = 0;
- else if (t = timers_first(&loop->time))
- timeout = (tm_remains(t) TO_MS) + 1;
- else
- timeout = -1;
-
- if (loop->poll_changed)
- sockets_prepare(loop);
-
- loop->poll_active = 1;
- pthread_mutex_unlock(&loop->mutex);
-
- try:
- rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout);
- if (rv < 0)
- {
- if (errno == EINTR || errno == EAGAIN)
- goto try;
- die("poll: %m");
- }
-
- pthread_mutex_lock(&loop->mutex);
- loop->poll_active = 0;
-
- if (loop->close_scheduled)
- sockets_close_fds(loop);
-
- if (loop->stop_called)
- break;
-
- if (rv)
- sockets_fire(loop);
-
- timers_fire(&loop->time);
- }
-
- loop->stop_called = 0;
- pthread_mutex_unlock(&loop->mutex);
-
- return NULL;
-}
-
-
diff --git a/proto/bfd/io.h b/proto/bfd/io.h
deleted file mode 100644
index ec706e9a..00000000
--- a/proto/bfd/io.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * BIRD -- I/O and event loop
- *
- * Can be freely distributed and used under the terms of the GNU GPL.
- */
-
-#ifndef _BIRD_BFD_IO_H_
-#define _BIRD_BFD_IO_H_
-
-#include "nest/bird.h"
-#include "lib/lists.h"
-#include "lib/resource.h"
-#include "lib/event.h"
-#include "lib/timer.h"
-#include "lib/socket.h"
-
-
-void ev2_schedule(event *e);
-
-void sk_start(sock *s);
-void sk_stop(sock *s);
-
-struct birdloop *birdloop_new(void);
-void birdloop_start(struct birdloop *loop);
-void birdloop_stop(struct birdloop *loop);
-void birdloop_free(struct birdloop *loop);
-
-void birdloop_enter(struct birdloop *loop);
-void birdloop_leave(struct birdloop *loop);
-void birdloop_mask_wakeups(struct birdloop *loop);
-void birdloop_unmask_wakeups(struct birdloop *loop);
-
-
-#endif /* _BIRD_BFD_IO_H_ */
diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c
index cb5f0d89..a22f223b 100644
--- a/proto/bfd/packets.c
+++ b/proto/bfd/packets.c
@@ -416,7 +416,7 @@ bfd_err_hook(sock *sk, int err)
sock *
bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af)
{
- sock *sk = sk_new(p->tpool);
+ sock *sk = sk_new(p->p.pool);
sk->type = SK_UDP;
sk->subtype = af;
sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT;
@@ -430,12 +430,11 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af)
/* TODO: configurable ToS and priority */
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->priority = sk_priority_control;
- sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0);
+ sk->flags = SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0);
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
- sk_start(sk);
return sk;
err:
@@ -462,12 +461,11 @@ bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa)
/* TODO: configurable ToS and priority */
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->priority = sk_priority_control;
- sk->flags = SKF_THREAD | SKF_BIND | (ifa ? SKF_TTL_RX : 0);
+ sk->flags = SKF_BIND | (ifa ? SKF_TTL_RX : 0);
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
- sk_start(sk);
return sk;
err:
@@ -479,7 +477,7 @@ bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa)
sock *
bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa)
{
- sock *sk = sk_new(p->tpool);
+ sock *sk = sk_new(p->p.pool);
sk->type = SK_UDP;
sk->saddr = local;
sk->dport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT;
@@ -494,12 +492,11 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa)
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->priority = sk_priority_control;
sk->ttl = ifa ? 255 : -1;
- sk->flags = SKF_THREAD | SKF_BIND | SKF_HIGH_PORT;
+ sk->flags = SKF_BIND | SKF_HIGH_PORT;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
- sk_start(sk);
return sk;
err:
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 1e234b16..039fdff5 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -15,12 +15,13 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
-#include "nest/attrs.h"
+#include "nest/rt.h"
+#include "lib/attrs.h"
#include "conf/conf.h"
#include "lib/resource.h"
#include "lib/string.h"
#include "lib/unaligned.h"
+#include "lib/macro.h"
#include "bgp.h"
@@ -64,37 +65,72 @@
* format - Optional hook that converts eattr to textual representation.
*/
-
-struct bgp_attr_desc {
- const char *name;
- uint type;
- uint flags;
- void (*export)(struct bgp_export_state *s, eattr *a);
- int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
- void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
- void (*format)(const eattr *ea, byte *buf, uint size);
+union bgp_attr_desc {
+ struct ea_class class;
+ struct {
+ EA_CLASS_INSIDE;
+ uint flags;
+ void (*export)(struct bgp_export_state *s, eattr *a);
+ int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
+ void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
+ };
};
-static const struct bgp_attr_desc bgp_attr_table[];
+static union bgp_attr_desc bgp_attr_table[];
+static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a)
+{
+ const struct ea_class *class = ea_class_find(a->id);
-static inline int bgp_attr_known(uint code);
+ if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class))
+ return NULL;
-eattr *
-bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
+ return (const union bgp_attr_desc *) class;
+}
+
+#define BGP_EA_ID(code) (bgp_attr_table[code].id)
+#define EA_BGP_ID(code) (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table)
+
+void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+
+ ea_set_attr(to, EA_LITERAL_EMBEDDED(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ val
+ ));
+}
+
+void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad)
{
- ASSERT(bgp_attr_known(code));
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
- return ea_set_attr(
- attrs,
- pool,
- EA_CODE(PROTOCOL_BGP, code),
- flags & ~BAF_EXT_LEN,
- bgp_attr_table[code].type,
- val
- );
+ ea_set_attr(to, EA_LITERAL_DIRECT_ADATA(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ ad
+ ));
}
+void
+bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+
+ ea_set_attr(to, EA_LITERAL_STORE_ADATA(
+ &desc->class,
+ flags & ~BAF_EXT_LEN,
+ data,
+ len
+ ));
+}
+void
+bgp_unset_attr(ea_list **to, uint code)
+{
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
+ ea_unset_attr(to, 0, &desc->class);
+}
#define REPORT(msg, args...) \
({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
@@ -151,7 +187,7 @@ bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
if (size < (3+1))
return -1;
- bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
+ bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1);
buf[3] = a->u.data;
return 3+1;
@@ -163,7 +199,7 @@ bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
if (size < (3+4))
return -1;
- bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
+ bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4);
put_u32(buf+3, a->u.data);
return 3+4;
@@ -177,7 +213,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size
if (size < (4+len))
return -1;
- uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
+ uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len);
put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
return hdr + len;
@@ -198,7 +234,7 @@ bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint
static int
bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
- return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
+ return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
}
@@ -336,9 +372,9 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
}
int
-bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
+bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad)
{
- eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
+ eattr *a = ea_find(e->attrs, BGP_EA_ID(BA_AIGP));
if (!a)
return 0;
@@ -347,7 +383,7 @@ bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
return 0;
u64 aigp = get_u64(b + 3);
- u64 step = e->attrs->igp_metric;
+ u64 step = rt_get_igp_metric(e);
if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
step = BGP_AIGP_MAX;
@@ -366,7 +402,7 @@ bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
static inline int
bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
{
- if (e->attrs->source == RTS_BGP)
+ if (rt_get_source_attr(e) == RTS_BGP)
return 0;
*metric = rt_get_igp_metric(e);
@@ -375,7 +411,7 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
}
u32
-bgp_rte_igp_metric(struct rte *rt)
+bgp_rte_igp_metric(const rte *rt)
{
u64 metric = bgp_total_aigp_metric(rt);
return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
@@ -402,7 +438,7 @@ bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte
if (data[0] > 2)
WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
- bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
+ bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]);
}
static void
@@ -470,7 +506,7 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte
!bgp_as_path_first_as_equal(data, len, p->remote_as))
WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS");
- bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
+ bgp_set_attr_data(to, BA_AS_PATH, flags, data, len);
}
@@ -542,7 +578,7 @@ bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da
WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
+ bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val);
}
@@ -563,7 +599,7 @@ bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, b
WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
+ bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val);
}
@@ -573,7 +609,7 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags,
if (len != 0)
DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
- bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
+ bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0);
}
static int
@@ -607,7 +643,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b
len = aggregator_16to32(data, src);
}
- bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
+ bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len);
}
static void
@@ -636,7 +672,7 @@ bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, by
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad);
}
@@ -657,7 +693,7 @@ bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags
WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
+ bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val);
}
@@ -682,7 +718,7 @@ bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags,
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
+ bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad);
}
static void
@@ -801,7 +837,7 @@ bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad);
}
@@ -814,7 +850,7 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag
if (len != 8)
DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
- bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
+ bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len);
}
static void
@@ -844,7 +880,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt
a = as_path_strip_confed(s->pool, a);
}
- bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
+ bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a);
}
@@ -868,7 +904,7 @@ bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *d
if (!bgp_aigp_valid(data, len, err, sizeof(err)))
DISCARD("Malformed AIGP attribute - %s", err);
- bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
+ bgp_set_attr_data(to, BA_AIGP, flags, data, len);
}
static void
@@ -900,7 +936,7 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla
struct adata *ad = lp_alloc_adata(s->pool, len);
get_u32s(data, (u32 *) ad->data, len / 4);
- bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
+ bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad);
}
@@ -911,14 +947,14 @@ bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da
WITHDRAW(BAD_LENGTH, "OTC", len);
u32 val = get_u32(data);
- bgp_set_attr_u32(to, s->pool, BA_ONLY_TO_CUSTOMER, flags, val);
+ bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, flags, val);
}
static void
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
{
- net_addr *n = s->route->net->n.addr;
+ const net_addr *n = s->route->net;
u32 *labels = (u32 *) a->u.ptr->data;
uint lnum = a->u.ptr->length / 4;
@@ -985,10 +1021,29 @@ bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size)
}
static inline void
-bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
+bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a)
+{
+ if (!(a->flags & BAF_TRANSITIVE))
+ UNSET(a);
+
+ a->flags |= BAF_PARTIAL;
+}
+
+static inline void
+bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to)
{
+ if (!(flags & BAF_OPTIONAL))
+ WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
+
/* Cannot use bgp_set_attr_data() as it works on known attributes only */
- ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
+ ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len);
+}
+
+static inline void
+bgp_format_unknown(const eattr *a, byte *buf, uint size)
+{
+ if (a->flags & BAF_TRANSITIVE)
+ bsnprintf(buf, size, "(transitive)");
}
@@ -996,10 +1051,10 @@ bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data,
* Attribute table
*/
-static const struct bgp_attr_desc bgp_attr_table[] = {
+static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = {
[BA_ORIGIN] = {
- .name = "origin",
- .type = EAF_TYPE_INT,
+ .name = "bgp_origin",
+ .type = T_ENUM_BGP_ORIGIN,
.flags = BAF_TRANSITIVE,
.export = bgp_export_origin,
.encode = bgp_encode_u8,
@@ -1007,69 +1062,69 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_origin,
},
[BA_AS_PATH] = {
- .name = "as_path",
- .type = EAF_TYPE_AS_PATH,
+ .name = "bgp_path",
+ .type = T_PATH,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_as_path,
.decode = bgp_decode_as_path,
},
[BA_NEXT_HOP] = {
- .name = "next_hop",
- .type = EAF_TYPE_IP_ADDRESS,
+ .name = "bgp_next_hop",
+ .type = T_IP,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_next_hop,
.decode = bgp_decode_next_hop,
.format = bgp_format_next_hop,
},
[BA_MULTI_EXIT_DISC] = {
- .name = "med",
- .type = EAF_TYPE_INT,
+ .name = "bgp_med",
+ .type = T_INT,
.flags = BAF_OPTIONAL,
.encode = bgp_encode_u32,
.decode = bgp_decode_med,
},
[BA_LOCAL_PREF] = {
- .name = "local_pref",
- .type = EAF_TYPE_INT,
+ .name = "bgp_local_pref",
+ .type = T_INT,
.flags = BAF_TRANSITIVE,
.export = bgp_export_local_pref,
.encode = bgp_encode_u32,
.decode = bgp_decode_local_pref,
},
[BA_ATOMIC_AGGR] = {
- .name = "atomic_aggr",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_atomic_aggr",
+ .type = T_OPAQUE,
.flags = BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_atomic_aggr,
},
[BA_AGGREGATOR] = {
- .name = "aggregator",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_aggregator",
+ .type = T_OPAQUE,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_aggregator,
.decode = bgp_decode_aggregator,
.format = bgp_format_aggregator,
},
[BA_COMMUNITY] = {
- .name = "community",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_community",
+ .type = T_CLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_community,
},
[BA_ORIGINATOR_ID] = {
- .name = "originator_id",
- .type = EAF_TYPE_ROUTER_ID,
+ .name = "bgp_originator_id",
+ .type = T_QUAD,
.flags = BAF_OPTIONAL,
.export = bgp_export_originator_id,
.encode = bgp_encode_u32,
.decode = bgp_decode_originator_id,
},
[BA_CLUSTER_LIST] = {
- .name = "cluster_list",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_cluster_list",
+ .type = T_CLIST,
.flags = BAF_OPTIONAL,
.export = bgp_export_cluster_list,
.encode = bgp_encode_u32s,
@@ -1077,43 +1132,47 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_cluster_list,
},
[BA_MP_REACH_NLRI] = {
- .name = "mp_reach_nlri",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_mp_reach_nlri",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_reach_nlri,
},
[BA_MP_UNREACH_NLRI] = {
- .name = "mp_unreach_nlri",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_mp_unreach_nlri",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL,
.decode = bgp_decode_mp_unreach_nlri,
},
[BA_EXT_COMMUNITY] = {
- .name = "ext_community",
- .type = EAF_TYPE_EC_SET,
+ .name = "bgp_ext_community",
+ .type = T_ECLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_ext_community,
.encode = bgp_encode_u32s,
.decode = bgp_decode_ext_community,
},
[BA_AS4_PATH] = {
- .name = "as4_path",
- .type = EAF_TYPE_AS_PATH,
+ .name = "bgp_as4_path",
+ .type = T_PATH,
+ .hidden = 1,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_path,
},
[BA_AS4_AGGREGATOR] = {
- .name = "as4_aggregator",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_as4_aggregator",
+ .type = T_OPAQUE,
+ .hidden = 1,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_raw,
.decode = bgp_decode_as4_aggregator,
.format = bgp_format_aggregator,
},
[BA_AIGP] = {
- .name = "aigp",
- .type = EAF_TYPE_OPAQUE,
+ .name = "bgp_aigp",
+ .type = T_OPAQUE,
.flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
.export = bgp_export_aigp,
.encode = bgp_encode_raw,
@@ -1121,8 +1180,8 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.format = bgp_format_aigp,
},
[BA_LARGE_COMMUNITY] = {
- .name = "large_community",
- .type = EAF_TYPE_LC_SET,
+ .name = "bgp_large_community",
+ .type = T_LCLIST,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.export = bgp_export_large_community,
.encode = bgp_encode_u32s,
@@ -1130,14 +1189,15 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
},
[BA_ONLY_TO_CUSTOMER] = {
.name = "otc",
- .type = EAF_TYPE_INT,
+ .type = T_INT,
.flags = BAF_OPTIONAL | BAF_TRANSITIVE,
.encode = bgp_encode_u32,
.decode = bgp_decode_otc,
},
[BA_MPLS_LABEL_STACK] = {
- .name = "mpls_label_stack",
- .type = EAF_TYPE_INT_SET,
+ .name = "bgp_mpls_label_stack",
+ .type = T_CLIST,
+ .readonly = 1,
.export = bgp_export_mpls_label_stack,
.encode = bgp_encode_mpls_label_stack,
.decode = bgp_decode_mpls_label_stack,
@@ -1145,12 +1205,32 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
},
};
-static inline int
-bgp_attr_known(uint code)
+eattr *
+bgp_find_attr(ea_list *attrs, uint code)
{
- return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
+ return ea_find(attrs, BGP_EA_ID(code));
}
+void
+bgp_register_attrs(void)
+{
+ for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++)
+ {
+ if (!bgp_attr_table[i].name)
+ bgp_attr_table[i] = (union bgp_attr_desc) {
+ .name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i),
+ .type = T_OPAQUE,
+ .flags = BAF_OPTIONAL,
+ .readonly = 1,
+ .export = bgp_export_unknown,
+ .encode = bgp_encode_raw,
+ .decode = bgp_decode_unknown,
+ .format = bgp_format_unknown,
+ };
+
+ ea_register_init(&bgp_attr_table[i].class);
+ }
+}
/*
* Attribute export
@@ -1159,38 +1239,24 @@ bgp_attr_known(uint code)
static inline void
bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
{
- if (EA_PROTO(a->id) != PROTOCOL_BGP)
+ const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
+ if (!desc)
return;
- uint code = EA_ID(a->id);
-
- if (bgp_attr_known(code))
- {
- const struct bgp_attr_desc *desc = &bgp_attr_table[code];
-
- /* The flags might have been zero if the attr was added by filters */
- a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
-
- /* Set partial bit if new opt-trans attribute is attached to non-local route */
- if ((s->src != NULL) && (a->originated) &&
- (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
- a->flags |= BAF_PARTIAL;
+ /* The flags might have been zero if the attr was added locally */
+ a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
- /* Call specific hook */
- CALL(desc->export, s, a);
+ /* Set partial bit if new opt-trans attribute is attached to non-local route */
+ if ((s->src != NULL) && (a->originated) &&
+ (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
+ a->flags |= BAF_PARTIAL;
- /* Attribute might become undefined in hook */
- if (a->undef)
- return;
- }
- else
- {
- /* Don't re-export unknown non-transitive attributes */
- if (!(a->flags & BAF_TRANSITIVE))
- return;
+ /* Call specific hook */
+ CALL(desc->export, s, a);
- a->flags |= BAF_PARTIAL;
- }
+ /* Attribute might become undefined in hook */
+ if (a->undef)
+ return;
/* Append updated attribute */
to->attrs[to->count++] = *a;
@@ -1210,12 +1276,11 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
* Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
*/
static inline ea_list *
-bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
+bgp_export_attrs(struct bgp_export_state *s, ea_list *a)
{
/* Merge the attribute list */
- ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
- ea_merge(attrs, new);
- ea_sort(new);
+ ea_list *new = ea_normalize(a, 0);
+ ASSERT_DIE(new);
uint i, count;
count = new->count;
@@ -1239,14 +1304,9 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
static inline int
bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
- ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
-
- uint code = EA_ID(a->id);
-
- if (bgp_attr_known(code))
- return bgp_attr_table[code].encode(s, a, buf, size);
- else
- return bgp_encode_raw(s, a, buf, size);
+ const union bgp_attr_desc *desc = bgp_find_attr_desc(a);
+ ASSERT_DIE(desc);
+ return desc->encode(s, a, buf, size);
}
/**
@@ -1311,7 +1371,7 @@ bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
}
static inline void
-bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
+bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to)
{
/* Handle duplicate attributes; RFC 7606 3 (g) */
if (BIT32_TEST(s->attrs_seen, code))
@@ -1323,24 +1383,15 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui
}
BIT32_SET(s->attrs_seen, code);
- if (bgp_attr_known(code))
- {
- const struct bgp_attr_desc *desc = &bgp_attr_table[code];
-
- /* Handle conflicting flags; RFC 7606 3 (c) */
- if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
- !(desc->flags & BAF_DECODE_FLAGS))
- WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
+ ASSERT_DIE(bgp_attr_table[code].id);
+ const union bgp_attr_desc *desc = &bgp_attr_table[code];
- desc->decode(s, code, flags, data, len, to);
- }
- else /* Unknown attribute */
- {
- if (!(flags & BAF_OPTIONAL))
- WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
+ /* Handle conflicting flags; RFC 7606 3 (c) */
+ if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
+ !(desc->flags & BAF_DECODE_FLAGS))
+ WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags);
- bgp_decode_unknown(s, code, flags, data, len, to);
- }
+ desc->decode(s, code, flags, data, len, to);
}
/**
@@ -1358,7 +1409,8 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
{
struct bgp_proto *p = s->proto;
ea_list *attrs = NULL;
- uint code, flags, alen;
+ uint alen;
+ byte code, flags;
byte *pos = data;
/* Parse the attributes */
@@ -1439,7 +1491,7 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
/* If there is no local preference, define one */
if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
- bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
+ bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
return attrs;
@@ -1463,20 +1515,20 @@ loop:
}
void
-bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
+bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
{
/* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
{
REPORT("Discarding AIGP attribute received on non-AIGP session");
- bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
+ bgp_unset_attr(to, BA_AIGP);
}
/* Handle OTC ingress procedure, RFC 9234 */
if (bgp_channel_is_role_applicable(s->channel))
{
struct bgp_proto *p = s->proto;
- eattr *e = bgp_find_attr(a->eattrs, BA_ONLY_TO_CUSTOMER);
+ eattr *e = bgp_find_attr(*to, BA_ONLY_TO_CUSTOMER);
/* Reject routes from downstream if they are leaked */
if (e && (p->cf->local_role == BGP_ROLE_PROVIDER ||
@@ -1492,7 +1544,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER ||
p->cf->local_role == BGP_ROLE_PEER ||
p->cf->local_role == BGP_ROLE_RS_CLIENT))
- bgp_set_attr_u32(&a->eattrs, s->pool, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as);
+ bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as);
}
}
@@ -1512,8 +1564,8 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
-void
-bgp_init_bucket_table(struct bgp_channel *c)
+static void
+bgp_init_bucket_table(struct bgp_pending_tx *c)
{
HASH_INIT(c->bucket_hash, c->pool, 8);
@@ -1521,24 +1573,8 @@ bgp_init_bucket_table(struct bgp_channel *c)
c->withdraw_bucket = NULL;
}
-void
-bgp_free_bucket_table(struct bgp_channel *c)
-{
- HASH_FREE(c->bucket_hash);
-
- struct bgp_bucket *b;
- WALK_LIST_FIRST(b, c->bucket_queue)
- {
- rem_node(&b->send_node);
- mb_free(b);
- }
-
- mb_free(c->withdraw_bucket);
- c->withdraw_bucket = NULL;
-}
-
static struct bgp_bucket *
-bgp_get_bucket(struct bgp_channel *c, ea_list *new)
+bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new)
{
/* Hash and lookup */
u32 hash = ea_hash(new);
@@ -1547,55 +1583,27 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new)
if (b)
return b;
- uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
- uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
- uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
- uint i;
- byte *dest;
-
- /* Gather total size of non-inline attributes */
- for (i = 0; i < new->count; i++)
- {
- eattr *a = &new->attrs[i];
-
- if (!(a->type & EAF_EMBEDDED))
- size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
- }
+ /* Scan the list for total size */
+ uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new));
+ uint size = sizeof(struct bgp_bucket) + ea_size;
- /* Create the bucket */
+ /* Allocate the bucket */
b = mb_alloc(c->pool, size);
*b = (struct bgp_bucket) { };
init_list(&b->prefixes);
b->hash = hash;
- /* Copy list of extended attributes */
- memcpy(b->eattrs, new, ea_size);
- dest = ((byte *) b->eattrs) + ea_size_aligned;
-
- /* Copy values of non-inline attributes */
- for (i = 0; i < new->count; i++)
- {
- eattr *a = &b->eattrs->attrs[i];
-
- if (!(a->type & EAF_EMBEDDED))
- {
- const struct adata *oa = a->u.ptr;
- struct adata *na = (struct adata *) dest;
- memcpy(na, oa, sizeof(struct adata) + oa->length);
- a->u.ptr = na;
- dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
- }
- }
+ /* Copy the ea_list */
+ ea_list_copy(b->eattrs, new, ea_size);
- /* Insert the bucket to send queue and bucket hash */
- add_tail(&c->bucket_queue, &b->send_node);
+ /* Insert the bucket to bucket hash */
HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
return b;
}
static struct bgp_bucket *
-bgp_get_withdraw_bucket(struct bgp_channel *c)
+bgp_get_withdraw_bucket(struct bgp_pending_tx *c)
{
if (!c->withdraw_bucket)
{
@@ -1606,25 +1614,45 @@ bgp_get_withdraw_bucket(struct bgp_channel *c)
return c->withdraw_bucket;
}
-void
-bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+static void
+bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b)
{
- rem_node(&b->send_node);
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
+int
+bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
+{
+ struct bgp_pending_tx *c = bc->ptx;
+
+ /* Won't free the withdraw bucket */
+ if (b == c->withdraw_bucket)
+ return 0;
+
+ if (EMPTY_LIST(b->prefixes))
+ rem_node(&b->send_node);
+
+ if (b->px_uc || !EMPTY_LIST(b->prefixes))
+ return 0;
+
+ bgp_free_bucket(c, b);
+ return 1;
+}
+
void
-bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
+ struct bgp_pending_tx *c = bc->ptx;
rem_node(&b->send_node);
add_tail(&c->bucket_queue, &b->send_node);
}
void
-bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
- struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_proto *p = (void *) bc->c.proto;
+ struct bgp_pending_tx *c = bc->ptx;
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
log(L_ERR "%s: Attribute list too long", p->p.name);
@@ -1633,8 +1661,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
struct bgp_prefix *px = HEAD(b->prefixes);
log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
- rem_node(&px->buck_node);
- add_tail(&wb->prefixes, &px->buck_node);
+ rem_node(&px->buck_node_xx);
+ add_tail(&wb->prefixes, &px->buck_node_xx);
}
}
@@ -1645,7 +1673,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
-#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
+#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
@@ -1654,34 +1682,34 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
-void
-bgp_init_prefix_table(struct bgp_channel *c)
+static void
+bgp_init_prefix_table(struct bgp_channel *bc)
{
+ struct bgp_pending_tx *c = bc->ptx;
HASH_INIT(c->prefix_hash, c->pool, 8);
- uint alen = net_addr_length[c->c.net_type];
+ uint alen = net_addr_length[bc->c.net_type];
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
-void
-bgp_free_prefix_table(struct bgp_channel *c)
-{
- HASH_FREE(c->prefix_hash);
-
- rfree(c->prefix_slab);
- c->prefix_slab = NULL;
-}
-
static struct bgp_prefix *
-bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
+bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx)
{
+ u32 path_id = src->global_id;
+ u32 path_id_hash = add_path_tx ? path_id : 0;
/* We must use a different hash function than the rtable */
- u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id));
- struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
+ u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
+ struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
if (px)
{
- rem_node(&px->buck_node);
+ if (!add_path_tx && (path_id != px->path_id))
+ {
+ rt_unlock_source(rt_find_source_global(px->path_id));
+ rt_lock_source(src);
+ px->path_id = path_id;
+ }
+
return px;
}
@@ -1694,24 +1722,306 @@ bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
px->hash = hash;
px->path_id = path_id;
net_copy(px->net, net);
+ rt_lock_source(src);
HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
return px;
}
-void
-bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
+static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px);
+
+static inline int
+bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
+{
+#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket)
+#define BPX_TRACE(what) do { \
+ if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
+ c->c.proto->name, c->c.name, what, \
+ px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0)
+ px->lastmod = current_time();
+
+ /* Already queued for the same bucket */
+ if (px->cur == b)
+ {
+ BPX_TRACE("already queued");
+ return 0;
+ }
+
+ /* Unqueue from the old bucket */
+ if (px->cur)
+ {
+ rem_node(&px->buck_node_xx);
+ bgp_done_bucket(c, px->cur);
+ }
+
+ /* The new bucket is the same as we sent before */
+ if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b))
+ {
+ if (px->cur)
+ BPX_TRACE("reverted");
+ else
+ BPX_TRACE("already sent");
+
+ /* Well, we haven't sent anything yet */
+ if (!px->last)
+ bgp_free_prefix(c->ptx, px);
+
+ px->cur = NULL;
+ return 0;
+ }
+
+ /* Enqueue the bucket if it has been empty */
+ if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes))
+ add_tail(&c->ptx->bucket_queue, &b->send_node);
+
+ /* Enqueue to the new bucket and indicate the change */
+ add_tail(&b->prefixes, &px->buck_node_xx);
+ px->cur = b;
+
+ BPX_TRACE("queued");
+ return 1;
+
+#undef BPX_TRACE
+}
+
+static void
+bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px)
{
- rem_node(&px->buck_node);
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
+ rt_unlock_source(rt_find_source_global(px->path_id));
+
if (c->prefix_slab)
sl_free(px);
else
mb_free(px);
}
+void
+bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck)
+{
+ /* Cleanup: We're called from bucket senders. */
+ ASSERT_DIE(px->cur == buck);
+ rem_node(&px->buck_node_xx);
+
+ /* We may want to store the updates */
+ if (c->c.out_table)
+ {
+ /* Nothing to be sent right now */
+ px->cur = NULL;
+
+ /* Unref the previous sent version */
+ if (px->last)
+ px->last->px_uc--;
+
+ /* Ref the current sent version */
+ if (!IS_WITHDRAW_BUCKET(buck))
+ {
+ px->last = buck;
+ px->last->px_uc++;
+ return;
+ }
+
+ /* Prefixes belonging to the withdraw bucket are freed always */
+ }
+
+ bgp_free_prefix(c->ptx, px);
+}
+
+static void
+bgp_pending_tx_rfree(resource *r)
+{
+ struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
+
+ HASH_WALK(ptx->prefix_hash, next, n)
+ rt_unlock_source(rt_find_source_global(n->path_id));
+ HASH_WALK_END;
+}
+
+static void bgp_pending_tx_dump(resource *r UNUSED, unsigned indent UNUSED) { debug("\n"); }
+
+static struct resclass bgp_pending_tx_class = {
+ .name = "BGP Pending TX",
+ .size = sizeof(struct bgp_pending_tx),
+ .free = bgp_pending_tx_rfree,
+ .dump = bgp_pending_tx_dump,
+};
+
+void
+bgp_init_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(!c->ptx);
+
+ pool *p = rp_new(c->pool, "BGP Pending TX");
+ c->ptx = ralloc(p, &bgp_pending_tx_class);
+ c->ptx->pool = p;
+
+ bgp_init_bucket_table(c->ptx);
+ bgp_init_prefix_table(c);
+}
+
+void
+bgp_free_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->ptx);
+ ASSERT_DIE(c->ptx->pool);
+
+ rfree(c->ptx->pool);
+ c->ptx = NULL;
+}
+
+
+/*
+ * Prefix hash table exporter
+ */
+
+struct bgp_out_export_hook {
+ struct rt_export_hook h;
+ u32 hash_iter; /* Iterator over hash */
+};
+
+static void
+bgp_out_table_feed(void *data)
+{
+ struct bgp_out_export_hook *hook = data;
+ struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->h.table);
+ struct bgp_pending_tx *c = bc->ptx;
+
+ int max = 512;
+
+ const net_addr *neq = (hook->h.req->addr_mode == TE_ADDR_EQUAL) ? hook->h.req->addr : NULL;
+ const net_addr *cand = NULL;
+
+ do {
+ HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter)
+ {
+ switch (hook->h.req->addr_mode)
+ {
+ case TE_ADDR_IN:
+ if (!net_in_netX(n->net, hook->h.req->addr))
+ continue;
+ /* fall through */
+ case TE_ADDR_NONE:
+ /* Splitting only for multi-net exports */
+ if (--max <= 0)
+ HASH_WALK_ITER_PUT;
+ break;
+
+ case TE_ADDR_FOR:
+ if (!neq)
+ {
+ if (net_in_netX(hook->h.req->addr, n->net) && (!cand || (n->net->length > cand->length)))
+ cand = n->net;
+ continue;
+ }
+ /* fall through */
+ case TE_ADDR_EQUAL:
+ if (!net_equal(n->net, neq))
+ continue;
+ break;
+ }
+
+ struct bgp_bucket *buck = n->cur ?: n->last;
+ ea_list *ea = NULL;
+ if (buck == c->withdraw_bucket)
+ ea_set_dest(&ea, 0, RTD_UNREACHABLE);
+ else
+ {
+ ea = buck->eattrs;
+ eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP);
+ ASSERT_DIE(eanh);
+ const ip_addr *nh = (const void *) eanh->u.ptr->data;
+
+ struct nexthop_adata nhad = {
+ .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), },
+ .nh = { .gw = nh[0], },
+ };
+
+ ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad)));
+ }
+
+ struct rte_storage es = {
+ .rte = {
+ .attrs = ea,
+ .net = n->net,
+ .src = rt_find_source_global(n->path_id),
+ .sender = NULL,
+ .lastmod = n->lastmod,
+ .flags = n->cur ? REF_PENDING : 0,
+ },
+ };
+
+ struct rt_pending_export rpe = {
+ .new = &es, .new_best = &es,
+ };
+
+ if (hook->h.req->export_bulk)
+ {
+ const rte *feed = &es.rte;
+ hook->h.req->export_bulk(hook->h.req, n->net, &rpe, &rpe, &feed, 1);
+ }
+ else if (hook->h.req->export_one)
+ hook->h.req->export_one(hook->h.req, n->net, &rpe);
+ else
+ bug("No export method in export request");
+ }
+ HASH_WALK_ITER_END;
+
+ neq = cand;
+ cand = NULL;
+ } while (neq);
+
+ if (hook->hash_iter)
+ ev_schedule_work(&hook->h.event);
+ else
+ rt_set_export_state(&hook->h, TES_READY);
+}
+
+static void
+bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req)
+{
+ req->hook = rt_alloc_export(re, sizeof(struct bgp_out_export_hook));
+ req->hook->req = req;
+
+ struct bgp_out_export_hook *hook = SKIP_BACK(struct bgp_out_export_hook, h, req->hook);
+
+ hook->h.event.hook = bgp_out_table_feed;
+ rt_init_export(re, req->hook);
+}
+
+static void
+bgp_out_table_export_done(void *data)
+{
+ struct bgp_out_export_hook *hook = data;
+ struct rt_export_request *req = hook->h.req;
+ void (*stopped)(struct rt_export_request *) = hook->h.stopped;
+
+ rt_export_stopped(&hook->h);
+ CALL(stopped, req);
+}
+
+static const struct rt_exporter_class bgp_out_table_export_class = {
+ .start = bgp_out_table_export_start,
+ .done = bgp_out_table_export_done,
+};
+
+void
+bgp_setup_out_table(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->c.out_table == NULL);
+
+ c->prefix_exporter = (struct rt_exporter) {
+ .class = &bgp_out_table_export_class,
+ .addr_type = c->c.table->addr_type,
+ .rp = c->c.proto->pool,
+ };
+
+ rt_exporter_init(&c->prefix_exporter);
+
+ c->c.out_table = &c->prefix_exporter;
+}
+
/*
* BGP protocol glue
@@ -1720,9 +2030,8 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
int
bgp_preexport(struct channel *C, rte *e)
{
- struct proto *SRC = e->src->proto;
struct bgp_proto *p = (struct bgp_proto *) C->proto;
- struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
+ struct bgp_proto *src = bgp_rte_proto(e);
struct bgp_channel *c = (struct bgp_channel *) C;
/* Ignore non-BGP channels */
@@ -1738,8 +2047,20 @@ bgp_preexport(struct channel *C, rte *e)
return 0;
/* Reject flowspec that failed validation */
- if ((e->attrs->dest == RTD_UNREACHABLE) && net_is_flow(e->net->n.addr))
- return -1;
+ if (net_is_flow(e->net))
+ switch (rt_get_flowspec_valid(e))
+ {
+ case FLOWSPEC_VALID:
+ break;
+ case FLOWSPEC_INVALID:
+ return -1;
+ case FLOWSPEC_UNKNOWN:
+ ASSUME((rt_get_source_attr(e) != RTS_BGP) ||
+ !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table);
+ break;
+ case FLOWSPEC__MAX:
+ bug("This never happens.");
+ }
/* IBGP route reflection, RFC 4456 */
if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
@@ -1750,14 +2071,14 @@ bgp_preexport(struct channel *C, rte *e)
/* Generally, this should be handled when path is received, but we check it
also here as rr_cluster_id may be undefined or different in src. */
- if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
+ if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs))
return -1;
}
/* Handle well-known communities, RFC 1997 */
struct eattr *a;
if (p->cf->interpret_communities &&
- (a = bgp_find_attr(e->attrs->eattrs, BA_COMMUNITY)))
+ (a = bgp_find_attr(e->attrs, BA_COMMUNITY)))
{
const struct adata *d = a->u.ptr;
@@ -1781,7 +2102,7 @@ bgp_preexport(struct channel *C, rte *e)
/* Do not export routes marked with OTC to upstream, RFC 9234 */
if (bgp_channel_is_role_applicable(c))
{
- a = bgp_find_attr(e->attrs->eattrs, BA_ONLY_TO_CUSTOMER);
+ a = bgp_find_attr(e->attrs, BA_ONLY_TO_CUSTOMER);
if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER ||
p->cf->local_role==BGP_ROLE_PEER ||
p->cf->local_role==BGP_ROLE_RS_CLIENT))
@@ -1794,8 +2115,7 @@ bgp_preexport(struct channel *C, rte *e)
static ea_list *
bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
{
- struct proto *SRC = e->src->proto;
- struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
+ struct bgp_proto *src = bgp_rte_proto(e);
struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
ea_list *attrs = attrs0;
eattr *a;
@@ -1803,7 +2123,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
/* ORIGIN attribute - mandatory, attach if missing */
if (! bgp_find_attr(attrs0, BA_ORIGIN))
- bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
+ bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
/* AS_PATH attribute - mandatory */
a = bgp_find_attr(attrs0, BA_AS_PATH);
@@ -1818,24 +2138,24 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
{
/* IBGP or route server -> just ensure there is one */
if (!a)
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata);
}
else if (p->is_interior)
{
/* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
}
else /* Regular EBGP (no RS, no confederation) */
{
/* Regular EBGP -> prepend ASN as regular sequence */
ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad);
/* MULTI_EXIT_DESC attribute - accept only if set in export filter */
a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
if (a && !(a->fresh))
- bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
+ bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC);
}
/* NEXT_HOP attribute - delegated to AF-specific hook */
@@ -1844,7 +2164,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
/* LOCAL_PREF attribute - required for IBGP, attach if missing */
if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
- bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
+ bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
/* AIGP attribute - accumulate local metric or originate new one */
u64 metric;
@@ -1853,7 +2173,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
(c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
{
ad = bgp_aigp_set_metric(pool, ad, metric);
- bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad);
}
/* IBGP route reflection, RFC 4456 */
@@ -1861,7 +2181,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
{
/* ORIGINATOR_ID attribute - attach if not already set */
if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
- bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
+ bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id);
/* CLUSTER_LIST attribute - prepend cluster ID */
a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
@@ -1876,7 +2196,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
ad = int_set_prepend(pool, ad, p->rr_cluster_id);
/* Should be at least one prepended cluster ID */
- bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
+ bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad);
}
/* AS4_* transition attributes, RFC 6793 4.2.2 */
@@ -1885,15 +2205,15 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
a = bgp_find_attr(attrs, BA_AS_PATH);
if (a && as_path_contains_as4(a->u.ptr))
{
- bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
- bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
}
a = bgp_find_attr(attrs, BA_AGGREGATOR);
if (a && aggregator_contains_as4(a->u.ptr))
{
- bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
- bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
+ bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr);
}
}
@@ -1904,7 +2224,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER ||
p->cf->local_role == BGP_ROLE_PEER ||
p->cf->local_role == BGP_ROLE_RS_SERVER))
- bgp_set_attr_u32(&attrs, pool, BA_ONLY_TO_CUSTOMER, 0, p->public_as);
+ bgp_set_attr_u32(&attrs, BA_ONLY_TO_CUSTOMER, 0, p->public_as);
}
/*
@@ -1918,13 +2238,12 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
}
void
-bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
+bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old)
{
struct bgp_proto *p = (void *) P;
struct bgp_channel *c = (void *) C;
struct bgp_bucket *buck;
- struct bgp_prefix *px;
- u32 path;
+ struct rte_src *path;
/* Ignore non-BGP channels */
if (C->channel != &channel_bgp)
@@ -1932,71 +2251,53 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
if (new)
{
- struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, tmp_linpool);
+ struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs, tmp_linpool);
/* Error during attribute processing */
if (!attrs)
- log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n->n.addr);
+ log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n);
/* If attributes are invalid, we fail back to withdraw */
- buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
- path = new->src->global_id;
+ buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx);
+ path = new->src;
}
else
{
- buck = bgp_get_withdraw_bucket(c);
- path = old->src->global_id;
+ buck = bgp_get_withdraw_bucket(c->ptx);
+ path = old->src;
}
- px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
- add_tail(&buck->prefixes, &px->buck_node);
-
- bgp_schedule_packet(p->conn, c, PKT_UPDATE);
+ if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck))
+ bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
static inline u32
-bgp_get_neighbor(rte *r)
+bgp_get_neighbor(const rte *r)
{
- eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ eattr *e = ea_find(r->attrs, BGP_EA_ID(BA_AS_PATH));
u32 as;
if (e && as_path_get_first_regular(e->u.ptr, &as))
return as;
/* If AS_PATH is not defined, we treat rte as locally originated */
- struct bgp_proto *p = (void *) r->src->proto;
+ struct bgp_proto *p = bgp_rte_proto(r);
return p->cf->confederation ?: p->local_as;
}
static inline int
-rte_stale(rte *r)
+rte_stale(const rte *r)
{
- if (r->pflags & BGP_REF_STALE)
- return 1;
-
- if (r->pflags & BGP_REF_NOT_STALE)
- return 0;
-
- /* If staleness is unknown, compute and cache it */
- eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
- if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE))
- {
- r->pflags |= BGP_REF_STALE;
- return 1;
- }
- else
- {
- r->pflags |= BGP_REF_NOT_STALE;
- return 0;
- }
+ eattr *a = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
+ return a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
}
int
-bgp_rte_better(rte *new, rte *old)
+bgp_rte_better(const rte *new, const rte *old)
{
- struct bgp_proto *new_bgp = (struct bgp_proto *) new->src->proto;
- struct bgp_proto *old_bgp = (struct bgp_proto *) old->src->proto;
+ struct bgp_proto *new_bgp = bgp_rte_proto(new);
+ struct bgp_proto *old_bgp = bgp_rte_proto(old);
eattr *x, *y;
u32 n, o;
@@ -2025,8 +2326,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* Start with local preferences */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_LOCAL_PREF));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_LOCAL_PREF));
n = x ? x->u.data : new_bgp->cf->default_local_pref;
o = y ? y->u.data : old_bgp->cf->default_local_pref;
if (n > o)
@@ -2045,8 +2346,8 @@ bgp_rte_better(rte *new, rte *old)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_AS_PATH));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_AS_PATH));
n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (n < o)
@@ -2056,8 +2357,8 @@ bgp_rte_better(rte *new, rte *old)
}
/* RFC 4271 9.1.2.2. b) Use origins */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGIN));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGIN));
n = x ? x->u.data : ORIGIN_INCOMPLETE;
o = y ? y->u.data : ORIGIN_INCOMPLETE;
if (n < o)
@@ -2079,8 +2380,8 @@ bgp_rte_better(rte *new, rte *old)
if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
(bgp_get_neighbor(new) == bgp_get_neighbor(old)))
{
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
n = x ? x->u.data : new_bgp->cf->default_med;
o = y ? y->u.data : old_bgp->cf->default_med;
if (n < o)
@@ -2096,8 +2397,8 @@ bgp_rte_better(rte *new, rte *old)
return 1;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
- n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
- o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
+ n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0;
+ o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0;
if (n < o)
return 1;
if (n > o)
@@ -2105,8 +2406,8 @@ bgp_rte_better(rte *new, rte *old)
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
/* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGINATOR_ID));
n = x ? x->u.data : new_bgp->remote_id;
o = y ? y->u.data : old_bgp->remote_id;
@@ -2123,8 +2424,8 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 4456 9. b) Compare cluster list lengths */
- x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
- y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
+ x = ea_find(new->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
+ y = ea_find(old->attrs, BGP_EA_ID(BA_CLUSTER_LIST));
n = x ? int_set_get_size(x->u.ptr) : 0;
o = y ? int_set_get_size(y->u.ptr) : 0;
if (n < o)
@@ -2138,10 +2439,10 @@ bgp_rte_better(rte *new, rte *old)
int
-bgp_rte_mergable(rte *pri, rte *sec)
+bgp_rte_mergable(const rte *pri, const rte *sec)
{
- struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->src->proto;
- struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->src->proto;
+ struct bgp_proto *pri_bgp = bgp_rte_proto(pri);
+ struct bgp_proto *sec_bgp = bgp_rte_proto(sec);
eattr *x, *y;
u32 p, s;
@@ -2158,8 +2459,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
return 0;
/* Start with local preferences */
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_LOCAL_PREF));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_LOCAL_PREF));
p = x ? x->u.data : pri_bgp->cf->default_local_pref;
s = y ? y->u.data : sec_bgp->cf->default_local_pref;
if (p != s)
@@ -2168,8 +2469,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
{
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_AS_PATH));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_AS_PATH));
p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
@@ -2181,8 +2482,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
}
/* RFC 4271 9.1.2.2. b) Use origins */
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_ORIGIN));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_ORIGIN));
p = x ? x->u.data : ORIGIN_INCOMPLETE;
s = y ? y->u.data : ORIGIN_INCOMPLETE;
if (p != s)
@@ -2192,8 +2493,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
{
- x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
+ x = ea_find(pri->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
+ y = ea_find(sec->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC));
p = x ? x->u.data : pri_bgp->cf->default_med;
s = y ? y->u.data : sec_bgp->cf->default_med;
if (p != s)
@@ -2205,8 +2506,8 @@ bgp_rte_mergable(rte *pri, rte *sec)
return 0;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
- p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
- s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
+ p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0;
+ s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0;
if (p != s)
return 0;
@@ -2219,22 +2520,21 @@ bgp_rte_mergable(rte *pri, rte *sec)
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
- return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn);
+ return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn);
}
static inline int
-use_deterministic_med(rte *r)
+use_deterministic_med(struct rte_storage *r)
{
- struct proto *P = r->src->proto;
- return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
+ struct bgp_proto *p = bgp_rte_proto(&r->rte);
+ return p && p->cf->deterministic_med;
}
int
-bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
+bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best)
{
- rte *r, *s;
rte *key = new ? new : old;
- u32 lpref = key->attrs->pref;
+ u32 lpref = rt_get_preference(key);
u32 lasn = bgp_get_neighbor(key);
int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0;
@@ -2300,13 +2600,13 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
}
/* The default case - find a new best-in-group route */
- r = new; /* new may not be in the list */
- for (s=net->routes; rte_is_valid(s); s=s->next)
- if (use_deterministic_med(s) && same_group(s, lpref, lasn))
+ rte *r = new; /* new may not be in the list */
+ for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
+ if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
{
- s->pflags |= BGP_REF_SUPPRESSED;
- if (!r || bgp_rte_better(s, r))
- r = s;
+ s->rte.pflags |= BGP_REF_SUPPRESSED;
+ if (!r || bgp_rte_better(&s->rte, r))
+ r = &s->rte;
}
/* Simple case - the last route in group disappears */
@@ -2318,10 +2618,10 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
new->pflags &= ~BGP_REF_SUPPRESSED;
/* Found all existing routes mergable with best-in-group */
- for (s=net->routes; rte_is_valid(s); s=s->next)
- if (use_deterministic_med(s) && same_group(s, lpref, lasn))
- if ((s != r) && bgp_rte_mergable(r, s))
- s->pflags &= ~BGP_REF_SUPPRESSED;
+ for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next)
+ if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn))
+ if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte))
+ s->rte.pflags &= ~BGP_REF_SUPPRESSED;
/* Found best-in-group */
r->pflags &= ~BGP_REF_SUPPRESSED;
@@ -2356,25 +2656,59 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
return !old_suppressed;
}
-struct rte *
-bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
+void
+bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n,
+ struct rt_pending_export *first, struct rt_pending_export *last,
+ const rte **feed, uint count)
{
- eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
- const struct adata *ad = a ? a->u.ptr : NULL;
- uint flags = a ? a->flags : BAF_PARTIAL;
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ struct rt_import_hook *irh = c->c.in_req.hook;
- if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
- return NULL;
+ /* Find our routes among others */
+ for (uint i=0; i<count; i++)
+ {
+ const rte *r = feed[i];
+
+ if (
+ !rte_is_valid(r) || /* Not a valid route */
+ (r->sender != irh) || /* Not our route */
+ (r->stale_cycle == irh->stale_set)) /* A new route, do not mark as stale */
+ continue;
+
+ eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY));
+ const struct adata *ad = ea ? ea->u.ptr : NULL;
+ uint flags = ea ? ea->flags : BAF_PARTIAL;
+
+ /* LLGR not allowed, withdraw the route */
+ if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
+ {
+ rte_import(&c->c.in_req, n, NULL, r->src);
+ continue;
+ }
- if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
- return r;
+ /* Route already marked as LLGR, do nothing */
+ if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
+ continue;
- r = rte_cow_rta(r, pool);
- bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
- int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
- r->pflags |= BGP_REF_STALE;
+ /* Store the tmp_linpool state to aggresively save memory */
+ struct lp_state tmpp;
+ lp_save(tmp_linpool, &tmpp);
- return r;
+ /* Mark the route as LLGR */
+ rte e0 = *r;
+ bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE));
+
+ /* We need to update the route but keep it stale. */
+ ASSERT_DIE(irh->stale_set == irh->stale_valid + 1);
+ irh->stale_set--;
+ rte_import(&c->c.in_req, n, &e0, r->src);
+ irh->stale_set++;
+
+ /* Restore the memory state */
+ lp_restore(tmp_linpool, &tmpp);
+ }
+
+ rpe_mark_seen_all(req->hook, first, last, NULL);
}
@@ -2390,8 +2724,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
/* First, unset AS4_* attributes */
- if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
- if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
+ if (p4) bgp_unset_attr(attrs, BA_AS4_PATH);
+ if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR);
/* Handle AGGREGATOR attribute */
if (a2 && a4)
@@ -2424,60 +2758,37 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
}
}
-int
-bgp_get_attr(const eattr *a, byte *buf, int buflen)
-{
- uint i = EA_ID(a->id);
- const struct bgp_attr_desc *d;
- int len;
-
- if (bgp_attr_known(i))
- {
- d = &bgp_attr_table[i];
- len = bsprintf(buf, "%s", d->name);
- buf += len;
- if (d->format)
- {
- *buf++ = ':';
- *buf++ = ' ';
- d->format(a, buf, buflen - len - 2);
- return GA_FULL;
- }
- return GA_NAME;
- }
-
- bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
- return GA_NAME;
-}
-
void
-bgp_get_route_info(rte *e, byte *buf)
+bgp_get_route_info(const rte *e, byte *buf)
{
- eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
- eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
+ eattr *p = ea_find(e->attrs, BGP_EA_ID(BA_AS_PATH));
+ eattr *o = ea_find(e->attrs, BGP_EA_ID(BA_ORIGIN));
u32 origas;
- buf += bsprintf(buf, " (%d", e->attrs->pref);
+ buf += bsprintf(buf, " (%d", rt_get_preference(e));
- if (e->pflags & BGP_REF_SUPPRESSED)
- buf += bsprintf(buf, "-");
+ if (!net_is_flow(e->net))
+ {
+ if (e->pflags & BGP_REF_SUPPRESSED)
+ buf += bsprintf(buf, "-");
- if (rte_stale(e))
- buf += bsprintf(buf, "s");
+ if (rte_stale(e))
+ buf += bsprintf(buf, "s");
- u64 metric = bgp_total_aigp_metric(e);
- if (metric < BGP_AIGP_MAX)
- {
- buf += bsprintf(buf, "/%lu", metric);
- }
- else if (e->attrs->igp_metric)
- {
- if (!rte_resolvable(e))
- buf += bsprintf(buf, "/-");
- else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
- buf += bsprintf(buf, "/?");
- else
- buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
+ u64 metric = bgp_total_aigp_metric(e);
+ if (metric < BGP_AIGP_MAX)
+ {
+ buf += bsprintf(buf, "/%lu", metric);
+ }
+ else if (metric = rt_get_igp_metric(e))
+ {
+ if (!rte_resolvable(e))
+ buf += bsprintf(buf, "/-");
+ else if (metric >= IGP_METRIC_UNKNOWN)
+ buf += bsprintf(buf, "/?");
+ else
+ buf += bsprintf(buf, "/%d", metric);
+ }
}
buf += bsprintf(buf, ") [");
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index a0e83415..cda0eb8d 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -115,7 +115,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "nest/locks.h"
#include "conf/conf.h"
@@ -126,9 +126,13 @@
#include "bgp.h"
+static void bgp_listen_create(void *);
static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */
+static list STATIC_LIST_INIT(bgp_listen_pending); /* Global list of listening socket open requests */
+static event bgp_listen_event = { .hook = bgp_listen_create };
+DOMAIN(rtable) bgp_listen_domain;
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
@@ -139,73 +143,43 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd);
static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
static void bgp_listen_sock_err(sock *sk UNUSED, int err);
+static void bgp_initiate_disable(struct bgp_proto *p, int err_val);
-/**
- * bgp_open - open a BGP instance
- * @p: BGP instance
- *
- * This function allocates and configures shared BGP resources, mainly listening
- * sockets. Should be called as the last step during initialization (when lock
- * is acquired and neighbor is ready). When error, caller should change state to
- * PS_DOWN and return immediately.
- */
-static int
-bgp_open(struct bgp_proto *p)
+static void bgp_graceful_restart_feed(struct bgp_channel *c);
+
+
+static inline int
+bgp_setup_auth(struct bgp_proto *p, int enable)
{
- struct bgp_socket *bs = NULL;
- struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
- ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
- (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
- uint port = p->cf->local_port;
- uint flags = p->cf->free_bind ? SKF_FREEBIND : 0;
- uint flag_mask = SKF_FREEBIND;
+ /* Beware. This is done from main_birdloop and protocol birdloop is NOT ENTERED.
+ * Anyway, we are only accessing:
+ * - protocol config which can be changed only from main_birdloop (reconfig)
+ * - protocol listen socket which is always driven by main_birdloop
+ * - protocol name which is set on reconfig
+ */
- /* We assume that cf->iface is defined iff cf->local_ip is link-local */
+ if (p->cf->password && p->listen.sock)
+ {
+ ip_addr prefix = p->cf->remote_ip;
+ int pxlen = -1;
- WALK_LIST(bs, bgp_sockets)
- if (ipa_equal(bs->sk->saddr, addr) &&
- (bs->sk->sport == port) &&
- (bs->sk->iface == ifa) &&
- (bs->sk->vrf == p->p.vrf) &&
- ((bs->sk->flags & flag_mask) == flags))
+ if (p->cf->remote_range)
{
- bs->uc++;
- p->sock = bs;
- return 0;
+ prefix = net_prefix(p->cf->remote_range);
+ pxlen = net_pxlen(p->cf->remote_range);
}
- sock *sk = sk_new(proto_pool);
- sk->type = SK_TCP_PASSIVE;
- sk->ttl = 255;
- sk->saddr = addr;
- sk->sport = port;
- sk->iface = ifa;
- sk->vrf = p->p.vrf;
- sk->flags = flags;
- sk->tos = IP_PREC_INTERNET_CONTROL;
- sk->rbsize = BGP_RX_BUFFER_SIZE;
- sk->tbsize = BGP_TX_BUFFER_SIZE;
- sk->rx_hook = bgp_incoming_connection;
- sk->err_hook = bgp_listen_sock_err;
-
- if (sk_open(sk) < 0)
- goto err;
-
- bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
- bs->sk = sk;
- bs->uc = 1;
- p->sock = bs;
- sk->data = bs;
-
- add_tail(&bgp_sockets, &bs->n);
+ int rv = sk_set_md5_auth(p->listen.sock->sk,
+ p->cf->local_ip, prefix, pxlen, p->cf->iface,
+ enable ? p->cf->password : NULL, p->cf->setkey);
- return 0;
+ if (rv < 0)
+ sk_log_error(p->listen.sock->sk, p->p.name);
-err:
- sk_log_error(sk, p->p.name);
- log(L_ERR "%s: Cannot open listening socket", p->p.name);
- rfree(sk);
- return -1;
+ return rv;
+ }
+ else
+ return 0;
}
/**
@@ -217,43 +191,155 @@ err:
static void
bgp_close(struct bgp_proto *p)
{
- struct bgp_socket *bs = p->sock;
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
- ASSERT(bs && bs->uc);
+ struct bgp_listen_request *req = &p->listen;
+ struct bgp_socket *bs = req->sock;
- if (--bs->uc)
- return;
+ if (bs)
+ {
+ req->sock = NULL;
+ rem_node(&req->n);
- rfree(bs->sk);
- rem_node(&bs->n);
- mb_free(bs);
+ if (bs && EMPTY_LIST(bs->requests))
+ ev_send(&global_event_list, &bgp_listen_event);
+ }
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
-static inline int
-bgp_setup_auth(struct bgp_proto *p, int enable)
+/**
+ * bgp_open - open a BGP instance
+ * @p: BGP instance
+ *
+ * This function allocates and configures shared BGP resources, mainly listening
+ * sockets. Should be called as the last step during initialization (when lock
+ * is acquired and neighbor is ready). When error, caller should change state to
+ * PS_DOWN and return immediately.
+ */
+static void
+bgp_open(struct bgp_proto *p)
{
- if (p->cf->password)
- {
- ip_addr prefix = p->cf->remote_ip;
- int pxlen = -1;
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
- if (p->cf->remote_range)
+ struct bgp_listen_request *req = &p->listen;
+ /* We assume that cf->iface is defined iff cf->local_ip is link-local */
+ req->iface = p->cf->strict_bind ? p->cf->iface : NULL;
+ req->vrf = p->p.vrf;
+ req->addr = p->cf->strict_bind ? p->cf->local_ip :
+ (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
+ req->port = p->cf->local_port;
+ req->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
+
+ BGP_TRACE(D_EVENTS, "Requesting listen socket at %I%J port %u", req->addr, req->iface, req->port);
+
+ add_tail(&bgp_listen_pending, &req->n);
+ ev_send(&global_event_list, &bgp_listen_event);
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+}
+
+static void
+bgp_listen_create(void *_ UNUSED)
+{
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
+ uint flag_mask = SKF_FREEBIND;
+
+ while (1) {
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ if (EMPTY_LIST(bgp_listen_pending))
{
- prefix = net_prefix(p->cf->remote_range);
- pxlen = net_pxlen(p->cf->remote_range);
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+ break;
}
- int rv = sk_set_md5_auth(p->sock->sk,
- p->cf->local_ip, prefix, pxlen, p->cf->iface,
- enable ? p->cf->password : NULL, p->cf->setkey);
+ /* Get the first request to match */
+ struct bgp_listen_request *req = HEAD(bgp_listen_pending);
+ struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req);
+ rem_node(&req->n);
+
+ /* First try to find existing socket */
+ struct bgp_socket *bs;
+ WALK_LIST(bs, bgp_sockets)
+ if (ipa_equal(bs->sk->saddr, req->addr) &&
+ (bs->sk->sport == req->port) &&
+ (bs->sk->iface == req->iface) &&
+ (bs->sk->vrf == req->vrf) &&
+ ((bs->sk->flags & flag_mask) == req->flags))
+ break;
- if (rv < 0)
- sk_log_error(p->sock->sk, p->p.name);
+ /* Not found any */
+ if (NODE_VALID(bs))
+ BGP_TRACE(D_EVENTS, "Found a listening socket: %p", bs);
+ else
+ {
+ /* Allocating new socket from global protocol pool.
+ * We can do this in main_birdloop. */
+ sock *sk = sk_new(proto_pool);
+ sk->type = SK_TCP_PASSIVE;
+ sk->ttl = 255;
+ sk->saddr = req->addr;
+ sk->sport = req->port;
+ sk->iface = req->iface;
+ sk->vrf = req->vrf;
+ sk->flags = req->flags;
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->rbsize = BGP_RX_BUFFER_SIZE;
+ sk->tbsize = BGP_TX_BUFFER_SIZE;
+ sk->rx_hook = bgp_incoming_connection;
+ sk->err_hook = bgp_listen_sock_err;
+
+ if (sk_open(sk, &main_birdloop) < 0)
+ {
+ sk_log_error(sk, p->p.name);
+ log(L_ERR "%s: Cannot open listening socket", p->p.name);
+ rfree(sk);
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
- return rv;
+ bgp_initiate_disable(p, BEM_NO_SOCKET);
+ continue;
+ }
+
+ bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
+ bs->sk = sk;
+ sk->data = bs;
+
+ init_list(&bs->requests);
+ add_tail(&bgp_sockets, &bs->n);
+
+ BGP_TRACE(D_EVENTS, "Created new listening socket: %p", bs);
+ }
+
+ req->sock = bs;
+ add_tail(&bs->requests, &req->n);
+
+ if (bgp_setup_auth(p, 1) < 0)
+ {
+ rem_node(&req->n);
+ req->sock = NULL;
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ bgp_initiate_disable(p, BEM_INVALID_MD5);
+ continue;
+ }
+
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
- else
- return 0;
+
+ /* Cleanup leftover listening sockets */
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+ struct bgp_socket *bs;
+ node *nxt;
+ WALK_LIST_DELSAFE(bs, nxt, bgp_sockets)
+ if (EMPTY_LIST(bs->requests))
+ {
+ rfree(bs->sk);
+ rem_node(&bs->n);
+ mb_free(bs);
+ }
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
}
static inline struct bgp_channel *
@@ -279,6 +365,8 @@ bgp_startup(struct bgp_proto *p)
if (p->postponed_sk)
{
/* Apply postponed incoming connection */
+ sk_reloop(p->postponed_sk, p->p.loop);
+
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
bgp_send_open(&p->incoming_conn);
@@ -296,13 +384,7 @@ bgp_startup_timeout(timer *t)
static void
bgp_initiate(struct bgp_proto *p)
{
- int err_val;
-
- if (bgp_open(p) < 0)
- { err_val = BEM_NO_SOCKET; goto err1; }
-
- if (bgp_setup_auth(p, 1) < 0)
- { err_val = BEM_INVALID_MD5; goto err2; }
+ bgp_open(p);
if (p->cf->bfd)
bgp_update_bfd(p, p->cf->bfd);
@@ -311,23 +393,28 @@ bgp_initiate(struct bgp_proto *p)
{
p->start_state = BSS_DELAY;
BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
- bgp_start_timer(p->startup_timer, p->startup_delay);
+ bgp_start_timer(p, p->startup_timer, p->startup_delay);
}
else
bgp_startup(p);
+}
- return;
-
-err2:
- bgp_close(p);
-err1:
- p->p.disabled = 1;
- bgp_store_error(p, NULL, BE_MISC, err_val);
-
- p->neigh = NULL;
- proto_notify_state(&p->p, PS_DOWN);
-
- return;
+static void
+bgp_initiate_disable(struct bgp_proto *p, int err_val)
+{
+ PROTO_LOCKED_FROM_MAIN(&p->p)
+ {
+ /* The protocol may be already down for another reason.
+ * Shutdown the protocol only if it isn't already shutting down. */
+ switch (p->p.proto_state)
+ {
+ case PS_START:
+ case PS_UP:
+ p->p.disabled = 1;
+ bgp_store_error(p, NULL, BE_MISC, err_val);
+ bgp_stop(p, err_val, NULL, 0);
+ }
+ }
}
/**
@@ -340,14 +427,14 @@ err1:
* timers.
*/
void
-bgp_start_timer(timer *t, uint value)
+bgp_start_timer(struct bgp_proto *p, timer *t, uint value)
{
if (value)
{
/* The randomization procedure is specified in RFC 4271 section 10 */
btime time = value S;
btime randomize = random() % ((time / 4) + 1);
- tm_start(t, time - randomize);
+ tm_start_in(t, time - randomize, p->p.loop);
}
else
tm_stop(t);
@@ -374,8 +461,10 @@ bgp_close_conn(struct bgp_conn *conn)
conn->keepalive_timer = NULL;
rfree(conn->hold_timer);
conn->hold_timer = NULL;
+
rfree(conn->tx_ev);
conn->tx_ev = NULL;
+
rfree(conn->sk);
conn->sk = NULL;
@@ -512,9 +601,16 @@ void
bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
{
proto_notify_state(&p->p, PS_STOP);
+ p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
- ev_schedule(p->event);
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ if (c->ptx)
+ bgp_free_pending_tx(c);
+
+ proto_send_event(&p->p, p->event);
}
static inline void
@@ -607,7 +703,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
int active = loc->ready && rem->ready;
c->c.disabled = !active;
- c->c.reloadable = p->route_refresh || c->cf->import_table;
+ c->c.reloadable = p->route_refresh || ((c->c.in_keep & RIK_PREFILTER) == RIK_PREFILTER);
c->index = active ? num++ : 0;
@@ -705,7 +801,7 @@ bgp_conn_enter_close_state(struct bgp_conn *conn)
conn->sk->rx_hook = NULL;
/* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
- bgp_start_timer(conn->hold_timer, 10);
+ bgp_start_timer(p, conn->hold_timer, 10);
if (os == BS_ESTABLISHED)
bgp_conn_leave_established_state(p);
@@ -719,7 +815,7 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn)
bgp_close_conn(conn);
bgp_conn_set_state(conn, BS_IDLE);
- ev_schedule(p->event);
+ proto_send_event(&p->p, p->event);
if (os == BS_ESTABLISHED)
bgp_conn_leave_established_state(p);
@@ -761,32 +857,28 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
{
case BGP_GRS_NONE:
c->gr_active = BGP_GRS_ACTIVE;
- rt_refresh_begin(c->c.table, &c->c);
- break;
+ /* fall through */
case BGP_GRS_ACTIVE:
- rt_refresh_end(c->c.table, &c->c);
- rt_refresh_begin(c->c.table, &c->c);
+ rt_refresh_begin(&c->c.in_req);
break;
case BGP_GRS_LLGR:
- rt_refresh_begin(c->c.table, &c->c);
- rt_modify_stale(c->c.table, &c->c);
+ rt_refresh_begin(&c->c.in_req);
+ bgp_graceful_restart_feed(c);
break;
}
}
else
{
/* Just flush the routes */
- rt_refresh_begin(c->c.table, &c->c);
- rt_refresh_end(c->c.table, &c->c);
+ rt_refresh_begin(&c->c.in_req);
+ rt_refresh_end(&c->c.in_req);
}
/* Reset bucket and prefix tables */
- bgp_free_bucket_table(c);
- bgp_free_prefix_table(c);
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ bgp_free_pending_tx(c);
+ bgp_init_pending_tx(c);
c->packets_to_send = 0;
}
@@ -794,9 +886,56 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
ASSERT(p->gr_active_num > 0);
proto_notify_state(&p->p, PS_START);
- tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
+ tm_start_in(p->gr_timer, p->conn->remote_caps->gr_time S, p->p.loop);
+}
+
+static void
+bgp_graceful_restart_feed_done(struct rt_export_request *req)
+{
+ req->hook = NULL;
+}
+
+static void
+bgp_graceful_restart_feed_dump_req(struct rt_export_request *req)
+{
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ debug(" BGP-GR %s.%s export request %p\n", c->c.proto->name, c->c.name, req);
+}
+
+static void
+bgp_graceful_restart_feed_log_state_change(struct rt_export_request *req, u8 state)
+{
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req);
+ struct bgp_proto *p = (void *) c->c.proto;
+ BGP_TRACE(D_EVENTS, "Long-lived graceful restart export state changed to %s", rt_export_state_name(state));
+
+ if (state == TES_READY)
+ rt_stop_export(req, bgp_graceful_restart_feed_done);
+}
+
+static void
+bgp_graceful_restart_drop_export(struct rt_export_request *req UNUSED, const net_addr *n UNUSED, struct rt_pending_export *rpe UNUSED)
+{ /* Nothing to do */ }
+
+static void
+bgp_graceful_restart_feed(struct bgp_channel *c)
+{
+ c->stale_feed = (struct rt_export_request) {
+ .name = "BGP-GR",
+ .list = &global_work_list,
+ .trace_routes = c->c.debug | c->c.proto->debug,
+ .dump_req = bgp_graceful_restart_feed_dump_req,
+ .log_state_change = bgp_graceful_restart_feed_log_state_change,
+ .export_bulk = bgp_rte_modify_stale,
+ .export_one = bgp_graceful_restart_drop_export,
+ };
+
+ rt_request_export(c->c.table, &c->stale_feed);
}
+
+
+
/**
* bgp_graceful_restart_done - finish active BGP graceful restart
* @c: BGP channel
@@ -820,7 +959,7 @@ bgp_graceful_restart_done(struct bgp_channel *c)
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
tm_stop(c->stale_timer);
- rt_refresh_end(c->c.table, &c->c);
+ rt_refresh_end(&c->c.in_req);
}
/**
@@ -861,8 +1000,8 @@ bgp_graceful_restart_timeout(timer *t)
/* Channel is in GR, and supports LLGR -> start LLGR */
c->gr_active = BGP_GRS_LLGR;
- tm_start(c->stale_timer, c->stale_time S);
- rt_modify_stale(c->c.table, &c->c);
+ tm_start_in(c->stale_timer, c->stale_time S, p->p.loop);
+ bgp_graceful_restart_feed(c);
}
}
else
@@ -900,10 +1039,7 @@ bgp_refresh_begin(struct bgp_channel *c)
{ log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
c->load_state = BFS_REFRESHING;
- rt_refresh_begin(c->c.table, &c->c);
-
- if (c->c.in_table)
- rt_refresh_begin(c->c.in_table, &c->c);
+ rt_refresh_begin(&c->c.in_req);
}
/**
@@ -924,10 +1060,7 @@ bgp_refresh_end(struct bgp_channel *c)
{ log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
c->load_state = BFS_NONE;
- rt_refresh_end(c->c.table, &c->c);
-
- if (c->c.in_table)
- rt_prune_sync(c->c.in_table, 0);
+ rt_refresh_end(&c->c.in_req);
}
@@ -941,7 +1074,7 @@ bgp_send_open(struct bgp_conn *conn)
bgp_prepare_capabilities(conn);
bgp_schedule_packet(conn, NULL, PKT_OPEN);
bgp_conn_set_state(conn, BS_OPENSENT);
- bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
+ bgp_start_timer(conn->bgp, conn->hold_timer, conn->bgp->cf->initial_hold_time);
}
static void
@@ -1018,7 +1151,7 @@ bgp_hold_timeout(timer *t)
and perhaps just not processed BGP packets in time. */
if (sk_rx_ready(conn->sk) > 0)
- bgp_start_timer(conn->hold_timer, 10);
+ bgp_start_timer(p, conn->hold_timer, 10);
else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
{
BGP_TRACE(D_EVENTS, "Hold timer expired");
@@ -1078,7 +1211,7 @@ bgp_active(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
bgp_setup_conn(p, conn);
bgp_conn_set_state(conn, BS_ACTIVE);
- bgp_start_timer(conn->connect_timer, delay);
+ bgp_start_timer(p, conn->connect_timer, delay);
}
/**
@@ -1109,6 +1242,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
s->tos = IP_PREC_INTERNET_CONTROL;
s->password = p->cf->password;
s->tx_hook = bgp_connected;
+ s->flags = p->cf->free_bind ? SKF_FREEBIND : 0;
BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
@@ -1116,7 +1250,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
bgp_setup_sk(conn, s);
bgp_conn_set_state(conn, BS_CONNECT);
- if (sk_open(s) < 0)
+ if (sk_open(s, p->p.loop) < 0)
goto err;
/* Set minimal receive TTL if needed */
@@ -1125,7 +1259,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
goto err;
DBG("BGP: Waiting for connect success\n");
- bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
+ bgp_start_timer(p, conn->connect_timer, p->cf->connect_retry_time);
return;
err:
@@ -1146,12 +1280,17 @@ static struct bgp_proto *
bgp_find_proto(sock *sk)
{
struct bgp_proto *best = NULL;
- struct bgp_proto *p;
+ struct bgp_socket *bs = sk->data;
+ struct bgp_listen_request *req;
/* sk->iface is valid only if src or dst address is link-local */
int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
- WALK_LIST(p, proto_list)
+ LOCK_DOMAIN(rtable, bgp_listen_domain);
+
+ WALK_LIST(req, bs->requests)
+ {
+ struct bgp_proto *p = SKIP_BACK(struct bgp_proto, listen, req);
if ((p->p.proto == &proto_bgp) &&
(ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
(!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
@@ -1165,7 +1304,9 @@ bgp_find_proto(sock *sk)
if (!bgp_is_dynamic(p))
break;
}
+ }
+ UNLOCK_DOMAIN(rtable, bgp_listen_domain);
return best;
}
@@ -1184,6 +1325,8 @@ bgp_find_proto(sock *sk)
static int
bgp_incoming_connection(sock *sk, uint dummy UNUSED)
{
+ ASSERT_DIE(birdloop_inside(&main_birdloop));
+
struct bgp_proto *p;
int acc, hops;
@@ -1197,6 +1340,8 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
return 0;
}
+ birdloop_enter(p->p.loop);
+
/*
* BIRD should keep multiple incoming connections in OpenSent state (for
* details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
@@ -1226,7 +1371,7 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
if (!acc)
{
rfree(sk);
- return 0;
+ goto leave;
}
hops = p->cf->multihop ? : 1;
@@ -1251,19 +1396,24 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
p = bgp_spawn(p, sk->daddr);
p->postponed_sk = sk;
rmove(sk, p->p.pool);
- return 0;
+ goto leave;
}
rmove(sk, p->p.pool);
+ sk_reloop(sk, p->p.loop);
+
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
bgp_send_open(&p->incoming_conn);
- return 0;
+ goto leave;
err:
sk_log_error(sk, p->p.name);
log(L_ERR "%s: Incoming connection aborted", p->p.name);
rfree(sk);
+
+leave:
+ birdloop_leave(p->p.loop);
return 0;
}
@@ -1374,15 +1524,22 @@ static void
bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd)
{
if (bfd && p->bfd_req)
+ {
+ BGP_TRACE(D_EVENTS, "Updating existing BFD request");
bfd_update_request(p->bfd_req, bfd);
+ }
if (bfd && !p->bfd_req && !bgp_is_dynamic(p))
+ {
p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
p->cf->multihop ? NULL : p->neigh->iface,
- p->p.vrf, bgp_bfd_notify, p, bfd);
+ p->p.vrf, bgp_bfd_notify, p, p->p.loop, bfd);
+ BGP_TRACE(D_EVENTS, "Requesting a new BFD session");
+ }
if (!bfd && p->bfd_req)
{
+ BGP_TRACE(D_EVENTS, "Retracting the BFD request");
rfree(p->bfd_req);
p->bfd_req = NULL;
}
@@ -1398,12 +1555,8 @@ bgp_reload_routes(struct channel *C)
if (C->channel != &channel_bgp)
return;
- ASSERT(p->conn && (p->route_refresh || c->c.in_table));
-
- if (c->c.in_table)
- channel_schedule_reload(C);
- else
- bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
+ ASSERT(p->conn && p->route_refresh);
+ bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
}
static void
@@ -1423,6 +1576,12 @@ bgp_feed_begin(struct channel *C, int initial)
if (initial && p->cf->gr_mode)
c->feed_state = BFS_LOADING;
+ if (!initial && C->out_table)
+ {
+ c->feed_out_table = 1;
+ return;
+ }
+
/* It is refeed and both sides support enhanced route refresh */
if (!initial && p->enhanced_refresh)
{
@@ -1445,6 +1604,12 @@ bgp_feed_end(struct channel *C)
if (C->channel != &channel_bgp)
return;
+ if (c->feed_out_table)
+ {
+ c->feed_out_table = 0;
+ return;
+ }
+
/* This should not happen */
if (!p->conn)
return;
@@ -1467,9 +1632,9 @@ bgp_feed_end(struct channel *C)
static void
-bgp_start_locked(struct object_lock *lock)
+bgp_start_locked(void *_p)
{
- struct bgp_proto *p = lock->data;
+ struct bgp_proto *p = _p;
const struct bgp_config *cf = p->cf;
if (p->p.proto_state != PS_START)
@@ -1545,6 +1710,8 @@ bgp_start(struct proto *P)
p->last_rx_update = 0;
p->event = ev_new_init(p->p.pool, bgp_decision, p);
+ p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p);
+
p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
@@ -1574,8 +1741,11 @@ bgp_start(struct proto *P)
lock->iface = p->cf->iface;
lock->vrf = p->cf->iface ? NULL : p->p.vrf;
lock->type = OBJLOCK_TCP;
- lock->hook = bgp_start_locked;
- lock->data = p;
+ lock->event = (event) {
+ .hook = bgp_start_locked,
+ .data = p,
+ };
+ lock->target = proto_event_list(P);
/* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
if (bgp_is_dynamic(p))
@@ -1675,6 +1845,13 @@ done:
return p->p.proto_state;
}
+struct rte_owner_class bgp_rte_owner_class = {
+ .get_route_info = bgp_get_route_info,
+ .rte_better = bgp_rte_better,
+ .rte_mergable = bgp_rte_mergable,
+ .rte_igp_metric = bgp_rte_igp_metric,
+};
+
static struct proto *
bgp_init(struct proto_config *CF)
{
@@ -1684,15 +1861,13 @@ bgp_init(struct proto_config *CF)
P->rt_notify = bgp_rt_notify;
P->preexport = bgp_preexport;
- P->neigh_notify = bgp_neigh_notify;
+ P->iface_sub.neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
P->feed_begin = bgp_feed_begin;
P->feed_end = bgp_feed_end;
- P->rte_better = bgp_rte_better;
- P->rte_mergable = bgp_rte_mergable;
- P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
- P->rte_modify = bgp_rte_modify_stale;
- P->rte_igp_metric = bgp_rte_igp_metric;
+
+ P->sources.class = &bgp_rte_owner_class;
+ P->sources.rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
p->cf = cf;
p->is_internal = (cf->local_as == cf->remote_as);
@@ -1759,14 +1934,11 @@ bgp_channel_start(struct channel *C)
}
c->pool = p->p.pool; // XXXX
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
-
- if (c->cf->import_table)
- channel_setup_in_table(C);
if (c->cf->export_table)
- channel_setup_out_table(C);
+ bgp_setup_out_table(c);
+
+ bgp_init_pending_tx(c);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
@@ -1889,7 +2061,7 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32
return cc2->c.table;
/* Last, try default table of given type */
- if (tab = cf->c.global->def_tables[type])
+ if (tab = rt_get_default_table(cf->c.global, type))
return tab;
cf_error("Undefined IGP table");
@@ -1908,7 +2080,7 @@ bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc)
return cc2->c.table;
/* Last, try default table of given type */
- struct rtable_config *tab = cf->c.global->def_tables[type];
+ struct rtable_config *tab = rt_get_default_table(cf->c.global, type);
if (tab)
return tab;
@@ -2175,7 +2347,6 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor
(new->llgr_time != old->llgr_time) ||
(new->ext_next_hop != old->ext_next_hop) ||
(new->add_path != old->add_path) ||
- (new->import_table != old->import_table) ||
(new->export_table != old->export_table) ||
(TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) ||
(TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) ||
@@ -2190,11 +2361,13 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor
(new->aigp != old->aigp) ||
(new->cost != old->cost))
{
- /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
- if (c->c.in_table && (c->c.channel_state == CS_UP))
+ /* If import table is active and route refresh is possible, we just ask for route refresh */
+ if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP) && p->route_refresh)
bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
- *import_changed = 1;
+ /* Otherwise we do complete reload */
+ else
+ *import_changed = 1;
}
if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
@@ -2562,6 +2735,9 @@ bgp_show_proto_info(struct proto *P)
tm_remains(p->conn->hold_timer), p->conn->hold_time);
cli_msg(-1006, " Keepalive timer: %t/%u",
tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
+ cli_msg(-1006, " TX pending: %d bytes%s",
+ p->conn->sk->tpos - p->conn->sk->ttx,
+ ev_active(p->conn->tx_ev) ? " (refill scheduled)" : "");
}
#if 0
@@ -2615,6 +2791,22 @@ bgp_show_proto_info(struct proto *P)
if (c->base_table)
cli_msg(-1006, " Base table: %s", c->base_table->name);
+
+ uint bucket_cnt = 0;
+ uint prefix_cnt = 0;
+ struct bgp_bucket *buck;
+ struct bgp_prefix *px;
+ if (c->ptx)
+ WALK_LIST(buck, c->ptx->bucket_queue)
+ {
+ bucket_cnt++;
+ WALK_LIST(px, buck->prefixes)
+ if (px->cur)
+ prefix_cnt++;
+ }
+
+ cli_msg(-1006, " Pending %u attribute sets with total %u prefixes to send",
+ bucket_cnt, prefix_cnt);
}
}
}
@@ -2632,7 +2824,6 @@ struct channel_class channel_bgp = {
struct protocol proto_bgp = {
.name = "BGP",
.template = "bgp%d",
- .class = PROTOCOL_BGP,
.preference = DEF_PREF_BGP,
.channel_mask = NB_IP | NB_VPN | NB_FLOW,
.proto_size = sizeof(struct bgp_proto),
@@ -2644,12 +2835,12 @@ struct protocol proto_bgp = {
.reconfigure = bgp_reconfigure,
.copy_config = bgp_copy_config,
.get_status = bgp_get_status,
- .get_attr = bgp_get_attr,
- .get_route_info = bgp_get_route_info,
.show_proto_info = bgp_show_proto_info
};
void bgp_build(void)
{
proto_build(&proto_bgp);
+ bgp_register_attrs();
+ bgp_listen_domain = DOMAIN_NEW(rtable, "BGP Listen Sockets");
}
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index a36949c2..c98582dd 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -14,7 +14,7 @@
#include <stdint.h>
#include <setjmp.h>
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/bfd.h"
//#include "lib/lists.h"
#include "lib/hash.h"
@@ -67,10 +67,10 @@ struct bgp_af_desc {
u8 no_igp;
const char *name;
uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size);
- void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a);
void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to);
uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size);
- void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, ea_list **to);
};
@@ -162,7 +162,7 @@ struct bgp_channel_config {
u8 aigp_originate; /* AIGP is originated automatically */
u32 cost; /* IGP cost for direct next hops */
u8 import_table; /* Use c.in_table as Adj-RIB-In */
- u8 export_table; /* Use c.out_table as Adj-RIB-Out */
+ u8 export_table; /* Keep Adj-RIB-Out and export it */
struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
@@ -220,8 +220,6 @@ struct bgp_channel_config {
/* rte->pflags */
#define BGP_REF_SUPPRESSED 0x1 /* Used for deterministic MED comparison */
-#define BGP_REF_STALE 0x2 /* Route is LLGR_STATE */
-#define BGP_REF_NOT_STALE 0x4 /* Route is NOT LLGR_STATE */
struct bgp_af_caps {
u32 afi;
@@ -266,8 +264,8 @@ struct bgp_caps {
struct bgp_socket {
node n; /* Node in global bgp_sockets */
+ list requests; /* Listen requests */
sock *sk; /* Real listening socket */
- u32 uc; /* Use count */
};
struct bgp_stats {
@@ -302,6 +300,16 @@ struct bgp_conn {
uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
+struct bgp_listen_request {
+ node n; /* Node in bgp_socket / pending list */
+ struct bgp_socket *sock; /* Assigned socket */
+ ip_addr addr;
+ struct iface *iface;
+ struct iface *vrf;
+ uint port;
+ uint flags;
+};
+
struct bgp_proto {
struct proto p;
const struct bgp_config *cf; /* Shortcut to BGP configuration */
@@ -333,9 +341,10 @@ struct bgp_proto {
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
struct object_lock *lock; /* Lock for neighbor connection */
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
- struct bgp_socket *sock; /* Shared listening socket */
+ struct bgp_listen_request listen; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */
+ event *uncork_ev; /* Uncork event in case of congestion */
struct bgp_stats stats; /* BGP statistics */
btime last_established; /* Last time of enter/leave of established state */
btime last_rx_update; /* Last time of RX update */
@@ -367,12 +376,8 @@ struct bgp_channel {
/* Rest are zeroed when down */
pool *pool;
- HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
- struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
- list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
-
- HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
- slab *prefix_slab; /* Slab holding prefix nodes */
+ struct bgp_pending_tx *ptx; /* Routes waiting to be sent */
+ struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@@ -386,17 +391,23 @@ struct bgp_channel {
timer *stale_timer; /* Long-lived stale timer for LLGR */
u32 stale_time; /* Stored LLGR stale time from last session */
+ struct rt_export_request stale_feed; /* Feeder request for stale route modification */
u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */
u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */
u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */
u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */
+
+ u8 feed_out_table; /* Refeed into out_table */
};
struct bgp_prefix {
- node buck_node; /* Node in per-bucket list */
+ node buck_node_xx; /* Node in per-bucket list */
struct bgp_prefix *next; /* Node in prefix hash table */
+ struct bgp_bucket *last; /* Last bucket sent with this prefix */
+ struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */
+ btime lastmod; /* Last modification of this prefix */
u32 hash;
u32 path_id;
net_addr net[0];
@@ -405,11 +416,24 @@ struct bgp_prefix {
struct bgp_bucket {
node send_node; /* Node in send queue */
struct bgp_bucket *next; /* Node in bucket hash table */
- list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */
+ list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */
u32 hash; /* Hash over extended attributes */
+ u32 px_uc; /* How many prefixes are linking this bucket */
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
+struct bgp_pending_tx {
+ resource r;
+ pool *pool;
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
+
+ HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+};
+
struct bgp_export_state {
struct bgp_proto *proto;
struct bgp_channel *channel;
@@ -473,13 +497,12 @@ struct bgp_parse_state {
uint err_subcode;
jmp_buf err_jmpbuf;
- struct hostentry *hostentry;
adata *mpls_labels;
/* Cached state for bgp_rte_update() */
u32 last_id;
struct rte_src *last_src;
- rta *cached_rta;
+ ea_list *cached_ea;
};
#define BGP_PORT 179
@@ -524,7 +547,7 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode)
}
-void bgp_start_timer(timer *t, uint value);
+void bgp_start_timer(struct bgp_proto *p, timer *t, uint value);
void bgp_check_config(struct bgp_config *c);
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
void bgp_close_conn(struct bgp_conn *c);
@@ -542,11 +565,17 @@ void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len);
const char *bgp_format_role_name(u8 role);
static inline int
-rte_resolvable(rte *rt)
+rte_resolvable(const rte *rt)
{
- return rt->attrs->dest != RTD_UNREACHABLE;
+ eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop);
+ if (!nhea)
+ return 0;
+
+ struct nexthop_adata *nhad = (void *) nhea->u.ptr;
+ return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE);
}
+extern struct rte_owner_class bgp_rte_owner_class;
#ifdef LOCAL_DEBUG
#define BGP_FORCE_DEBUG 1
@@ -562,73 +591,61 @@ rte_resolvable(rte *rt)
/* attrs.c */
-static inline eattr *
-bgp_find_attr(ea_list *attrs, uint code)
-{
- return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code));
-}
-
eattr *
-bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val);
-
-static inline void
-bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
+bgp_find_attr(ea_list *attrs, uint code);
-static inline void
-bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val)
-{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
-
-static inline void
-bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len)
-{
- struct adata *a = lp_alloc_adata(pool, len);
- bmemcpy(a->data, data, len);
- bgp_set_attr(to, pool, code, flags, (uintptr_t) a);
-}
-
-#define bgp_unset_attr(to, pool, code) ea_unset_attr(to, pool, 0, code)
+void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val);
+void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad);
+void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len);
+void bgp_unset_attr(ea_list **to, uint code);
int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end);
ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
-void bgp_finish_attrs(struct bgp_parse_state *s, rta *a);
+void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
+
+void bgp_setup_out_table(struct bgp_channel *c);
+
+void bgp_init_pending_tx(struct bgp_channel *c);
+void bgp_free_pending_tx(struct bgp_channel *c);
-void bgp_init_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b);
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_init_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp);
+void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
-int bgp_rte_better(struct rte *, struct rte *);
-int bgp_rte_mergable(rte *pri, rte *sec);
-int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
-struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool);
-u32 bgp_rte_igp_metric(struct rte *);
-void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old);
+int bgp_rte_better(const rte *, const rte *);
+int bgp_rte_mergable(const rte *pri, const rte *sec);
+int bgp_rte_recalculate(struct rtable_private *table, net *net, rte *new, rte *old, rte *old_best);
+void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *first, struct rt_pending_export *last, const rte **feed, uint count);
+u32 bgp_rte_igp_metric(const rte *);
+void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old);
int bgp_preexport(struct channel *, struct rte *);
-int bgp_get_attr(const struct eattr *e, byte *buf, int buflen);
-void bgp_get_route_info(struct rte *, byte *buf);
-int bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad);
+void bgp_get_route_info(const rte *, byte *);
+int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad);
+
+static inline struct bgp_proto *bgp_rte_proto(const rte *rte)
+{
+ return (rte->src->owner->class == &bgp_rte_owner_class) ?
+ SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL;
+}
#define BGP_AIGP_METRIC 1
#define BGP_AIGP_MAX U64(0xffffffffffffffff)
static inline u64
-bgp_total_aigp_metric(rte *r)
+bgp_total_aigp_metric(const rte *e)
{
u64 metric = BGP_AIGP_MAX;
const struct adata *ad;
- bgp_total_aigp_metric_(r, &metric, &ad);
+ bgp_total_aigp_metric_(e, &metric, &ad);
return metric;
}
+void bgp_register_attrs(void);
+
/* packets.c */
@@ -640,6 +657,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
void bgp_kick_tx(void *vconn);
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, uint size);
+void bgp_uncork(void *vp);
const char * bgp_error_dsc(unsigned code, unsigned subcode);
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
@@ -665,27 +683,32 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */
-#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */
-#define BA_AS_PATH 0x02 /* WM */
-#define BA_NEXT_HOP 0x03 /* WM */
-#define BA_MULTI_EXIT_DISC 0x04 /* ON */
-#define BA_LOCAL_PREF 0x05 /* WD */
-#define BA_ATOMIC_AGGR 0x06 /* WD */
-#define BA_AGGREGATOR 0x07 /* OT */
-#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */
-#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */
-#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */
-#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */
-#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */
-#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */
-#define BA_AS4_PATH 0x11 /* RFC 6793 */
-#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
-#define BA_AIGP 0x1a /* RFC 7311 */
-#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
+enum bgp_attr_id {
+ BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */
+ BA_AS_PATH = 0x02, /* WM */
+ BA_NEXT_HOP = 0x03, /* WM */
+ BA_MULTI_EXIT_DISC = 0x04, /* ON */
+ BA_LOCAL_PREF = 0x05, /* WD */
+ BA_ATOMIC_AGGR = 0x06, /* WD */
+ BA_AGGREGATOR = 0x07, /* OT */
+ BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */
+ BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */
+ BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */
+ BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */
+ BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */
+ BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */
+ BA_AS4_PATH = 0x11, /* RFC 6793 */
+ BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */
+ BA_AIGP = 0x1a, /* RFC 7311 */
+ BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */
#define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */
/* Bird's private internal BGP attributes */
-#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */
+ BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */
+
+/* Maximum */
+ BGP_ATTR_MAX,
+};
/* BGP connection states */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index a2dfa747..8b0d4356 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -19,18 +19,17 @@ CF_DECLS
CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR,
- START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH,
- BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR,
- BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY,
+ START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER,
+ BGP_LOCAL_PREF, BGP_MED,
SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE,
IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR,
- DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID,
- BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
+ DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES,
+ IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG,
LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
- DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE,
+ DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE,
FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER,
RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC, GLOBAL)
@@ -46,12 +45,14 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER,
CF_GRAMMAR
/* Workaround for collisions between keywords and symbols */
-symbol: ROLE | PEER | PROVIDER | CUSTOMER | RS_SERVER | RS_CLIENT ;
+toksym: ROLE | PEER | PROVIDER | CUSTOMER | RS_SERVER | RS_CLIENT ;
+toksym: BGP_MED | BGP_LOCAL_PREF | SOURCE ;
proto: bgp_proto '}' ;
bgp_proto_start: proto_start BGP {
this_proto = proto_config_new(&proto_bgp, $1);
+ this_proto->loop_order = DOMAIN_ORDER(proto);
BGP_CFG->local_port = BGP_PORT;
BGP_CFG->remote_port = BGP_PORT;
BGP_CFG->multihop = -1; /* undefined */
@@ -329,43 +330,14 @@ bgp_channel_end:
if (!this_channel->table)
cf_error("Routing table not specified");
+ if (BGP_CC->import_table)
+ this_channel->in_keep |= RIK_PREFILTER;
+
this_channel = NULL;
};
bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end;
-
-dynamic_attr: BGP_ORIGIN
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); } ;
-dynamic_attr: BGP_PATH
- { $$ = f_new_dynamic_attr(EAF_TYPE_AS_PATH, T_PATH, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); } ;
-dynamic_attr: BGP_NEXT_HOP
- { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)); } ;
-dynamic_attr: BGP_MED
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); } ;
-dynamic_attr: BGP_LOCAL_PREF
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); } ;
-dynamic_attr: BGP_ATOMIC_AGGR
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_ATOMIC_AGGR)); } ;
-dynamic_attr: BGP_AGGREGATOR
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AGGREGATOR)); } ;
-dynamic_attr: BGP_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); } ;
-dynamic_attr: BGP_ORIGINATOR_ID
- { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); } ;
-dynamic_attr: BGP_CLUSTER_LIST
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ;
-dynamic_attr: BGP_EXT_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ;
-dynamic_attr: BGP_AIGP
- { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ;
-dynamic_attr: BGP_LARGE_COMMUNITY
- { $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ;
-dynamic_attr: BGP_OTC
- { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_ONLY_TO_CUSTOMER)); } ;
-
-
-
CF_ENUM(T_ENUM_BGP_ORIGIN, ORIGIN_, IGP, EGP, INCOMPLETE)
CF_CODE
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 5c17c370..978a1891 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -15,8 +15,8 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
-#include "nest/attrs.h"
+#include "nest/rt.h"
+#include "lib/attrs.h"
#include "proto/mrt/mrt.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
@@ -973,7 +973,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
- bgp_start_timer(conn->hold_timer, conn->hold_time);
+ bgp_start_timer(p, conn->hold_timer, conn->hold_time);
bgp_conn_enter_openconfirm_state(conn);
}
@@ -1002,7 +1002,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
#define MISMATCHED_AF " - mismatched address family (%I for %s)"
static void
-bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
+bgp_apply_next_hop(struct bgp_parse_state *s, ea_list **to, ip_addr gw, ip_addr ll)
{
struct bgp_proto *p = s->proto;
struct bgp_channel *c = s->channel;
@@ -1025,10 +1025,18 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
if (nbr->scope == SCOPE_HOST)
WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr);
- a->dest = RTD_UNICAST;
- a->nh.gw = nbr->addr;
- a->nh.iface = nbr->iface;
- a->igp_metric = c->cf->cost;
+ ea_set_attr_u32(to, &ea_gen_igp_metric, 0, c->cf->cost);
+
+ struct nexthop_adata nhad = {
+ .nh = {
+ .gw = nbr->addr,
+ .iface = nbr->iface,
+ },
+ .ad = {
+ .length = sizeof nhad - sizeof nhad.ad,
+ },
+ };
+ ea_set_attr_data(to, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length);
}
else /* GW_RECURSIVE */
{
@@ -1037,59 +1045,52 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
ip_addr lla = (c->cf->next_hop_prefer == NHP_LOCAL) ? ll : IPA_NONE;
- s->hostentry = rt_get_hostentry(tab, gw, lla, c->c.table);
-
- if (!s->mpls)
- rta_apply_hostentry(a, s->hostentry, NULL);
- /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
+ if (s->mpls)
+ {
+ u32 labels[BGP_MPLS_MAX];
+ ea_set_hostentry(to, c->c.table, tab, gw, lla, BGP_MPLS_MAX, labels);
+ }
+ else
+ ea_set_hostentry(to, c->c.table, tab, gw, lla, 0, NULL);
}
}
static void
-bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
+bgp_apply_mpls_labels(struct bgp_parse_state *s, ea_list **to, u32 lnum, u32 labels[lnum])
{
if (lnum > MPLS_MAX_LABEL_STACK)
{
REPORT("Too many MPLS labels ($u)", lnum);
- a->dest = RTD_UNREACHABLE;
- a->hostentry = NULL;
- a->nh = (struct nexthop) { };
+ ea_set_dest(to, 0, RTD_UNREACHABLE);
return;
}
/* Handle implicit NULL as empty MPLS stack */
if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
- lnum = 0;
+ lnum = s->mpls_labels->length = 0;
if (s->channel->cf->gw_mode == GW_DIRECT)
{
- a->nh.labels = lnum;
- memcpy(a->nh.label, labels, 4*lnum);
+ eattr *e = ea_find(*to, &ea_gen_nexthop);
+ struct {
+ struct nexthop_adata nhad;
+ u32 labels[MPLS_MAX_LABEL_STACK];
+ } nh;
+
+ memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length);
+ nh.nhad.nh.labels = lnum;
+ memcpy(nh.labels, labels, lnum * sizeof(u32));
+ nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32);
}
else /* GW_RECURSIVE */
{
- mpls_label_stack ms;
-
- ms.len = lnum;
- memcpy(ms.stack, labels, 4*lnum);
- rta_apply_hostentry(a, s->hostentry, &ms);
- }
-}
-
-static void
-bgp_apply_flow_validation(struct bgp_parse_state *s, const net_addr *n, rta *a)
-{
- struct bgp_channel *c = s->channel;
- int valid = rt_flowspec_check(c->base_table, c->c.table, n, a, s->proto->is_interior);
- a->dest = valid ? RTD_NONE : RTD_UNREACHABLE;
-
- /* Invalidate cached rta if dest changes */
- if (s->cached_rta && (s->cached_rta->dest != a->dest))
- {
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ eattr *e = ea_find(*to, &ea_gen_hostentry);
+ ASSERT_DIE(e);
+ struct hostentry_adata *head = (void *) e->u.ptr;
+ memcpy(&head->labels, labels, lnum * sizeof(u32));
+ head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data;
}
}
@@ -1130,7 +1131,7 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
return 0;
/* Do not pass NEXT_HOP between different VRFs */
- if (p->p.vrf_set && s->src && s->src->p.vrf_set && (p->p.vrf != s->src->p.vrf))
+ if (p->p.vrf && s->src && s->src->p.vrf && (p->p.vrf != s->src->p.vrf))
return 0;
/* Keep it when exported to internal peers */
@@ -1143,35 +1144,45 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
return p->neigh && (p->neigh->iface == ifa);
}
-static inline int
+static inline struct nexthop *
bgp_use_gateway(struct bgp_export_state *s)
{
struct bgp_proto *p = s->proto;
struct bgp_channel *c = s->channel;
- rta *ra = s->route->attrs;
+ ea_list *ra = s->route->attrs;
/* Handle next hop self option - also applies to gateway */
if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
- return 0;
+ return NULL;
+
+ eattr *nhea = ea_find(ra, &ea_gen_nexthop);
+ if (!nhea)
+ return NULL;
/* We need one valid global gateway */
- if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
- return 0;
+ struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ if (!NEXTHOP_IS_REACHABLE(nhad) ||
+ !NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) ||
+ ipa_is_link_local(nhad->nh.gw))
+ return NULL;
/* Check for non-matching AF */
- if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
- return 0;
+ if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
+ return NULL;
/* Do not use gateway from different VRF */
- if (p->p.vrf_set && ra->nh.iface && (p->p.vrf != ra->nh.iface->master))
+ if (p->p.vrf && nhad->nh.iface && (p->p.vrf != nhad->nh.iface->master))
return 0;
/* Use it when exported to internal peers */
if (p->is_interior)
- return 1;
+ return &nhad->nh;
/* Use it when forwarded to single-hop BGP peer on on the same iface */
- return p->neigh && (p->neigh->iface == ra->nh.iface);
+ if (p->neigh && (p->neigh->iface == nhad->nh.iface))
+ return &nhad->nh;
+
+ return NULL;
}
static void
@@ -1179,36 +1190,36 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
{
if (!a || !bgp_use_next_hop(s, a))
{
- if (bgp_use_gateway(s))
+ struct nexthop *nhloc;
+ if (nhloc = bgp_use_gateway(s))
{
- rta *ra = s->route->attrs;
- ip_addr nh[1] = { ra->nh.gw };
- bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
+ ip_addr nh[1] = { nhloc->gw };
+ bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16);
if (s->mpls)
{
u32 implicit_null = BGP_MPLS_NULL;
- u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
- uint lnum = ra->nh.labels ? ra->nh.labels : 1;
- bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
+ u32 *labels = nhloc->labels ? nhloc->label : &implicit_null;
+ uint lnum = nhloc->labels ? nhloc->labels : 1;
+ bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
}
else
- bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK);
+ bgp_unset_attr(to, BA_MPLS_LABEL_STACK);
}
else
{
ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
- bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
+ bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
s->local_next_hop = 1;
/* TODO: Use local MPLS assigned label */
if (s->mpls)
{
u32 implicit_null = BGP_MPLS_NULL;
- bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
+ bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
}
else
- bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK);
+ bgp_unset_attr(to, BA_MPLS_LABEL_STACK);
}
}
@@ -1270,7 +1281,7 @@ bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size
}
static void
-bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, ea_list **to)
{
struct bgp_channel *c = s->channel;
struct adata *ad = lp_alloc_adata(s->pool, 32);
@@ -1311,8 +1322,8 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
// XXXX validate next hop
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
- bgp_apply_next_hop(s, a, nh[0], nh[1]);
+ bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, to, nh[0], nh[1]);
}
static uint
@@ -1350,7 +1361,7 @@ bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint siz
}
static void
-bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, ea_list **to)
{
struct bgp_channel *c = s->channel;
struct adata *ad = lp_alloc_adata(s->pool, 32);
@@ -1392,8 +1403,8 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
// XXXX validate next hop
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
- bgp_apply_next_hop(s, a, nh[0], nh[1]);
+ bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, to, nh[0], nh[1]);
}
@@ -1405,7 +1416,7 @@ bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte
}
static void
-bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
+bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
{
/*
* Although we expect no next hop and RFC 7606 7.11 states that attribute
@@ -1417,11 +1428,11 @@ bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, ui
}
static void
-bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
+bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a, ea_list **to)
{
/* NEXT_HOP shall not pass */
if (a)
- bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
+ bgp_unset_attr(to, BA_NEXT_HOP);
}
@@ -1430,15 +1441,17 @@ bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
*/
static void
-bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a0)
+bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_list *a0)
{
if (path_id != s->last_id)
{
+ rt_unlock_source(s->last_src);
+
s->last_src = rt_get_source(&s->proto->p, path_id);
s->last_id = path_id;
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ ea_free(s->cached_ea);
+ s->cached_ea = NULL;
}
if (!a0)
@@ -1448,23 +1461,20 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a
REPORT("Invalid route %N withdrawn", n);
/* Route withdraw */
- rte_update3(&s->channel->c, n, NULL, s->last_src);
+ rte_update(&s->channel->c, n, NULL, s->last_src);
return;
}
/* Prepare cached route attributes */
- if (s->cached_rta == NULL)
- {
- /* Workaround for rta_lookup() breaking eattrs */
- ea_list *ea = a0->eattrs;
- s->cached_rta = rta_lookup(a0);
- a0->eattrs = ea;
- }
+ if (s->cached_ea == NULL)
+ s->cached_ea = ea_lookup(a0, 0);
- rta *a = rta_clone(s->cached_rta);
- rte *e = rte_get_temp(a, s->last_src);
+ rte e0 = {
+ .attrs = s->cached_ea,
+ .src = s->last_src,
+ };
- rte_update3(&s->channel->c, n, e, s->last_src);
+ rte_update(&s->channel->c, n, &e0, s->last_src);
}
static void
@@ -1487,9 +1497,10 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte
}
static void
-bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
+bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, ea_list **to)
{
- u32 labels[BGP_MPLS_MAX], label;
+ u32 labels[BGP_MPLS_MAX];
+ u32 label;
uint lnum = 0;
do {
@@ -1508,26 +1519,15 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p
}
while (!(label & BGP_MPLS_BOS));
- if (!a)
+ if (!*to)
return;
- /* Attach MPLS attribute unless we already have one */
- if (!s->mpls_labels)
- {
- s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
- bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
- }
-
- /* Overwrite data in the attribute */
- s->mpls_labels->length = 4*lnum;
- memcpy(s->mpls_labels->data, labels, 4*lnum);
-
/* Update next hop entry in rta */
- bgp_apply_mpls_labels(s, a, labels, lnum);
+ bgp_apply_mpls_labels(s, to, lnum, labels);
/* Attributes were changed, invalidate cached entry */
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ rta_free(s->cached_ea);
+ s->cached_ea = NULL;
return;
}
@@ -1563,14 +1563,14 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1596,7 +1596,7 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
if (l > IP4_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10);
@@ -1648,14 +1648,14 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1681,7 +1681,7 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
if (l > IP6_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10);
@@ -1736,14 +1736,14 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1769,7 +1769,7 @@ bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
/* Decode route distinguisher */
if (l < 64)
@@ -1833,14 +1833,14 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1866,7 +1866,7 @@ bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode MPLS labels */
if (s->mpls)
- bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+ bgp_decode_mpls_labels(s, &pos, &len, &l, &a);
/* Decode route distinguisher */
if (l < 64)
@@ -1920,14 +1920,14 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -1978,10 +1978,6 @@ bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
net_fill_flow4(n, px, pxlen, pos, flen);
ADVANCE(pos, len, flen);
- /* Apply validation procedure per RFC 8955 (6) */
- if (a && s->channel->cf->validate)
- bgp_apply_flow_validation(s, n, a);
-
bgp_rte_update(s, n, path_id, a);
}
}
@@ -2012,14 +2008,14 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
}
static void
-bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a)
{
while (len)
{
@@ -2070,10 +2066,6 @@ bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
net_fill_flow6(n, px, pxlen, pos, flen);
ADVANCE(pos, len, flen);
- /* Apply validation procedure per RFC 8955 (6) */
- if (a && s->channel->cf->validate)
- bgp_apply_flow_validation(s, n, a);
-
bgp_rte_update(s, n, path_id, a);
}
}
@@ -2250,6 +2242,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
* var IPv4 Network Layer Reachability Information
*/
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
+
int lr, la;
la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
@@ -2271,6 +2265,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
static byte *
bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
+
/*
* 2 B IPv4 Withdrawn Routes Length (zero)
* --- IPv4 Withdrawn Routes NLRI (unused)
@@ -2409,7 +2405,7 @@ again: ;
};
/* Try unreachable bucket */
- if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
bgp_create_ip_unreach(&s, buck, buf, end):
@@ -2419,14 +2415,13 @@ again: ;
}
/* Try reachable buckets */
- if (!EMPTY_LIST(c->bucket_queue))
+ if (!EMPTY_LIST(c->ptx->bucket_queue))
{
- buck = HEAD(c->bucket_queue);
+ buck = HEAD(c->ptx->bucket_queue);
/* Cleanup empty buckets */
- if (EMPTY_LIST(buck->prefixes))
+ if (bgp_done_bucket(c, buck))
{
- bgp_free_bucket(c, buck);
lp_restore(tmp_linpool, &tmpp);
goto again;
}
@@ -2435,10 +2430,7 @@ again: ;
bgp_create_ip_reach(&s, buck, buf, end):
bgp_create_mp_reach(&s, buck, buf, end);
- if (EMPTY_LIST(buck->prefixes))
- bgp_free_bucket(c, buck);
- else
- bgp_defer_bucket(c, buck);
+ bgp_done_bucket(c, buck);
if (!res)
{
@@ -2522,7 +2514,6 @@ static inline void
bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
{
struct bgp_channel *c = bgp_get_channel(s->proto, afi);
- rta *a = NULL;
if (!c)
DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
@@ -2533,6 +2524,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
s->last_id = 0;
s->last_src = s->proto->p.main_source;
+ rt_lock_source(s->last_src);
/*
* IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
@@ -2543,26 +2535,24 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
if (ea)
{
- a = allocz(RTA_MAX_SIZE);
+ ea_set_attr_data(&ea, &ea_gen_from, 0, &s->proto->remote_ip, sizeof(ip_addr));
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->c.preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BGP);
- a->source = RTS_BGP;
- a->scope = SCOPE_UNIVERSE;
- a->from = s->proto->remote_ip;
- a->eattrs = ea;
- a->pref = c->c.preference;
-
- c->desc->decode_next_hop(s, nh, nh_len, a);
- bgp_finish_attrs(s, a);
+ c->desc->decode_next_hop(s, nh, nh_len, &ea);
+ bgp_finish_attrs(s, &ea);
/* Handle withdraw during next hop decoding */
if (s->err_withdraw)
- a = NULL;
+ ea = NULL;
}
- c->desc->decode_nlri(s, nlri, len, a);
+ c->desc->decode_nlri(s, nlri, len, ea);
+
+ rta_free(s->cached_ea);
+ s->cached_ea = NULL;
- rta_free(s->cached_rta);
- s->cached_rta = NULL;
+ rt_unlock_source(s->last_src);
}
static void
@@ -2582,7 +2572,7 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
if (conn->state != BS_ESTABLISHED)
{ bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- bgp_start_timer(conn->hold_timer, conn->hold_time);
+ bgp_start_timer(p, conn->hold_timer, conn->hold_time);
struct lp_state tmpp;
lp_save(tmp_linpool, &tmpp);
@@ -2667,7 +2657,7 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
ea, s.mp_next_hop_data, s.mp_next_hop_len);
done:
- rta_free(s.cached_rta);
+ rta_free(s.cached_ea);
lp_restore(tmp_linpool, &tmpp);
return;
}
@@ -2923,7 +2913,7 @@ bgp_fire_tx(struct bgp_conn *conn)
{
conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
- bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
+ bgp_start_timer(p, conn->keepalive_timer, conn->keepalive_time);
return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
}
else while (conn->channels_to_send)
@@ -2991,7 +2981,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
{
ASSERT(conn->sk);
- DBG("BGP: Scheduling packet type %d\n", type);
+ struct bgp_proto *p = conn->bgp;
+ if (c)
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name);
+ else
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type);
if (c)
{
@@ -3008,7 +3002,7 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
conn->packets_to_send |= 1 << type;
if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
- ev_schedule(conn->tx_ev);
+ proto_send_event(&p->p, conn->tx_ev);
}
void
bgp_kick_tx(void *vconn)
@@ -3021,7 +3015,7 @@ bgp_kick_tx(void *vconn)
;
if (!max && !ev_active(conn->tx_ev))
- ev_schedule(conn->tx_ev);
+ proto_send_event(&conn->bgp->p, conn->tx_ev);
}
void
@@ -3029,13 +3023,14 @@ bgp_tx(sock *sk)
{
struct bgp_conn *conn = sk->data;
+ ASSERT_DIE(birdloop_inside(conn->bgp->p.loop));
DBG("BGP: TX hook\n");
uint max = 1024;
while (--max && (bgp_fire_tx(conn) > 0))
;
if (!max && !ev_active(conn->tx_ev))
- ev_schedule(conn->tx_ev);
+ proto_send_event(&conn->bgp->p, conn->tx_ev);
}
@@ -3224,7 +3219,7 @@ bgp_rx_keepalive(struct bgp_conn *conn)
struct bgp_proto *p = conn->bgp;
BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
- bgp_start_timer(conn->hold_timer, conn->hold_time);
+ bgp_start_timer(p, conn->hold_timer, conn->hold_time);
if (conn->state == BS_OPENCONFIRM)
{ bgp_conn_enter_established_state(conn); return; }
@@ -3266,6 +3261,30 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
}
}
+void
+bgp_uncork(void *vp)
+{
+ /* The uncork event is run from &main_birdloop and there is no useful way how
+ * to assign the target loop to it, thus we have to lock it ourselves. */
+
+ struct bgp_proto *p = vp;
+ if (!p)
+ return;
+
+ birdloop_enter(p->p.loop);
+
+ if (p && p->conn && (p->conn->state == BS_ESTABLISHED) && !p->conn->sk->rx_hook)
+ {
+ struct birdsock *sk = p->conn->sk;
+ ASSERT_DIE(sk->rpos > sk->rbuf);
+ sk_resume_rx(p->p.loop, sk, bgp_rx);
+ bgp_rx(sk, sk->rpos - sk->rbuf);
+ BGP_TRACE(D_PACKETS, "Uncorked");
+ }
+
+ birdloop_leave(p->p.loop);
+}
+
/**
* bgp_rx - handle received data
* @sk: socket
@@ -3280,6 +3299,7 @@ int
bgp_rx(sock *sk, uint size)
{
struct bgp_conn *conn = sk->data;
+ struct bgp_proto *p = conn->bgp;
byte *pkt_start = sk->rbuf;
byte *end = pkt_start + size;
uint i, len;
@@ -3289,6 +3309,12 @@ bgp_rx(sock *sk, uint size)
{
if ((conn->state == BS_CLOSE) || (conn->sk != sk))
return 0;
+ if ((conn->state == BS_ESTABLISHED) && rt_cork_check(conn->bgp->uncork_ev))
+ {
+ sk_pause_rx(p->p.loop, sk);
+ BGP_TRACE(D_PACKETS, "Corked");
+ return 0;
+ }
for(i=0; i<16; i++)
if (pkt_start[i] != 0xff)
{
diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c
index d1c334e1..82fd426a 100644
--- a/proto/mrt/mrt.c
+++ b/proto/mrt/mrt.c
@@ -228,7 +228,7 @@ mrt_next_table_(rtable *tab, rtable *tab_ptr, const char *pattern)
NODE_VALID(tn);
tn = tn->next)
{
- tab = SKIP_BACK(struct rtable, n, tn);
+ tab = SKIP_BACK(rtable, n, tn);
if (patmatch(pattern, tab->name) &&
((tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6)))
return tab;
@@ -243,13 +243,15 @@ mrt_next_table(struct mrt_table_dump_state *s)
rtable *tab = mrt_next_table_(s->table, s->table_ptr, s->table_expr);
if (s->table)
- rt_unlock_table(s->table);
+ RT_LOCKED(s->table, tab)
+ rt_unlock_table(tab);
s->table = tab;
s->ipv4 = tab ? (tab->addr_type == NET_IP4) : 0;
if (s->table)
- rt_lock_table(s->table);
+ RT_LOCKED(s->table, tab)
+ rt_lock_table(tab);
return s->table;
}
@@ -423,7 +425,7 @@ mrt_rib_table_header(struct mrt_table_dump_state *s, net_addr *n)
static void
mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r)
{
- struct ea_list *eattrs = r->attrs->eattrs;
+ struct ea_list *eattrs = r->attrs;
buffer *b = &s->buf;
if (!eattrs)
@@ -431,7 +433,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r)
/* Attribute list must be normalized for bgp_encode_attrs() */
if (!rta_is_cached(r->attrs))
- ea_normalize(eattrs);
+ eattrs = ea_normalize(eattrs, 0);
mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE);
byte *pos = b->pos;
@@ -460,7 +462,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r)
return;
fail:
- mrt_log(s, "Attribute list too long for %N", r->net->n.addr);
+ mrt_log(s, "Attribute list too long for %N", r->net);
}
#endif
@@ -472,9 +474,9 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r)
#ifdef CONFIG_BGP
/* Find peer index */
- if (r->src->proto->proto == &proto_bgp)
+ struct bgp_proto *p = bgp_rte_proto(r);
+ if (p)
{
- struct bgp_proto *p = (void *) r->src->proto;
struct mrt_peer_entry *n =
HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip);
@@ -512,24 +514,21 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path)
mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype);
mrt_rib_table_header(s, n->n.addr);
- rte *rt, *rt0;
- for (rt0 = n->routes; rt = rt0; rt0 = rt0->next)
+ for (struct rte_storage *rt, *rt0 = n->routes; rt = rt0; rt0 = rt0->next)
{
- if (rte_is_filtered(rt))
+ if (rte_is_filtered(&rt->rte))
continue;
/* Skip routes that should be reported in the other phase */
- if (!s->always_add_path && (!rt->src->private_id != !s->add_path))
+ if (!s->always_add_path && (!rt->rte.src->private_id != !s->add_path))
{
s->want_add_path = 1;
continue;
}
- if (f_run(s->filter, &rt, s->linpool, 0) <= F_ACCEPT)
- mrt_rib_table_entry(s, rt);
-
- if (rt != rt0)
- rte_free(rt);
+ rte e = rt->rte;
+ if (f_run(s->filter, &e, 0) <= F_ACCEPT)
+ mrt_rib_table_entry(s, &e);
lp_flush(s->linpool);
}
@@ -576,14 +575,18 @@ mrt_table_dump_init(pool *pp)
static void
mrt_table_dump_free(struct mrt_table_dump_state *s)
{
- if (s->table_open)
- FIB_ITERATE_UNLINK(&s->fit, &s->table->fib);
-
if (s->table)
- rt_unlock_table(s->table);
+ RT_LOCKED(s->table, tab)
+ {
+ if (s->table_open)
+ FIB_ITERATE_UNLINK(&s->fit, &tab->fib);
+
+ rt_unlock_table(tab);
+ }
if (s->table_ptr)
- rt_unlock_table(s->table_ptr);
+ RT_LOCKED(s->table_ptr, tab)
+ rt_unlock_table(tab);
config_del_obstacle(s->config);
@@ -609,16 +612,19 @@ mrt_table_dump_step(struct mrt_table_dump_state *s)
mrt_peer_table_dump(s);
- FIB_ITERATE_INIT(&s->fit, &s->table->fib);
+ RT_LOCKED(s->table, tab)
+ {
+
+ FIB_ITERATE_INIT(&s->fit, &tab->fib);
s->table_open = 1;
step:
- FIB_ITERATE_START(&s->table->fib, &s->fit, net, n)
+ FIB_ITERATE_START(&tab->fib, &s->fit, net, n)
{
if (s->max < 0)
{
FIB_ITERATE_PUT(&s->fit);
- return 0;
+ RT_RETURN(tab, 0);
}
/* With Always ADD_PATH option, we jump directly to second phase */
@@ -633,6 +639,8 @@ mrt_table_dump_step(struct mrt_table_dump_state *s)
FIB_ITERATE_END;
s->table_open = 0;
+ }
+
mrt_close_file(s);
mrt_peer_table_flush(s);
}
@@ -664,7 +672,8 @@ mrt_timer(timer *t)
s->always_add_path = cf->always_add_path;
if (s->table_ptr)
- rt_lock_table(s->table_ptr);
+ RT_LOCKED(s->table_ptr, tab)
+ rt_lock_table(tab);
p->table_dump = s;
ev_schedule(p->event);
@@ -706,14 +715,17 @@ mrt_dump_cont(struct cli *c)
cli_printf(c, 0, "");
mrt_table_dump_free(c->rover);
- c->cont = c->cleanup = c->rover = NULL;
+ c->cont = NULL;
+ c->cleanup = NULL;
+ c->rover = NULL;
}
-static void
+static int
mrt_dump_cleanup(struct cli *c)
{
mrt_table_dump_free(c->rover);
c->rover = NULL;
+ return 0;
}
void
@@ -737,7 +749,8 @@ mrt_dump_cmd(struct mrt_dump_data *d)
s->filename = d->filename;
if (s->table_ptr)
- rt_lock_table(s->table_ptr);
+ RT_LOCKED(s->table_ptr, tab)
+ rt_lock_table(tab);
this_cli->cont = mrt_dump_cont;
this_cli->cleanup = mrt_dump_cleanup;
@@ -907,7 +920,6 @@ mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSE
struct protocol proto_mrt = {
.name = "MRT",
.template = "mrt%d",
- .class = PROTOCOL_MRT,
.proto_size = sizeof(struct mrt_proto),
.config_size = sizeof(struct mrt_config),
.init = mrt_init,
diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h
index 4ff94c12..f535a391 100644
--- a/proto/mrt/mrt.h
+++ b/proto/mrt/mrt.h
@@ -13,7 +13,7 @@
#include "nest/bird.h"
#include "nest/protocol.h"
#include "lib/lists.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "lib/event.h"
#include "lib/hash.h"
@@ -40,7 +40,7 @@ struct mrt_proto {
struct mrt_dump_data {
const char *table_expr;
- struct rtable *table_ptr;
+ rtable *table_ptr;
const struct filter *filter;
const char *filename;
};
@@ -60,7 +60,7 @@ struct mrt_table_dump_state {
/* Configuration information */
const char *table_expr; /* Wildcard for table name (or NULL) */
- struct rtable *table_ptr; /* Explicit table (or NULL) */
+ rtable *table_ptr; /* Explicit table (or NULL) */
const struct filter *filter; /* Optional filter */
const char *filename; /* Filename pattern */
int always_add_path; /* Always use *_ADDPATH message subtypes */
@@ -73,7 +73,7 @@ struct mrt_table_dump_state {
HASH(struct mrt_peer_entry) peer_hash; /* Hash for peers to find the index */
- struct rtable *table; /* Processed table, NULL initially */
+ rtable *table; /* Processed table, NULL initially */
struct fib_iterator fit; /* Iterator in processed table */
int table_open; /* Whether iterator is linked */
diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y
index 4b7d5a36..bc3df8db 100644
--- a/proto/ospf/config.Y
+++ b/proto/ospf/config.Y
@@ -190,7 +190,7 @@ ospf_check_auth(void)
CF_DECLS
-CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID)
+CF_KEYWORDS(OSPF, V2, V3)
CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT)
CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT)
CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP)
@@ -505,11 +505,6 @@ ospf_iface:
ospf_iface_start ospf_iface_patt_list ospf_iface_opt_list { ospf_iface_finish(); }
;
-dynamic_attr: OSPF_METRIC1 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC1); } ;
-dynamic_attr: OSPF_METRIC2 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC2); } ;
-dynamic_attr: OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_TAG); } ;
-dynamic_attr: OSPF_ROUTER_ID { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_OSPF_ROUTER_ID); } ;
-
CF_CLI_HELP(SHOW OSPF, ..., [[Show information about OSPF protocol]]);
CF_CLI(SHOW OSPF, optproto, [<name>], [[Show information about OSPF protocol]])
{ PROTO_WALK_CMD($3, &proto_ospf, p) ospf_sh(p); };
diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c
index 87e3d95e..0aa7fa00 100644
--- a/proto/ospf/iface.c
+++ b/proto/ospf/iface.c
@@ -136,7 +136,7 @@ ospf_sk_open(struct ospf_iface *ifa)
sk->flags = SKF_LADDR_RX | (ifa->check_ttl ? SKF_TTL_RX : 0);
sk->ttl = ifa->cf->ttl_security ? 255 : 1;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
/* 12 is an offset of the checksum in an OSPFv3 packet */
@@ -220,7 +220,7 @@ ospf_open_vlink_sk(struct ospf_proto *p)
sk->data = (void *) p;
sk->flags = 0;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
/* 12 is an offset of the checksum in an OSPFv3 packet */
@@ -484,9 +484,9 @@ ospf_iface_find(struct ospf_proto *p, struct iface *what)
}
static void
-ospf_iface_add(struct object_lock *lock)
+ospf_iface_add(void *_ifa)
{
- struct ospf_iface *ifa = lock->data;
+ struct ospf_iface *ifa = _ifa;
struct ospf_proto *p = ifa->oa->po;
/* Open socket if interface is not stub */
@@ -668,8 +668,11 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
lock->port = OSPF_PROTO;
lock->inst = ifa->instance_id;
lock->iface = iface;
- lock->data = ifa;
- lock->hook = ospf_iface_add;
+ lock->event = (event) {
+ .hook = ospf_iface_add,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
olock_acquire(lock);
}
@@ -1222,12 +1225,11 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a)
static void
ospf_reconfigure_ifaces2(struct ospf_proto *p)
{
- struct iface *iface;
struct ifa *a;
- WALK_LIST(iface, iface_list)
+ IFACE_WALK(iface)
{
- if (p->p.vrf_set && p->p.vrf != iface->master)
+ if (p->p.vrf && p->p.vrf != iface->master)
continue;
if (! (iface->flags & IF_UP))
@@ -1271,12 +1273,11 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p)
static void
ospf_reconfigure_ifaces3(struct ospf_proto *p)
{
- struct iface *iface;
struct ifa *a;
- WALK_LIST(iface, iface_list)
+ IFACE_WALK(iface)
{
- if (p->p.vrf_set && p->p.vrf != iface->master)
+ if (p->p.vrf && p->p.vrf != iface->master)
continue;
if (! (iface->flags & IF_UP))
diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c
index ca369819..b0fdc42f 100644
--- a/proto/ospf/neighbor.c
+++ b/proto/ospf/neighbor.c
@@ -777,7 +777,7 @@ ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd)
if (use_bfd && !n->bfd_req)
n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip,
n->ifa->iface, p->p.vrf,
- ospf_neigh_bfd_hook, n, NULL);
+ ospf_neigh_bfd_hook, n, p->p.loop, NULL);
if (!use_bfd && n->bfd_req)
{
diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c
index ad4b2d14..896bf5a3 100644
--- a/proto/ospf/ospf.c
+++ b/proto/ospf/ospf.c
@@ -106,11 +106,12 @@
#include <stdlib.h>
#include "ospf.h"
+#include "lib/macro.h"
-static int ospf_preexport(struct channel *P, rte *new);
+static int ospf_preexport(struct channel *C, rte *new);
static void ospf_reload_routes(struct channel *C);
-static int ospf_rte_better(struct rte *new, struct rte *old);
-static u32 ospf_rte_igp_metric(struct rte *rt);
+static int ospf_rte_better(const rte *new, const rte *old);
+static u32 ospf_rte_igp_metric(const rte *rt);
static void ospf_disp(timer *timer);
@@ -370,39 +371,42 @@ ospf_init(struct proto_config *CF)
P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
P->rt_notify = ospf_rt_notify;
- P->if_notify = ospf_if_notify;
- P->ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3;
+ P->iface_sub.if_notify = ospf_if_notify;
+ P->iface_sub.ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3;
P->preexport = ospf_preexport;
P->reload_routes = ospf_reload_routes;
P->feed_begin = ospf_feed_begin;
P->feed_end = ospf_feed_end;
- P->rte_better = ospf_rte_better;
- P->rte_igp_metric = ospf_rte_igp_metric;
+
+ P->sources.class = &ospf_rte_owner_class;
return P;
}
/* If new is better return 1 */
static int
-ospf_rte_better(struct rte *new, struct rte *old)
+ospf_rte_better(const rte *new, const rte *old)
{
- u32 new_metric1 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY);
+ u32 new_metric1 = ea_get_int(new->attrs, &ea_ospf_metric1, LSINFINITY);
if (new_metric1 == LSINFINITY)
return 0;
- if(new->attrs->source < old->attrs->source) return 1;
- if(new->attrs->source > old->attrs->source) return 0;
+ u32 ns = rt_get_source_attr(new);
+ u32 os = rt_get_source_attr(old);
+
+ if (ns < os) return 1;
+ if (ns > os) return 0;
- if(new->attrs->source == RTS_OSPF_EXT2)
+ if (ns == RTS_OSPF_EXT2)
{
- u32 old_metric2 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY);
- u32 new_metric2 = ea_get_int(new->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY);
- if(new_metric2 < old_metric2) return 1;
- if(new_metric2 > old_metric2) return 0;
+ u32 old_metric2 = ea_get_int(old->attrs, &ea_ospf_metric2, LSINFINITY);
+ u32 new_metric2 = ea_get_int(new->attrs, &ea_ospf_metric2, LSINFINITY);
+ if (new_metric2 < old_metric2) return 1;
+ if (new_metric2 > old_metric2) return 0;
}
- u32 old_metric1 = ea_get_int(old->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY);
+ u32 old_metric1 = ea_get_int(old->attrs, &ea_ospf_metric1, LSINFINITY);
if (new_metric1 < old_metric1)
return 1;
@@ -410,12 +414,12 @@ ospf_rte_better(struct rte *new, struct rte *old)
}
static u32
-ospf_rte_igp_metric(struct rte *rt)
+ospf_rte_igp_metric(const rte *rt)
{
- if (rt->attrs->source == RTS_OSPF_EXT2)
+ if (rt_get_source_attr(rt) == RTS_OSPF_EXT2)
return IGP_METRIC_UNKNOWN;
- return ea_get_int(rt->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY);
+ return ea_get_int(rt->attrs, &ea_ospf_metric1, LSINFINITY);
}
void
@@ -488,7 +492,7 @@ ospf_preexport(struct channel *C, rte *e)
struct ospf_area *oa = ospf_main_area(p);
/* Reject our own routes */
- if (e->src->proto == &p->p)
+ if (e->sender == C->in_req.hook)
return -1;
/* Do not export routes to stub areas */
@@ -531,7 +535,7 @@ ospf_shutdown(struct proto *P)
/* Cleanup locked rta entries */
FIB_WALK(&p->rtf, ort, nf)
{
- rta_free(nf->old_rta);
+ ea_free(nf->old_ea);
}
FIB_WALK_END;
@@ -566,11 +570,12 @@ ospf_get_status(struct proto *P, byte * buf)
}
static void
-ospf_get_route_info(rte * rte, byte * buf)
+ospf_get_route_info(const rte * rte, byte * buf)
{
char *type = "<bug>";
- switch (rte->attrs->source)
+ uint source = rt_get_source_attr(rte);
+ switch (source)
{
case RTS_OSPF:
type = "I";
@@ -587,42 +592,26 @@ ospf_get_route_info(rte * rte, byte * buf)
}
buf += bsprintf(buf, " %s", type);
- buf += bsprintf(buf, " (%d/%d", rte->attrs->pref, ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC1, LSINFINITY));
- if (rte->attrs->source == RTS_OSPF_EXT2)
- buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs->eattrs, EA_OSPF_METRIC2, LSINFINITY));
+ buf += bsprintf(buf, " (%d/%d", rt_get_preference(rte), ea_get_int(rte->attrs, &ea_ospf_metric1, LSINFINITY));
+ if (source == RTS_OSPF_EXT2)
+ buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs, &ea_ospf_metric2, LSINFINITY));
buf += bsprintf(buf, ")");
- if (rte->attrs->source == RTS_OSPF_EXT1 || rte->attrs->source == RTS_OSPF_EXT2)
+ if (source == RTS_OSPF_EXT1 || source == RTS_OSPF_EXT2)
{
- eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_TAG);
+ eattr *ea = ea_find(rte->attrs, &ea_ospf_tag);
if (ea && (ea->u.data > 0))
buf += bsprintf(buf, " [%x]", ea->u.data);
}
- eattr *ea = ea_find(rte->attrs->eattrs, EA_OSPF_ROUTER_ID);
+ eattr *ea = ea_find(rte->attrs, &ea_ospf_router_id);
if (ea)
buf += bsprintf(buf, " [%R]", ea->u.data);
}
-static int
-ospf_get_attr(const eattr * a, byte * buf, int buflen UNUSED)
+static void
+ospf_tag_format(const eattr * a, byte * buf, uint buflen)
{
- switch (a->id)
- {
- case EA_OSPF_METRIC1:
- bsprintf(buf, "metric1");
- return GA_NAME;
- case EA_OSPF_METRIC2:
- bsprintf(buf, "metric2");
- return GA_NAME;
- case EA_OSPF_TAG:
- bsprintf(buf, "tag: 0x%08x", a->u.data);
- return GA_FULL;
- case EA_OSPF_ROUTER_ID:
- bsprintf(buf, "router_id");
- return GA_NAME;
- default:
- return GA_UNKNOWN;
- }
+ bsnprintf(buf, buflen, "0x%08x", a->u.data);
}
static void
@@ -1517,10 +1506,15 @@ ospf_sh_lsadb(struct lsadb_show_data *ld)
}
+struct rte_owner_class ospf_rte_owner_class = {
+ .get_route_info = ospf_get_route_info,
+ .rte_better = ospf_rte_better,
+ .rte_igp_metric = ospf_rte_igp_metric,
+};
+
struct protocol proto_ospf = {
.name = "OSPF",
.template = "ospf%d",
- .class = PROTOCOL_OSPF,
.preference = DEF_PREF_OSPF,
.channel_mask = NB_IP,
.proto_size = sizeof(struct ospf_proto),
@@ -1531,12 +1525,38 @@ struct protocol proto_ospf = {
.shutdown = ospf_shutdown,
.reconfigure = ospf_reconfigure,
.get_status = ospf_get_status,
- .get_attr = ospf_get_attr,
- .get_route_info = ospf_get_route_info
+};
+
+struct ea_class ea_ospf_metric1 = {
+ .name = "ospf_metric1",
+ .type = T_INT,
+};
+
+struct ea_class ea_ospf_metric2 = {
+ .name = "ospf_metric2",
+ .type = T_INT,
+};
+
+struct ea_class ea_ospf_tag = {
+ .name = "ospf_tag",
+ .type = T_INT,
+ .format = ospf_tag_format,
+};
+
+struct ea_class ea_ospf_router_id = {
+ .name = "ospf_router_id",
+ .type = T_QUAD,
};
void
ospf_build(void)
{
proto_build(&proto_ospf);
+
+ EA_REGISTER_ALL(
+ &ea_ospf_metric1,
+ &ea_ospf_metric2,
+ &ea_ospf_tag,
+ &ea_ospf_router_id
+ );
}
diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h
index 3e704ae8..3477ba5a 100644
--- a/proto/ospf/ospf.h
+++ b/proto/ospf/ospf.h
@@ -22,7 +22,7 @@
#include "lib/resource.h"
#include "nest/protocol.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "nest/locks.h"
#include "nest/bfd.h"
@@ -939,12 +939,7 @@ struct lsadb_show_data {
u32 router; /* Advertising router, 0 -> all */
};
-
-#define EA_OSPF_METRIC1 EA_CODE(PROTOCOL_OSPF, 0)
-#define EA_OSPF_METRIC2 EA_CODE(PROTOCOL_OSPF, 1)
-#define EA_OSPF_TAG EA_CODE(PROTOCOL_OSPF, 2)
-#define EA_OSPF_ROUTER_ID EA_CODE(PROTOCOL_OSPF, 3)
-
+extern struct ea_class ea_ospf_metric1, ea_ospf_metric2, ea_ospf_tag, ea_ospf_router_id;
/*
* For regular networks, neighbor address must match network prefix.
@@ -1007,6 +1002,8 @@ void ospf_sh_state(struct proto *P, int verbose, int reachable);
void ospf_sh_lsadb(struct lsadb_show_data *ld);
+extern struct rte_owner_class ospf_rte_owner_class;
+
/* iface.c */
void ospf_iface_chstate(struct ospf_iface *ifa, u8 state);
void ospf_iface_sm(struct ospf_iface *ifa, int event);
diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c
index 471bb586..69c2907d 100644
--- a/proto/ospf/rt.c
+++ b/proto/ospf/rt.c
@@ -28,24 +28,30 @@ nh_is_vlink(struct nexthop *nhs)
static inline int
unresolved_vlink(ort *ort)
{
- return ort->n.nhs && nh_is_vlink(ort->n.nhs);
+ return ort->n.nhs && nh_is_vlink(&ort->n.nhs->nh);
}
-static inline struct nexthop *
+static inline struct nexthop_adata *
new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight)
{
- struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop));
- nh->gw = gw;
- nh->iface = iface;
- nh->weight = weight;
- return nh;
+ struct nexthop_adata *nhad = lp_alloc(p->nhpool, sizeof(struct nexthop_adata));
+ *nhad = (struct nexthop_adata) {
+ .ad = { .length = sizeof *nhad - sizeof nhad->ad, },
+ .nh = {
+ .gw = gw,
+ .iface = iface,
+ .weight = weight,
+ },
+ };
+
+ return nhad;
}
/* Returns true if there are device nexthops in n */
static inline int
-has_device_nexthops(const struct nexthop *n)
+has_device_nexthops(struct nexthop_adata *nhad)
{
- for (; n; n = n->next)
+ NEXTHOP_WALK(n, nhad)
if (ipa_zero(n->gw))
return 1;
@@ -53,38 +59,22 @@ has_device_nexthops(const struct nexthop *n)
}
/* Replace device nexthops with nexthops to gw */
-static struct nexthop *
-fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw)
+static struct nexthop_adata *
+fix_device_nexthops(struct ospf_proto *p, struct nexthop_adata *old, ip_addr gw)
{
- struct nexthop *root1 = NULL;
- struct nexthop *root2 = NULL;
- struct nexthop **nn1 = &root1;
- struct nexthop **nn2 = &root2;
-
if (!p->ecmp)
- return new_nexthop(p, gw, n->iface, n->weight);
-
- /* This is a bit tricky. We cannot just copy the list and update n->gw,
- because the list should stay sorted, so we create two lists, one with new
- gateways and one with old ones, and then merge them. */
-
- for (; n; n = n->next)
{
- struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight);
+ struct nexthop_adata *new = (struct nexthop_adata *) lp_store_adata(p->nhpool, old->ad.data, old->ad.length);
+ new->nh.gw = gw;
+ return new;
+ }
+ struct nexthop_adata *tmp = (struct nexthop_adata *) tmp_copy_adata(&old->ad);
+ NEXTHOP_WALK(n, tmp)
if (ipa_zero(n->gw))
- {
- *nn1 = nn;
- nn1 = &(nn->next);
- }
- else
- {
- *nn2 = nn;
- nn2 = &(nn->next);
- }
- }
+ n->gw = gw;
- return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool);
+ return nexthop_sort(tmp, p->nhpool);
}
@@ -169,9 +159,9 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old)
return -1;
if (!new->nhs)
return 1;
- if (nh_is_vlink(new->nhs))
+ if (nh_is_vlink(&new->nhs->nh))
return -1;
- if (nh_is_vlink(old->nhs))
+ if (nh_is_vlink(&old->nhs->nh))
return 1;
@@ -279,11 +269,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new)
orta *old = &o->n;
if (old->nhs != new->nhs)
- {
- old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
- p->ecmp, p->nhpool);
- old->nhs_reuse = 1;
- }
+ old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool);
if (old->rid < new->rid)
old->rid = new->rid;
@@ -295,11 +281,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new)
orta *old = &o->n;
if (old->nhs != new->nhs)
- {
- old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
- p->ecmp, p->nhpool);
- old->nhs_reuse = 1;
- }
+ old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool);
if (old->tag != new->tag)
old->tag = 0;
@@ -1165,7 +1147,7 @@ ospf_check_vlinks(struct ospf_proto *p)
if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs)
{
- struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->iface);
+ struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->nh.iface);
if ((ifa->state != OSPF_IS_PTP)
|| (ifa->vifa != nhi)
@@ -1579,10 +1561,7 @@ ospf_ext_spf(struct ospf_proto *p)
/* Replace device nexthops with nexthops to forwarding address from LSA */
if (has_device_nexthops(nfa.nhs))
- {
nfa.nhs = fix_device_nexthops(p, nfa.nhs, rt.fwaddr);
- nfa.nhs_reuse = 1;
- }
}
if (rt.ebit)
@@ -1726,10 +1705,10 @@ ospf_rt_spf(struct ospf_proto *p)
static inline int
-inherit_nexthops(struct nexthop *pn)
+inherit_nexthops(struct nexthop_adata *pn)
{
/* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */
- return pn && (ipa_nonzero(pn->gw) || !pn->iface);
+ return pn && (ipa_nonzero(pn->nh.gw) || !pn->nh.iface);
}
static inline ip_addr
@@ -1744,12 +1723,12 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en)
return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll);
}
-static struct nexthop *
+static struct nexthop_adata *
calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
struct top_hash_entry *par, int pos, uint data, uint lif, uint nif)
{
struct ospf_proto *p = oa->po;
- struct nexthop *pn = par->nhs;
+ struct nexthop_adata *pn = par->nhs;
struct top_hash_entry *link = NULL;
struct ospf_iface *ifa = NULL;
ip_addr nh = IPA_NONE;
@@ -1827,10 +1806,10 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
return NULL;
}
- struct nexthop *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight);
+ struct nexthop_adata *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight);
if (ifa->addr->flags & IA_HOST)
- nhs->flags = RNF_ONLINK;
+ nhs->nh.flags = RNF_ONLINK;
return nhs;
}
@@ -1851,7 +1830,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
if (ipa_zero(en->lb))
goto bad;
- return new_nexthop(p, en->lb, pn->iface, pn->weight);
+ return new_nexthop(p, en->lb, pn->nh.iface, pn->nh.weight);
}
else /* OSPFv3 */
{
@@ -1859,7 +1838,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
* Next-hop is taken from lladdr field of Link-LSA, en->lb_id
* is computed in link_back().
*/
- link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK);
+ link = ospf_hash_find(p->gr, pn->nh.iface->index, en->lb_id, rid, LSA_T_LINK);
if (!link)
return NULL;
@@ -1867,7 +1846,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
if (ipa_zero(nh))
return NULL;
- return new_nexthop(p, nh, pn->iface, pn->weight);
+ return new_nexthop(p, nh, pn->nh.iface, pn->nh.weight);
}
}
@@ -1914,7 +1893,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
if (!link_back(oa, en, par, lif, nif))
return;
- struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif);
+ struct nexthop_adata *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif);
if (!nhs)
{
log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)",
@@ -1923,7 +1902,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
}
/* If en->dist > 0, we know that en->color == CANDIDATE and en->nhs is defined. */
- if ((dist == en->dist) && !nh_is_vlink(en->nhs))
+ if ((dist == en->dist) && !nh_is_vlink(&en->nhs->nh))
{
/*
* For multipath, we should merge nexthops. We merge regular nexthops only.
@@ -1947,13 +1926,11 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
*/
/* Keep old ones */
- if (!p->ecmp || nh_is_vlink(nhs) || (nhs == en->nhs))
+ if (!p->ecmp || nh_is_vlink(&nhs->nh) || (nhs == en->nhs))
return;
/* Merge old and new */
- int new_reuse = (par->nhs != nhs);
- en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool);
- en->nhs_reuse = 1;
+ en->nhs = nexthop_merge(en->nhs, nhs, p->ecmp, p->nhpool);
return;
}
@@ -1967,7 +1944,6 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
en->nhs = nhs;
en->dist = dist;
en->color = CANDIDATE;
- en->nhs_reuse = (par->nhs != nhs);
prev = NULL;
@@ -2001,14 +1977,34 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
}
static inline int
-ort_changed(ort *nf, rta *nr)
+ort_changed(ort *nf, ea_list *nr)
{
- rta *or = nf->old_rta;
- return !or ||
+ ea_list *or = nf->old_ea;
+
+ if (!or ||
(nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) ||
- (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) ||
- (nr->source != or->source) || (nr->dest != or->dest) ||
- !nexthop_same(&(nr->nh), &(or->nh));
+ (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid))
+ return 1;
+
+ eattr *nhea_n = ea_find(nr, &ea_gen_nexthop);
+ eattr *nhea_o = ea_find(or, &ea_gen_nexthop);
+ if (!nhea_n != !nhea_o)
+ return 1;
+
+ if (nhea_n && nhea_o)
+ {
+ struct nexthop_adata *nhad_n = (struct nexthop_adata *) nhea_n->u.ptr;
+ struct nexthop_adata *nhad_o = (struct nexthop_adata *) nhea_o->u.ptr;
+
+ if (!nexthop_same(nhad_n, nhad_o))
+ return 1;
+ }
+
+ if ( ea_get_int(nr, &ea_gen_source, 0)
+ != ea_get_int(or, &ea_gen_source, 0))
+ return 1;
+
+ return 0;
}
static void
@@ -2030,10 +2026,9 @@ again1:
FIB_ITERATE_START(fib, &fit, ort, nf)
{
/* Sanity check of next-hop addresses, failure should not happen */
- if (nf->n.type)
+ if (nf->n.type && nf->n.nhs)
{
- struct nexthop *nh;
- for (nh = nf->n.nhs; nh; nh = nh->next)
+ NEXTHOP_WALK(nh, nf->n.nhs)
if (ipa_nonzero(nh->gw))
{
neighbor *nbr = neigh_find(&p->p, nh->gw, nh->iface,
@@ -2052,68 +2047,69 @@ again1:
if (nf->n.type) /* Add the route */
{
- rta a0 = {
- .source = nf->n.type,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
- .nh = *(nf->n.nhs),
- .pref = p->p.main_channel->preference,
- };
-
- if (reload || ort_changed(nf, &a0))
- {
- a0.eattrs = alloca(sizeof(ea_list) + 4 * sizeof(eattr));
- memset(a0.eattrs, 0, sizeof(ea_list));
+ struct {
+ ea_list l;
+ eattr a[7];
+ } eattrs;
+
+ eattrs.l = (ea_list) {};
+
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference);
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_gen_source, 0, nf->n.type);
+
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nf->n.nhs->ad);
+
+ if (reload || ort_changed(nf, &eattrs.l))
+ {
nf->old_metric1 = nf->n.metric1;
nf->old_metric2 = nf->n.metric2;
nf->old_tag = nf->n.tag;
nf->old_rid = nf->n.rid;
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_METRIC1,
- .type = EAF_TYPE_INT,
- .u.data = nf->n.metric1,
- };
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_metric1, 0, nf->n.metric1);
if (nf->n.type == RTS_OSPF_EXT2)
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_METRIC2,
- .type = EAF_TYPE_INT,
- .u.data = nf->n.metric2,
- };
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_metric2, 0, nf->n.metric2);
if ((nf->n.type == RTS_OSPF_EXT1) || (nf->n.type == RTS_OSPF_EXT2))
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_TAG,
- .type = EAF_TYPE_INT,
- .u.data = nf->n.tag,
- };
-
- a0.eattrs->attrs[a0.eattrs->count++] = (eattr) {
- .id = EA_OSPF_ROUTER_ID,
- .type = EAF_TYPE_ROUTER_ID,
- .u.data = nf->n.rid,
- };
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_tag, 0, nf->n.tag);
+
+ eattrs.a[eattrs.l.count++] =
+ EA_LITERAL_EMBEDDED(&ea_ospf_router_id, 0, nf->n.rid);
- rta *a = rta_lookup(&a0);
- rte *e = rte_get_temp(a, p->p.main_source);
+ ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count);
- rta_free(nf->old_rta);
- nf->old_rta = rta_clone(a);
+ ea_list *eal = ea_lookup(&eattrs.l, 0);
+ ea_free(nf->old_ea);
+ nf->old_ea = eal;
+ rte e0 = {
+ .attrs = eal,
+ .src = p->p.main_source,
+ };
+
+ /*
DBG("Mod rte type %d - %N via %I on iface %s, met %d\n",
a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1);
- rte_update(&p->p, nf->fn.addr, e);
+ */
+
+ rte_update(p->p.main_channel, nf->fn.addr, &e0, p->p.main_source);
}
}
- else if (nf->old_rta)
+ else if (nf->old_ea)
{
/* Remove the route */
- rta_free(nf->old_rta);
- nf->old_rta = NULL;
+ rta_free(nf->old_ea);
+ nf->old_ea = NULL;
- rte_update(&p->p, nf->fn.addr, NULL);
+ rte_update(p->p.main_channel, nf->fn.addr, NULL, p->p.main_source);
}
/* Remove unused rt entry, some special entries are persistent */
@@ -2129,7 +2125,6 @@ again1:
}
FIB_ITERATE_END;
-
WALK_LIST(oa, p->area_list)
{
/* Cleanup ASBR hash tables */
diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h
index 094e125b..88eefef9 100644
--- a/proto/ospf/rt.h
+++ b/proto/ospf/rt.h
@@ -18,8 +18,6 @@
typedef struct orta
{
u8 type; /* RTS_OSPF_* */
- u8 nhs_reuse; /* Whether nhs nodes can be reused during merging.
- See a note in rt.c:add_cand() */
u32 options;
/*
* For ORT_ROUTER routes, options field are router-LSA style
@@ -53,7 +51,7 @@ typedef struct orta
struct ospf_area *oa;
struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(),
NULL otherwise */
- struct nexthop *nhs; /* Next hops computed during SPF */
+ struct nexthop_adata *nhs; /* Next hops computed during SPF */
struct top_hash_entry *en; /* LSA responsible for this orta */
}
orta;
@@ -80,7 +78,7 @@ typedef struct ort
*/
orta n;
u32 old_metric1, old_metric2, old_tag, old_rid;
- rta *old_rta;
+ ea_list *old_ea;
u32 lsa_id;
u8 external_rte;
u8 area_net;
diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c
index 9fe68264..85bce03d 100644
--- a/proto/ospf/topology.c
+++ b/proto/ospf/topology.c
@@ -1300,7 +1300,7 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa)
}
void
-ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED)
+ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED)
{
struct ospf_proto *p = (struct ospf_proto *) P;
struct ospf_area *oa = NULL; /* non-NULL for NSSA-LSA */
@@ -1319,7 +1319,7 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte
if (!new)
{
- nf = fib_find(&p->rtf, n->n.addr);
+ nf = fib_find(&p->rtf, n);
if (!nf || !nf->external_rte)
return;
@@ -1337,23 +1337,23 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte
ASSERT(p->asbr);
/* Get route attributes */
- rta *a = new->attrs;
- eattr *m1a = ea_find(a->eattrs, EA_OSPF_METRIC1);
- eattr *m2a = ea_find(a->eattrs, EA_OSPF_METRIC2);
+ ea_list *a = new->attrs;
+ eattr *m1a = ea_find(a, &ea_ospf_metric1);
+ eattr *m2a = ea_find(a, &ea_ospf_metric2);
uint m1 = m1a ? m1a->u.data : 0;
uint m2 = m2a ? m2a->u.data : 10000;
if (m1 > LSINFINITY)
{
log(L_WARN "%s: Invalid ospf_metric1 value %u for route %N",
- p->p.name, m1, n->n.addr);
+ p->p.name, m1, n);
m1 = LSINFINITY;
}
if (m2 > LSINFINITY)
{
log(L_WARN "%s: Invalid ospf_metric2 value %u for route %N",
- p->p.name, m2, n->n.addr);
+ p->p.name, m2, n);
m2 = LSINFINITY;
}
@@ -1363,11 +1363,14 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte
uint ebit = m2a || !m1a;
uint metric = ebit ? m2 : m1;
- uint tag = ea_get_int(a->eattrs, EA_OSPF_TAG, 0);
+ uint tag = ea_get_int(a, &ea_ospf_tag, 0);
ip_addr fwd = IPA_NONE;
- if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface))
- fwd = a->nh.gw;
+ eattr *nhea = ea_find(a, &ea_gen_nexthop);
+ struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ if (NEXTHOP_IS_REACHABLE(nhad))
+ if (use_gw_for_fwaddr(p, nhad->nh.gw, nhad->nh.iface))
+ fwd = nhad->nh.gw;
/* NSSA-LSA with P-bit set must have non-zero forwarding address */
if (oa && ipa_zero(fwd))
@@ -1377,12 +1380,12 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte
if (ipa_zero(fwd))
{
log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %N",
- p->p.name, n->n.addr);
+ p->p.name, n);
return;
}
}
- nf = fib_get(&p->rtf, n->n.addr);
+ nf = fib_get(&p->rtf, n);
ospf_originate_ext_lsa(p, oa, nf, LSA_M_EXPORT, metric, ebit, fwd, tag, 1, p->vpn_pe);
nf->external_rte = 1;
}
diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h
index 535d1f1b..3c92b431 100644
--- a/proto/ospf/topology.h
+++ b/proto/ospf/topology.h
@@ -28,7 +28,7 @@ struct top_hash_entry
u16 next_lsa_opts; /* For postponed LSA origination */
btime inst_time; /* Time of installation into DB */
struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */
- struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */
+ struct nexthop_adata *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */
ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */
u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */
u32 dist; /* Distance from the root */
@@ -39,8 +39,6 @@ struct top_hash_entry
#define CANDIDATE 1
#define INSPF 2
u8 mode; /* LSA generated during RT calculation (LSA_RTCALC or LSA_STALE)*/
- u8 nhs_reuse; /* Whether nhs nodes can be reused during merging.
- See a note in rt.c:add_cand() */
};
@@ -200,7 +198,7 @@ void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 d
void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit, int dn);
void ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa);
-void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old);
+void ospf_rt_notify(struct proto *P, struct channel *ch, const net_addr *n, rte *new, const rte *old);
void ospf_update_topology(struct ospf_proto *p);
struct top_hash_entry *ospf_hash_find(struct top_graph *, u32 domain, u32 lsa, u32 rtr, u32 type);
diff --git a/proto/perf/perf.c b/proto/perf/perf.c
index 75e405f0..dc5bbf2f 100644
--- a/proto/perf/perf.c
+++ b/proto/perf/perf.c
@@ -18,7 +18,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "conf/conf.h"
#include "filter/filter.h"
@@ -85,7 +85,7 @@ random_net_ip4(void)
}
struct perf_random_routes {
- struct rta *a;
+ ea_list *a;
net_addr net;
};
@@ -142,17 +142,21 @@ perf_loop(void *data)
*((net_addr_ip4 *) &(p->data[i].net)) = random_net_ip4();
if (!p->attrs_per_rte || !(i % p->attrs_per_rte)) {
- struct rta a0 = {
- .source = RTS_PERF,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
- .pref = p->p.main_channel->preference,
+ ea_list *ea = NULL;
+
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_PERF);
+
+ struct nexthop_adata nhad = {
.nh.iface = p->ifa->iface,
.nh.gw = gw,
.nh.weight = 1,
};
- p->data[i].a = rta_lookup(&a0);
+ ea_set_attr_data(&ea, &ea_gen_nexthop, 0,
+ &nhad.ad.data, sizeof nhad - sizeof nhad.ad);
+
+ p->data[i].a = rta_lookup(ea, 0);
}
else
p->data[i].a = rta_clone(p->data[i-1].a);
@@ -160,17 +164,17 @@ perf_loop(void *data)
clock_gettime(CLOCK_MONOTONIC, &ts_generated);
- for (uint i=0; i<N; i++) {
- rte *e = rte_get_temp(p->data[i].a, p->p.main_source);
-
- rte_update(P, &(p->data[i].net), e);
+ for (uint i=0; i<N; i++)
+ {
+ rte e0 = { .attrs = p->data[i].a, .src = P->main_source, };
+ rte_update(P->main_channel, &(p->data[i].net), &e0, P->main_source);
}
clock_gettime(CLOCK_MONOTONIC, &ts_update);
if (!p->keep)
for (uint i=0; i<N; i++)
- rte_update(P, &(p->data[i].net), NULL);
+ rte_update(P->main_channel, &(p->data[i].net), NULL, P->main_source);
clock_gettime(CLOCK_MONOTONIC, &ts_withdraw);
@@ -198,12 +202,14 @@ perf_loop(void *data)
p->exp++;
}
- rt_schedule_prune(P->main_channel->table);
+ RT_LOCKED(P->main_channel->table, tab)
+ rt_schedule_prune(tab);
+
ev_schedule(p->loop);
}
static void
-perf_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net UNUSED, struct rte *new UNUSED, struct rte *old UNUSED)
+perf_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net UNUSED, struct rte *new UNUSED, const struct rte *old UNUSED)
{
struct perf_proto *p = (struct perf_proto *) P;
p->exp++;
@@ -266,7 +272,7 @@ perf_init(struct proto_config *CF)
switch (p->mode) {
case PERF_MODE_IMPORT:
- P->ifa_notify = perf_ifa_notify;
+ P->iface_sub.ifa_notify = perf_ifa_notify;
break;
case PERF_MODE_EXPORT:
P->rt_notify = perf_rt_notify;
@@ -305,7 +311,6 @@ perf_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS
struct protocol proto_perf = {
.name = "Perf",
.template = "perf%d",
- .class = PROTOCOL_PERF,
.channel_mask = NB_IP,
.proto_size = sizeof(struct perf_proto),
.config_size = sizeof(struct perf_config),
diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y
index 1202c169..444de127 100644
--- a/proto/pipe/config.Y
+++ b/proto/pipe/config.Y
@@ -16,7 +16,7 @@ CF_DEFINES
CF_DECLS
-CF_KEYWORDS(PIPE, PEER, TABLE)
+CF_KEYWORDS(PIPE, PEER, TABLE, MAX, GENERATION)
CF_GRAMMAR
@@ -25,6 +25,8 @@ proto: pipe_proto '}' { this_channel = NULL; } ;
pipe_proto_start: proto_start PIPE
{
this_proto = proto_config_new(&proto_pipe, $1);
+ this_proto->loop_order = DOMAIN_ORDER(proto);
+ PIPE_CFG->max_generation = 16;
}
proto_name
{
@@ -40,7 +42,17 @@ pipe_proto:
pipe_proto_start '{'
| pipe_proto proto_item ';'
| pipe_proto channel_item_ ';'
+ | pipe_proto IMPORT IN net_any imexport ';' {
+ if (this_channel->net_type && ($4->type != this_channel->net_type))
+ cf_error("Incompatible export prefilter type");
+ PIPE_CFG->in_subprefix = $4;
+ this_channel->in_filter = $5;
+ }
| pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; }
+ | pipe_proto MAX GENERATION expr ';' {
+ if (($4 < 1) || ($4 > 254)) cf_error("Max generation must be in range 1..254, got %u", $4);
+ PIPE_CFG->max_generation = $4;
+ }
;
CF_CODE
diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c
index d48ce387..b2083010 100644
--- a/proto/pipe/pipe.c
+++ b/proto/pipe/pipe.c
@@ -35,7 +35,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "conf/conf.h"
#include "filter/filter.h"
@@ -44,54 +44,54 @@
#include "pipe.h"
static void
-pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *old)
+pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte *new, const rte *old)
{
struct pipe_proto *p = (void *) P;
struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri;
- struct rte_src *src;
-
- rte *e;
- rta *a;
+ uint *flags = (src_ch == p->pri) ? &p->sec_flags : &p->pri_flags;
if (!new && !old)
return;
- if (dst->table->pipe_busy)
- {
- log(L_ERR "Pipe loop detected when sending %N to table %s",
- n->n.addr, dst->table->name);
- return;
- }
+ /* Start the route refresh if requested to */
+ if (*flags & PIPE_FL_RR_BEGIN_PENDING)
+ {
+ *flags &= ~PIPE_FL_RR_BEGIN_PENDING;
+ rt_refresh_begin(&dst->in_req);
+ }
if (new)
{
- src = new->src;
+ rte e0 = rte_init_from(new);
- a = alloca(rta_size(new->attrs));
- memcpy(a, new->attrs, rta_size(new->attrs));
+ e0.generation = new->generation + 1;
+ ea_unset_attr(&e0.attrs, 0, &ea_gen_hostentry);
- a->cached = 0;
- a->hostentry = NULL;
- e = rte_get_temp(a, src);
+ rte_update(dst, n, &e0, new->src);
}
else
- {
- e = NULL;
- src = old->src;
- }
-
- src_ch->table->pipe_busy = 1;
- rte_update2(dst, n->n.addr, e, src);
- src_ch->table->pipe_busy = 0;
+ rte_update(dst, n, NULL, old->src);
}
static int
pipe_preexport(struct channel *C, rte *e)
{
- struct proto *pp = e->sender->proto;
+ struct pipe_proto *p = (void *) C->proto;
- if (pp == C->proto)
- return -1; /* Avoid local loops automatically */
+ /* Avoid direct loopbacks */
+ if (e->sender == C->in_req.hook)
+ return -1;
+
+ /* Indirection check */
+ uint max_generation = ((struct pipe_config *) p->p.cf)->max_generation;
+ if (e->generation >= max_generation)
+ {
+ log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u",
+ e->generation, max_generation, C->proto->name,
+ C->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id);
+
+ return -1;
+ }
return 0;
}
@@ -105,6 +105,32 @@ pipe_reload_routes(struct channel *C)
channel_request_feeding((C == p->pri) ? p->sec : p->pri);
}
+static void
+pipe_feed_begin(struct channel *C, int initial UNUSED)
+{
+ struct pipe_proto *p = (void *) C->proto;
+ uint *flags = (C == p->pri) ? &p->sec_flags : &p->pri_flags;
+
+ *flags |= PIPE_FL_RR_BEGIN_PENDING;
+}
+
+static void
+pipe_feed_end(struct channel *C)
+{
+ struct pipe_proto *p = (void *) C->proto;
+ struct channel *dst = (C == p->pri) ? p->sec : p->pri;
+ uint *flags = (C == p->pri) ? &p->sec_flags : &p->pri_flags;
+
+ /* If not even started, start the RR now */
+ if (*flags & PIPE_FL_RR_BEGIN_PENDING)
+ {
+ *flags &= ~PIPE_FL_RR_BEGIN_PENDING;
+ rt_refresh_begin(&dst->in_req);
+ }
+
+ /* Finish RR always */
+ rt_refresh_end(&dst->in_req);
+}
static void
pipe_postconfig(struct proto_config *CF)
@@ -124,10 +150,16 @@ pipe_postconfig(struct proto_config *CF)
if (cc->table->addr_type != cf->peer->addr_type)
cf_error("Primary table and peer table must have the same type");
+ if (cc->out_subprefix && (cc->table->addr_type != cc->out_subprefix->type))
+ cf_error("Export subprefix must match table type");
+
+ if (cf->in_subprefix && (cc->table->addr_type != cf->in_subprefix->type))
+ cf_error("Import subprefix must match table type");
+
if (cc->rx_limit.action)
cf_error("Pipe protocol does not support receive limits");
- if (cc->in_keep_filtered)
+ if (cc->in_keep)
cf_error("Pipe protocol prohibits keeping filtered routes");
cc->debug = cf->c.debug;
@@ -143,6 +175,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf)
.channel = cc->channel,
.table = cc->table,
.out_filter = cc->out_filter,
+ .out_subprefix = cc->out_subprefix,
.in_limit = cc->in_limit,
.ra_mode = RA_ANY,
.debug = cc->debug,
@@ -154,6 +187,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf)
.channel = cc->channel,
.table = cf->peer,
.out_filter = cc->in_filter,
+ .out_subprefix = cf->in_subprefix,
.in_limit = cc->out_limit,
.ra_mode = RA_ANY,
.debug = cc->debug,
@@ -175,6 +209,10 @@ pipe_init(struct proto_config *CF)
P->rt_notify = pipe_rt_notify;
P->preexport = pipe_preexport;
P->reload_routes = pipe_reload_routes;
+ P->feed_begin = pipe_feed_begin;
+ P->feed_end = pipe_feed_end;
+
+ p->rl_gen = (struct tbf) TBF_DEFAULT_LOG_LIMITS;
pipe_configure_channels(p, cf);
@@ -207,8 +245,18 @@ pipe_get_status(struct proto *P, byte *buf)
static void
pipe_show_stats(struct pipe_proto *p)
{
- struct proto_stats *s1 = &p->pri->stats;
- struct proto_stats *s2 = &p->sec->stats;
+ struct channel_import_stats *s1i = &p->pri->import_stats;
+ struct channel_export_stats *s1e = &p->pri->export_stats;
+ struct channel_import_stats *s2i = &p->sec->import_stats;
+ struct channel_export_stats *s2e = &p->sec->export_stats;
+
+ struct rt_import_stats *rs1i = p->pri->in_req.hook ? &p->pri->in_req.hook->stats : NULL;
+ struct rt_export_stats *rs1e = p->pri->out_req.hook ? &p->pri->out_req.hook->stats : NULL;
+ struct rt_import_stats *rs2i = p->sec->in_req.hook ? &p->sec->in_req.hook->stats : NULL;
+ struct rt_export_stats *rs2e = p->sec->out_req.hook ? &p->sec->out_req.hook->stats : NULL;
+
+ u32 pri_routes = p->pri->in_limit.count;
+ u32 sec_routes = p->sec->in_limit.count;
/*
* Pipe stats (as anything related to pipes) are a bit tricky. There
@@ -232,24 +280,22 @@ pipe_show_stats(struct pipe_proto *p)
*/
cli_msg(-1006, " Routes: %u imported, %u exported",
- s1->imp_routes, s2->imp_routes);
+ pri_routes, sec_routes);
cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted");
cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u",
- s2->exp_updates_received, s2->exp_updates_rejected + s1->imp_updates_invalid,
- s2->exp_updates_filtered, s1->imp_updates_ignored, s1->imp_updates_accepted);
+ rs2e->updates_received, s2e->updates_rejected + s1i->updates_invalid,
+ s2e->updates_filtered, rs1i->updates_ignored, rs1i->updates_accepted);
cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u",
- s2->exp_withdraws_received, s1->imp_withdraws_invalid,
- s1->imp_withdraws_ignored, s1->imp_withdraws_accepted);
+ rs2e->withdraws_received, s1i->withdraws_invalid,
+ rs1i->withdraws_ignored, rs1i->withdraws_accepted);
cli_msg(-1006, " Export updates: %10u %10u %10u %10u %10u",
- s1->exp_updates_received, s1->exp_updates_rejected + s2->imp_updates_invalid,
- s1->exp_updates_filtered, s2->imp_updates_ignored, s2->imp_updates_accepted);
+ rs1e->updates_received, s1e->updates_rejected + s2i->updates_invalid,
+ s1e->updates_filtered, rs2i->updates_ignored, rs2i->updates_accepted);
cli_msg(-1006, " Export withdraws: %10u %10u --- %10u %10u",
- s1->exp_withdraws_received, s2->imp_withdraws_invalid,
- s2->imp_withdraws_ignored, s2->imp_withdraws_accepted);
+ rs1e->withdraws_received, s2i->withdraws_invalid,
+ rs2i->withdraws_ignored, rs2i->withdraws_accepted);
}
-static const char *pipe_feed_state[] = { [ES_DOWN] = "down", [ES_FEEDING] = "feed", [ES_READY] = "up" };
-
static void
pipe_show_proto_info(struct proto *P)
{
@@ -258,13 +304,17 @@ pipe_show_proto_info(struct proto *P)
cli_msg(-1006, " Channel %s", "main");
cli_msg(-1006, " Table: %s", p->pri->table->name);
cli_msg(-1006, " Peer table: %s", p->sec->table->name);
- cli_msg(-1006, " Import state: %s", pipe_feed_state[p->sec->export_state]);
- cli_msg(-1006, " Export state: %s", pipe_feed_state[p->pri->export_state]);
+ cli_msg(-1006, " Import state: %s", rt_export_state_name(rt_export_get_state(p->sec->out_req.hook)));
+ cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(p->pri->out_req.hook)));
cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter));
cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter));
- channel_show_limit(&p->pri->in_limit, "Import limit:");
- channel_show_limit(&p->sec->in_limit, "Export limit:");
+
+
+ channel_show_limit(&p->pri->in_limit, "Import limit:",
+ (p->pri->limit_active & (1 << PLD_IN)), p->pri->limit_actions[PLD_IN]);
+ channel_show_limit(&p->sec->in_limit, "Export limit:",
+ (p->sec->limit_active & (1 << PLD_IN)), p->sec->limit_actions[PLD_IN]);
if (P->proto_state != PS_DOWN)
pipe_show_stats(p);
@@ -282,7 +332,6 @@ pipe_update_debug(struct proto *P)
struct protocol proto_pipe = {
.name = "Pipe",
.template = "pipe%d",
- .class = PROTOCOL_PIPE,
.proto_size = sizeof(struct pipe_proto),
.config_size = sizeof(struct pipe_config),
.postconfig = pipe_postconfig,
diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h
index 038c6666..501b8565 100644
--- a/proto/pipe/pipe.h
+++ b/proto/pipe/pipe.h
@@ -12,12 +12,19 @@
struct pipe_config {
struct proto_config c;
struct rtable_config *peer; /* Table we're connected to */
+ const net_addr *in_subprefix;
+ u8 max_generation;
};
struct pipe_proto {
struct proto p;
struct channel *pri;
struct channel *sec;
+ uint pri_flags;
+ uint sec_flags;
+ struct tbf rl_gen;
};
+#define PIPE_FL_RR_BEGIN_PENDING 1 /* Route refresh should start with the first route notified */
+
#endif
diff --git a/proto/radv/config.Y b/proto/radv/config.Y
index 8d4a3ab9..fb68d2e5 100644
--- a/proto/radv/config.Y
+++ b/proto/radv/config.Y
@@ -33,7 +33,7 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, SOLICITED,
RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT,
LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, LOCAL,
TRIGGER, SENSITIVE, PREFERENCE, LOW, MEDIUM, HIGH, PROPAGATE, ROUTE,
- ROUTES, RA_PREFERENCE, RA_LIFETIME)
+ ROUTES)
CF_ENUM(T_ENUM_RA_PREFERENCE, RA_PREF_, LOW, MEDIUM, HIGH)
@@ -336,9 +336,6 @@ radv_sensitive:
| SENSITIVE bool { $$ = $2; }
;
-dynamic_attr: RA_PREFERENCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_RA_PREFERENCE, EA_RA_PREFERENCE); } ;
-dynamic_attr: RA_LIFETIME { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RA_LIFETIME); } ;
-
CF_CODE
CF_END
diff --git a/proto/radv/packets.c b/proto/radv/packets.c
index 5cd8b2de..c6b565d2 100644
--- a/proto/radv/packets.c
+++ b/proto/radv/packets.c
@@ -493,7 +493,7 @@ radv_sk_open(struct radv_iface *ifa)
sk->data = ifa;
sk->flags = SKF_LADDR_RX;
- if (sk_open(sk) < 0)
+ if (sk_open(sk, ifa->ra->p.loop) < 0)
goto err;
/* We want listen just to ICMPv6 messages of type RS and RA */
diff --git a/proto/radv/radv.c b/proto/radv/radv.c
index 119a8dc4..434155dc 100644
--- a/proto/radv/radv.c
+++ b/proto/radv/radv.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include "radv.h"
+#include "lib/macro.h"
/**
* DOC: Router Advertisements
@@ -42,6 +43,8 @@
* RFC 6106 - DNS extensions (RDDNS, DNSSL)
*/
+static struct ea_class ea_radv_preference, ea_radv_lifetime;
+
static void radv_prune_prefixes(struct radv_iface *ifa);
static void radv_prune_routes(struct radv_proto *p);
@@ -263,9 +266,9 @@ radv_iface_find(struct radv_proto *p, struct iface *what)
}
static void
-radv_iface_add(struct object_lock *lock)
+radv_iface_add(void *_ifa)
{
- struct radv_iface *ifa = lock->data;
+ struct radv_iface *ifa = _ifa;
struct radv_proto *p = ifa->ra;
if (! radv_sk_open(ifa))
@@ -302,8 +305,11 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf
lock->type = OBJLOCK_IP;
lock->port = ICMPV6_PROTO;
lock->iface = iface;
- lock->data = ifa;
- lock->hook = radv_iface_add;
+ lock->event = (event) {
+ .hook = radv_iface_add,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
ifa->lock = lock;
olock_acquire(lock);
@@ -385,9 +391,9 @@ radv_trigger_valid(struct radv_config *cf)
}
static inline int
-radv_net_match_trigger(struct radv_config *cf, net *n)
+radv_net_match_trigger(struct radv_config *cf, const net_addr *n)
{
- return radv_trigger_valid(cf) && net_equal(n->n.addr, &cf->trigger);
+ return radv_trigger_valid(cf) && net_equal(n, &cf->trigger);
}
int
@@ -406,7 +412,7 @@ radv_preexport(struct channel *C, rte *new)
}
static void
-radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED)
+radv_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED)
{
struct radv_proto *p = (struct radv_proto *) P;
struct radv_config *cf = (struct radv_config *) (P->cf);
@@ -444,11 +450,11 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte
{
/* Update */
- ea = ea_find(new->attrs->eattrs, EA_RA_PREFERENCE);
+ ea = ea_find(new->attrs, &ea_radv_preference);
uint preference = ea ? ea->u.data : RA_PREF_MEDIUM;
uint preference_set = !!ea;
- ea = ea_find(new->attrs->eattrs, EA_RA_LIFETIME);
+ ea = ea_find(new->attrs, &ea_radv_lifetime);
uint lifetime = ea ? ea->u.data : 0;
uint lifetime_set = !!ea;
@@ -457,14 +463,14 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte
(preference != RA_PREF_HIGH))
{
log(L_WARN "%s: Invalid ra_preference value %u on route %N",
- p->p.name, preference, n->n.addr);
+ p->p.name, preference, n);
preference = RA_PREF_MEDIUM;
preference_set = 1;
lifetime = 0;
lifetime_set = 1;
}
- rt = fib_get(&p->routes, n->n.addr);
+ rt = fib_get(&p->routes, n);
/* Ignore update if nothing changed */
if (rt->valid &&
@@ -487,7 +493,7 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte
else
{
/* Withdraw */
- rt = fib_find(&p->routes, n->n.addr);
+ rt = fib_find(&p->routes, n);
if (!rt || !rt->valid)
return;
@@ -577,8 +583,8 @@ radv_init(struct proto_config *CF)
P->preexport = radv_preexport;
P->rt_notify = radv_rt_notify;
- P->if_notify = radv_if_notify;
- P->ifa_notify = radv_ifa_notify;
+ P->iface_sub.if_notify = radv_if_notify;
+ P->iface_sub.ifa_notify = radv_ifa_notify;
return P;
}
@@ -660,10 +666,9 @@ radv_reconfigure(struct proto *P, struct proto_config *CF)
if (!old->propagate_routes && new->propagate_routes)
channel_request_feeding(p->p.main_channel);
- struct iface *iface;
- WALK_LIST(iface, iface_list)
+ IFACE_WALK(iface)
{
- if (p->p.vrf_set && p->p.vrf != iface->master)
+ if (p->p.vrf && p->p.vrf != iface->master)
continue;
if (!(iface->flags & IF_UP))
@@ -741,27 +746,26 @@ radv_pref_str(u32 pref)
}
}
-/* The buffer has some minimal size */
-static int
-radv_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
+static void
+radv_preference_format(const eattr *a, byte *buf, uint buflen)
{
- switch (a->id)
- {
- case EA_RA_PREFERENCE:
- bsprintf(buf, "preference: %s", radv_pref_str(a->u.data));
- return GA_FULL;
- case EA_RA_LIFETIME:
- bsprintf(buf, "lifetime");
- return GA_NAME;
- default:
- return GA_UNKNOWN;
- }
+ bsnprintf(buf, buflen, "%s", radv_pref_str(a->u.data));
}
+static struct ea_class ea_radv_preference = {
+ .name = "radv_preference",
+ .type = T_ENUM_RA_PREFERENCE,
+ .format = radv_preference_format,
+};
+
+static struct ea_class ea_radv_lifetime = {
+ .name = "radv_lifetime",
+ .type = T_INT,
+};
+
struct protocol proto_radv = {
.name = "RAdv",
.template = "radv%d",
- .class = PROTOCOL_RADV,
.channel_mask = NB_IP6,
.proto_size = sizeof(struct radv_proto),
.config_size = sizeof(struct radv_config),
@@ -772,11 +776,15 @@ struct protocol proto_radv = {
.reconfigure = radv_reconfigure,
.copy_config = radv_copy_config,
.get_status = radv_get_status,
- .get_attr = radv_get_attr
};
void
radv_build(void)
{
proto_build(&proto_radv);
+
+ EA_REGISTER_ALL(
+ &ea_radv_preference,
+ &ea_radv_lifetime
+ );
}
diff --git a/proto/radv/radv.h b/proto/radv/radv.h
index 14d40f8a..c9219bda 100644
--- a/proto/radv/radv.h
+++ b/proto/radv/radv.h
@@ -19,7 +19,7 @@
#include "lib/resource.h"
#include "nest/protocol.h"
#include "nest/iface.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "nest/locks.h"
#include "conf/conf.h"
@@ -195,10 +195,6 @@ struct radv_iface
#define RA_PREF_HIGH 0x08
#define RA_PREF_MASK 0x18
-/* Attributes */
-#define EA_RA_PREFERENCE EA_CODE(PROTOCOL_RADV, 0)
-#define EA_RA_LIFETIME EA_CODE(PROTOCOL_RADV, 1)
-
#ifdef LOCAL_DEBUG
#define RADV_FORCE_DEBUG 1
#else
diff --git a/proto/rip/config.Y b/proto/rip/config.Y
index 28ee9609..3c0973b1 100644
--- a/proto/rip/config.Y
+++ b/proto/rip/config.Y
@@ -37,7 +37,7 @@ CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT,
PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO,
TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5,
TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK,
- DEMAND, CIRCUIT, RIP_METRIC, RIP_TAG)
+ DEMAND, CIRCUIT)
%type <i> rip_variant rip_auth
@@ -190,9 +190,6 @@ rip_iface:
rip_iface_start iface_patt_list_nopx rip_iface_opt_list rip_iface_finish;
-dynamic_attr: RIP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_METRIC); } ;
-dynamic_attr: RIP_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_TAG); } ;
-
CF_CLI_HELP(SHOW RIP, ..., [[Show information about RIP protocol]]);
CF_CLI(SHOW RIP INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about RIP interfaces]])
diff --git a/proto/rip/packets.c b/proto/rip/packets.c
index 9c3bd7a3..fecdf896 100644
--- a/proto/rip/packets.c
+++ b/proto/rip/packets.c
@@ -1012,7 +1012,7 @@ rip_open_socket(struct rip_iface *ifa)
/* sk->rbsize and sk->tbsize are handled in rip_iface_update_buffers() */
- if (sk_open(sk) < 0)
+ if (sk_open(sk, p->p.loop) < 0)
goto err;
if (ifa->cf->mode == RIP_IM_MULTICAST)
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 8c2d5aeb..d15177da 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -78,6 +78,7 @@
#include <stdlib.h>
#include "rip.h"
+#include "lib/macro.h"
static inline void rip_lock_neighbor(struct rip_neighbor *n);
@@ -88,6 +89,7 @@ static inline void rip_iface_kick_timer(struct rip_iface *ifa);
static void rip_iface_timer(timer *timer);
static void rip_trigger_update(struct rip_proto *p);
+static struct ea_class ea_rip_metric, ea_rip_tag, ea_rip_from;
/*
* RIP routes
@@ -149,91 +151,94 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en)
if (rt)
{
/* Update */
- rta a0 = {
- .pref = p->p.main_channel->preference,
- .source = RTS_RIP,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_UNICAST,
+ struct {
+ ea_list l;
+ eattr a[3];
+ } ea_block = {
+ .l.count = ARRAY_SIZE(ea_block.a),
+ .a = {
+ EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference),
+ EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_RIP),
+ EA_LITERAL_EMBEDDED(&ea_rip_metric, 0, rt->metric),
+ },
};
- u8 rt_metric = rt->metric;
+ ea_list *ea = &ea_block.l;
+
u16 rt_tag = rt->tag;
+ struct iface *rt_from = NULL;
if (p->ecmp)
{
/* ECMP route */
- struct nexthop *nhs = NULL;
int num = 0;
for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next)
+ if (rip_valid_rte(rt))
+ num++;
+
+ struct nexthop_adata *nhad = (struct nexthop_adata *) tmp_alloc_adata((num+1) * sizeof(struct nexthop));
+ struct nexthop *nh = &nhad->nh;
+
+ for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next)
{
if (!rip_valid_rte(rt))
- continue;
+ continue;
- struct nexthop *nh = allocz(sizeof(struct nexthop));
+ *nh = (struct nexthop) {
+ .gw = rt->next_hop,
+ .iface = rt->from->ifa->iface,
+ .weight = rt->from->ifa->cf->ecmp_weight,
+ };
- nh->gw = rt->next_hop;
- nh->iface = rt->from->ifa->iface;
- nh->weight = rt->from->ifa->cf->ecmp_weight;
+ if (!rt_from)
+ rt_from = rt->from->ifa->iface;
- nexthop_insert(&nhs, nh);
- num++;
+ nh = NEXTHOP_NEXT(nh);
if (rt->tag != rt_tag)
rt_tag = 0;
}
- a0.nh = *nhs;
+ nhad->ad.length = ((void *) nh - (void *) nhad->ad.data);
+
+ ea_set_attr(&ea,
+ EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0,
+ &(nexthop_sort(nhad, tmp_linpool)->ad)));
}
else
{
/* Unipath route */
- a0.from = rt->from->nbr->addr;
- a0.nh.gw = rt->next_hop;
- a0.nh.iface = rt->from->ifa->iface;
+ rt_from = rt->from->ifa->iface;
+
+ struct nexthop_adata nhad = {
+ .nh.gw = rt->next_hop,
+ .nh.iface = rt->from->ifa->iface,
+ };
+
+ ea_set_attr_data(&ea, &ea_gen_nexthop, 0,
+ &nhad.ad.data, sizeof nhad - sizeof nhad.ad);
+ ea_set_attr_data(&ea, &ea_gen_from, 0, &rt->from->nbr->addr, sizeof(ip_addr));
}
- struct {
- ea_list l;
- eattr e[3];
- struct rip_iface_adata riad;
- } ea_block = {
- .l = { .count = 3, },
- .e = {
- {
- .id = EA_RIP_METRIC,
- .type = EAF_TYPE_INT,
- .u.data = rt_metric,
- },
- {
- .id = EA_RIP_TAG,
- .type = EAF_TYPE_INT,
- .u.data = rt_tag,
- },
- {
- .id = EA_RIP_FROM,
- .type = EAF_TYPE_IFACE,
- .u.ptr = &ea_block.riad.ad,
- }
- },
- .riad = {
- .ad = { .length = sizeof(struct rip_iface_adata) - sizeof(struct adata) },
- .iface = a0.nh.iface,
- },
- };
+ ea_set_attr_u32(&ea, &ea_rip_tag, 0, rt_tag);
- a0.eattrs = &ea_block.l;
+ struct rip_iface_adata riad = {
+ .ad = { .length = sizeof(struct rip_iface_adata) - sizeof(struct adata) },
+ .iface = rt_from,
+ };
+ ea_set_attr(&ea,
+ EA_LITERAL_DIRECT_ADATA(&ea_rip_from, 0, &riad.ad));
- rta *a = rta_lookup(&a0);
- rte *e = rte_get_temp(a, p->p.main_source);
+ rte e0 = {
+ .attrs = ea,
+ .src = p->p.main_source,
+ };
- rte_update(&p->p, en->n.addr, e);
+ rte_update(p->p.main_channel, en->n.addr, &e0, p->p.main_source);
}
else
- {
- /* Withdraw */
- rte_update(&p->p, en->n.addr, NULL);
- }
+ rte_update(p->p.main_channel, en->n.addr, NULL, p->p.main_source);
}
/**
@@ -328,8 +333,8 @@ rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from)
* it into our data structures.
*/
static void
-rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, struct rte *new,
- struct rte *old UNUSED)
+rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, struct rte *new,
+ const struct rte *old UNUSED)
{
struct rip_proto *p = (struct rip_proto *) P;
struct rip_entry *en;
@@ -338,22 +343,22 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s
if (new)
{
/* Update */
- u32 rt_tag = ea_get_int(new->attrs->eattrs, EA_RIP_TAG, 0);
- u32 rt_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, 1);
- const eattr *rie = ea_find(new->attrs->eattrs, EA_RIP_FROM);
+ u32 rt_tag = ea_get_int(new->attrs, &ea_rip_tag, 0);
+ u32 rt_metric = ea_get_int(new->attrs, &ea_rip_metric, 1);
+ const eattr *rie = ea_find(new->attrs, &ea_rip_from);
struct iface *rt_from = rie ? ((struct rip_iface_adata *) rie->u.ptr)->iface : NULL;
if (rt_metric > p->infinity)
{
log(L_WARN "%s: Invalid rip_metric value %u for route %N",
- p->p.name, rt_metric, net->n.addr);
+ p->p.name, rt_metric, net);
rt_metric = p->infinity;
}
if (rt_tag > 0xffff)
{
log(L_WARN "%s: Invalid rip_tag value %u for route %N",
- p->p.name, rt_tag, net->n.addr);
+ p->p.name, rt_tag, net);
rt_metric = p->infinity;
rt_tag = 0;
}
@@ -365,21 +370,27 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s
* collection.
*/
- en = fib_get(&p->rtable, net->n.addr);
+ en = fib_get(&p->rtable, net);
old_metric = en->valid ? en->metric : -1;
en->valid = RIP_ENTRY_VALID;
en->metric = rt_metric;
en->tag = rt_tag;
- en->from = (new->src->proto == P) ? rt_from : NULL;
- en->iface = new->attrs->nh.iface;
- en->next_hop = new->attrs->nh.gw;
+ en->from = (new->src->owner == &P->sources) ? rt_from : NULL;
+
+ eattr *nhea = ea_find(new->attrs, &ea_gen_nexthop);
+ if (nhea)
+ {
+ struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr;
+ en->iface = nhad->nh.iface;
+ en->next_hop = nhad->nh.gw;
+ }
}
else
{
/* Withdraw */
- en = fib_find(&p->rtable, net->n.addr);
+ en = fib_find(&p->rtable, net);
if (!en || en->valid != RIP_ENTRY_VALID)
return;
@@ -532,7 +543,7 @@ rip_update_bfd(struct rip_proto *p, struct rip_neighbor *n)
ip_addr saddr = rip_is_v2(p) ? n->ifa->sk->saddr : n->nbr->ifa->ip;
n->bfd_req = bfd_request_session(p->p.pool, n->nbr->addr, saddr,
n->nbr->iface, p->p.vrf,
- rip_bfd_notify, n, NULL);
+ rip_bfd_notify, n, p->p.loop, NULL);
}
if (!use_bfd && n->bfd_req)
@@ -656,9 +667,9 @@ rip_iface_update_bfd(struct rip_iface *ifa)
static void
-rip_iface_locked(struct object_lock *lock)
+rip_iface_locked(void *_ifa)
{
- struct rip_iface *ifa = lock->data;
+ struct rip_iface *ifa = _ifa;
struct rip_proto *p = ifa->rip;
if (!rip_open_socket(ifa))
@@ -720,8 +731,11 @@ rip_add_iface(struct rip_proto *p, struct iface *iface, struct rip_iface_config
lock->type = OBJLOCK_UDP;
lock->port = ic->port;
lock->iface = iface;
- lock->data = ifa;
- lock->hook = rip_iface_locked;
+ lock->event = (event) {
+ .hook = rip_iface_locked,
+ .data = ifa,
+ };
+ lock->target = &global_event_list;
ifa->lock = lock;
olock_acquire(lock);
@@ -793,11 +807,9 @@ rip_reconfigure_iface(struct rip_proto *p, struct rip_iface *ifa, struct rip_ifa
static void
rip_reconfigure_ifaces(struct rip_proto *p, struct rip_config *cf)
{
- struct iface *iface;
-
- WALK_LIST(iface, iface_list)
+ IFACE_WALK(iface)
{
- if (p->p.vrf_set && p->p.vrf != iface->master)
+ if (p->p.vrf && p->p.vrf != iface->master)
continue;
if (!(iface->flags & IF_UP))
@@ -1104,14 +1116,23 @@ rip_reload_routes(struct channel *C)
rip_kick_timer(p);
}
+static struct rte_owner_class rip_rte_owner_class;
+
+static inline struct rip_proto *
+rip_rte_proto(const rte *rte)
+{
+ return (rte->src->owner->class == &rip_rte_owner_class) ?
+ SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL;
+}
+
static u32
-rip_rte_igp_metric(struct rte *rt)
+rip_rte_igp_metric(const rte *rt)
{
- return ea_get_int(rt->attrs->eattrs, EA_RIP_METRIC, IGP_METRIC_UNKNOWN);
+ return ea_get_int(rt->attrs, &ea_rip_metric, IGP_METRIC_UNKNOWN);
}
static int
-rip_rte_better(struct rte *new, struct rte *old)
+rip_rte_better(const rte *new, const rte *old)
{
return rip_rte_igp_metric(new) < rip_rte_igp_metric(old);
}
@@ -1133,12 +1154,11 @@ rip_init(struct proto_config *CF)
P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- P->if_notify = rip_if_notify;
+ P->iface_sub.if_notify = rip_if_notify;
P->rt_notify = rip_rt_notify;
- P->neigh_notify = rip_neigh_notify;
+ P->iface_sub.neigh_notify = rip_neigh_notify;
P->reload_routes = rip_reload_routes;
- P->rte_better = rip_rte_better;
- P->rte_igp_metric = rip_rte_igp_metric;
+ P->sources.class = &rip_rte_owner_class;
return P;
}
@@ -1211,38 +1231,41 @@ rip_reconfigure(struct proto *P, struct proto_config *CF)
}
static void
-rip_get_route_info(rte *rte, byte *buf)
+rip_get_route_info(const rte *rte, byte *buf)
{
- struct rip_proto *p = (struct rip_proto *) rte->src->proto;
- u32 rt_metric = ea_get_int(rte->attrs->eattrs, EA_RIP_METRIC, p->infinity);
- u32 rt_tag = ea_get_int(rte->attrs->eattrs, EA_RIP_TAG, 0);
+ struct rip_proto *p = rip_rte_proto(rte);
+ u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity);
+ u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0);
- buf += bsprintf(buf, " (%d/%d)", rte->attrs->pref, rt_metric);
+ buf += bsprintf(buf, " (%d/%d)", rt_get_preference(rte), rt_metric);
if (rt_tag)
bsprintf(buf, " [%04x]", rt_tag);
}
-static int
-rip_get_attr(const eattr *a, byte *buf, int buflen UNUSED)
+static void
+rip_tag_format(const eattr *a, byte *buf, uint buflen)
{
- switch (a->id)
- {
- case EA_RIP_METRIC:
- bsprintf(buf, "metric: %d", a->u.data);
- return GA_FULL;
+ bsnprintf(buf, buflen, "%04x", a->u.data);
+}
- case EA_RIP_TAG:
- bsprintf(buf, "tag: %04x", a->u.data);
- return GA_FULL;
+static struct ea_class ea_rip_metric = {
+ .name = "rip_metric",
+ .type = T_INT,
+};
- case EA_RIP_FROM:
- return GA_HIDDEN;
+static struct ea_class ea_rip_tag = {
+ .name = "rip_tag",
+ .type = T_INT,
+ .format = rip_tag_format,
+};
- default:
- return GA_UNKNOWN;
- }
-}
+static struct ea_class ea_rip_from = {
+ .name = "rip_from",
+ .type = T_IFACE,
+ .readonly = 1,
+ .hidden = 1,
+};
void
rip_show_interfaces(struct proto *P, const char *iff)
@@ -1343,10 +1366,15 @@ rip_dump(struct proto *P)
}
+static struct rte_owner_class rip_rte_owner_class = {
+ .get_route_info = rip_get_route_info,
+ .rte_better = rip_rte_better,
+ .rte_igp_metric = rip_rte_igp_metric,
+};
+
struct protocol proto_rip = {
.name = "RIP",
.template = "rip%d",
- .class = PROTOCOL_RIP,
.preference = DEF_PREF_RIP,
.channel_mask = NB_IP,
.proto_size = sizeof(struct rip_proto),
@@ -1357,12 +1385,16 @@ struct protocol proto_rip = {
.start = rip_start,
.shutdown = rip_shutdown,
.reconfigure = rip_reconfigure,
- .get_route_info = rip_get_route_info,
- .get_attr = rip_get_attr
};
void
rip_build(void)
{
proto_build(&proto_rip);
+
+ EA_REGISTER_ALL(
+ &ea_rip_metric,
+ &ea_rip_tag,
+ &ea_rip_from
+ );
}
diff --git a/proto/rip/rip.h b/proto/rip/rip.h
index f8713c4a..a01f8d3b 100644
--- a/proto/rip/rip.h
+++ b/proto/rip/rip.h
@@ -16,7 +16,7 @@
#include "nest/cli.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/password.h"
#include "nest/locks.h"
#include "nest/bfd.h"
@@ -195,10 +195,6 @@ struct rip_rte
#define RIP_ENTRY_VALID 1 /* Valid outgoing route */
#define RIP_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */
-#define EA_RIP_METRIC EA_CODE(PROTOCOL_RIP, 0)
-#define EA_RIP_TAG EA_CODE(PROTOCOL_RIP, 1)
-#define EA_RIP_FROM EA_CODE(PROTOCOL_RIP, 2)
-
static inline int rip_is_v2(struct rip_proto *p)
{ return p->rip2; }
diff --git a/proto/rpki/config.Y b/proto/rpki/config.Y
index d6d326b8..743b5b42 100644
--- a/proto/rpki/config.Y
+++ b/proto/rpki/config.Y
@@ -42,6 +42,7 @@ proto: rpki_proto ;
rpki_proto_start: proto_start RPKI {
this_proto = proto_config_new(&proto_rpki, $1);
+ this_proto->loop_order = DOMAIN_ORDER(proto);
RPKI_CFG->retry_interval = RPKI_RETRY_INTERVAL;
RPKI_CFG->refresh_interval = RPKI_REFRESH_INTERVAL;
RPKI_CFG->expire_interval = RPKI_EXPIRE_INTERVAL;
diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c
index d246dd50..d7895a22 100644
--- a/proto/rpki/packets.c
+++ b/proto/rpki/packets.c
@@ -233,7 +233,12 @@ static const size_t min_pdu_size[] = {
[ERROR] = 16,
};
-static int rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...);
+static int rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...);
+
+#define rpki_send_error_pdu(cache, error_code, err_pdu_len, erroneous_pdu, fmt...) ({ \
+ rpki_send_error_pdu_(cache, error_code, err_pdu_len, erroneous_pdu, #fmt); \
+ CACHE_TRACE(D_PACKETS, cache, #fmt); \
+ })
static void
rpki_pdu_to_network_byte_order(struct pdu_header *pdu)
@@ -595,6 +600,7 @@ rpki_handle_error_pdu(struct rpki_cache *cache, const struct pdu_error *pdu)
case INTERNAL_ERROR:
case INVALID_REQUEST:
case UNSUPPORTED_PDU_TYPE:
+ CACHE_TRACE(D_PACKETS, cache, "Got UNSUPPORTED_PDU_TYPE");
rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL);
break;
@@ -652,21 +658,7 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_
{
if (cache->request_session_id)
{
- if (cache->last_update)
- {
- /*
- * This isn't the first sync and we already received records. This point
- * is after Reset Query and before importing new records from cache
- * server. We need to load new ones and kick out missing ones. So start
- * a refresh cycle.
- */
- if (cache->p->roa4_channel)
- rt_refresh_begin(cache->p->roa4_channel->table, cache->p->roa4_channel);
- if (cache->p->roa6_channel)
- rt_refresh_begin(cache->p->roa6_channel->table, cache->p->roa6_channel);
-
- cache->p->refresh_channels = 1;
- }
+ rpki_start_refresh(cache->p);
cache->session_id = pdu->session_id;
cache->request_session_id = 0;
}
@@ -842,14 +834,7 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da
(cf->keep_expire_interval ? "keeps " : ""), cache->expire_interval);
}
- if (cache->p->refresh_channels)
- {
- cache->p->refresh_channels = 0;
- if (cache->p->roa4_channel)
- rt_refresh_end(cache->p->roa4_channel->table, cache->p->roa4_channel);
- if (cache->p->roa6_channel)
- rt_refresh_end(cache->p->roa6_channel->table, cache->p->roa6_channel);
- }
+ rpki_stop_refresh(cache->p);
cache->last_update = current_time();
cache->serial_num = pdu->serial_num;
@@ -924,6 +909,9 @@ rpki_rx_hook(struct birdsock *sk, uint size)
struct rpki_cache *cache = sk->data;
struct rpki_proto *p = cache->p;
+ if ((p->p.proto_state == PS_DOWN) || (p->cache != cache))
+ return 0;
+
byte *pkt_start = sk->rbuf;
byte *end = pkt_start + size;
@@ -980,6 +968,8 @@ rpki_err_hook(struct birdsock *sk, int error_num)
CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection");
}
+ if (cache->p->cache != cache)
+ return;
rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
}
@@ -999,6 +989,9 @@ rpki_tx_hook(sock *sk)
{
struct rpki_cache *cache = sk->data;
+ if (cache->p->cache != cache)
+ return;
+
while (rpki_fire_tx(cache) > 0)
;
}
@@ -1008,6 +1001,9 @@ rpki_connected_hook(sock *sk)
{
struct rpki_cache *cache = sk->data;
+ if (cache->p->cache != cache)
+ return;
+
CACHE_TRACE(D_EVENTS, cache, "Connected");
proto_notify_state(&cache->p->p, PS_UP);
@@ -1029,7 +1025,7 @@ rpki_connected_hook(sock *sk)
* This function prepares Error PDU and sends it to a cache server.
*/
static int
-rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...)
+rpki_send_error_pdu_(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...)
{
va_list args;
char msg[128];
diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c
index 3e321627..e5638aff 100644
--- a/proto/rpki/rpki.c
+++ b/proto/rpki/rpki.c
@@ -109,6 +109,7 @@ static void rpki_schedule_next_expire_check(struct rpki_cache *cache);
static void rpki_stop_refresh_timer_event(struct rpki_cache *cache);
static void rpki_stop_retry_timer_event(struct rpki_cache *cache);
static void rpki_stop_expire_timer_event(struct rpki_cache *cache);
+static void rpki_stop_all_timers(struct rpki_cache *cache);
/*
@@ -120,26 +121,46 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_
{
struct rpki_proto *p = cache->p;
- rta a0 = {
- .pref = channel->preference,
- .source = RTS_RPKI,
- .scope = SCOPE_UNIVERSE,
- .dest = RTD_NONE,
- };
+ ea_list *ea = NULL;
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, channel->preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_RPKI);
- rta *a = rta_lookup(&a0);
- rte *e = rte_get_temp(a, p->p.main_source);
+ rte e0 = { .attrs = ea, .src = p->p.main_source, };
- rte_update2(channel, &pfxr->n, e, e->src);
+ rte_update(channel, &pfxr->n, &e0, p->p.main_source);
}
void
rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr)
{
struct rpki_proto *p = cache->p;
- rte_update2(channel, &pfxr->n, NULL, p->p.main_source);
+ rte_update(channel, &pfxr->n, NULL, p->p.main_source);
+}
+
+void
+rpki_start_refresh(struct rpki_proto *p)
+{
+ if (p->roa4_channel)
+ rt_refresh_begin(&p->roa4_channel->in_req);
+ if (p->roa6_channel)
+ rt_refresh_begin(&p->roa6_channel->in_req);
+
+ p->refresh_channels = 1;
}
+void
+rpki_stop_refresh(struct rpki_proto *p)
+{
+ if (!p->refresh_channels)
+ return;
+
+ p->refresh_channels = 0;
+
+ if (p->roa4_channel)
+ rt_refresh_end(&p->roa4_channel->in_req);
+ if (p->roa6_channel)
+ rt_refresh_end(&p->roa6_channel->in_req);
+}
/*
* RPKI Protocol Logic
@@ -196,6 +217,8 @@ rpki_force_restart_proto(struct rpki_proto *p)
{
if (p->cache)
{
+ rpki_tr_close(p->cache->tr_sock);
+ rpki_stop_all_timers(p->cache);
CACHE_DBG(p->cache, "Connection object destroying");
}
@@ -320,7 +343,7 @@ rpki_schedule_next_refresh(struct rpki_cache *cache)
btime t = cache->refresh_interval S;
CACHE_DBG(cache, "after %t s", t);
- tm_start(cache->refresh_timer, t);
+ tm_start_in(cache->refresh_timer, t, cache->p->p.loop);
}
static void
@@ -329,7 +352,7 @@ rpki_schedule_next_retry(struct rpki_cache *cache)
btime t = cache->retry_interval S;
CACHE_DBG(cache, "after %t s", t);
- tm_start(cache->retry_timer, t);
+ tm_start_in(cache->retry_timer, t, cache->p->p.loop);
}
static void
@@ -340,7 +363,7 @@ rpki_schedule_next_expire_check(struct rpki_cache *cache)
t = MAX(t, 1 S);
CACHE_DBG(cache, "after %t s", t);
- tm_start(cache->expire_timer, t);
+ tm_start_in(cache->expire_timer, t, cache->p->p.loop);
}
static void
@@ -357,13 +380,21 @@ rpki_stop_retry_timer_event(struct rpki_cache *cache)
tm_stop(cache->retry_timer);
}
-static void UNUSED
+static void
rpki_stop_expire_timer_event(struct rpki_cache *cache)
{
CACHE_DBG(cache, "Stop");
tm_stop(cache->expire_timer);
}
+static void
+rpki_stop_all_timers(struct rpki_cache *cache)
+{
+ rpki_stop_refresh_timer_event(cache);
+ rpki_stop_retry_timer_event(cache);
+ rpki_stop_expire_timer_event(cache);
+}
+
static int
rpki_do_we_recv_prefix_pdu_in_last_seconds(struct rpki_cache *cache)
{
@@ -386,6 +417,9 @@ rpki_refresh_hook(timer *tm)
{
struct rpki_cache *cache = tm->data;
+ if (cache->p->cache != cache)
+ return;
+
CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state));
switch (cache->state)
@@ -432,6 +466,9 @@ rpki_retry_hook(timer *tm)
{
struct rpki_cache *cache = tm->data;
+ if (cache->p->cache != cache)
+ return;
+
CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state));
switch (cache->state)
@@ -482,6 +519,9 @@ rpki_expire_hook(timer *tm)
{
struct rpki_cache *cache = tm->data;
+ if (cache->p->cache != cache)
+ return;
+
if (!cache->last_update)
return;
@@ -624,6 +664,7 @@ rpki_close_connection(struct rpki_cache *cache)
{
CACHE_TRACE(D_EVENTS, cache, "Closing a connection");
rpki_tr_close(cache->tr_sock);
+ rpki_stop_refresh(cache->p);
proto_notify_state(&cache->p->p, PS_START);
}
@@ -950,7 +991,6 @@ rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS
struct protocol proto_rpki = {
.name = "RPKI",
.template = "rpki%d",
- .class = PROTOCOL_RPKI,
.preference = DEF_PREF_RPKI,
.proto_size = sizeof(struct rpki_proto),
.config_size = sizeof(struct rpki_config),
diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h
index 8a5c38fd..20253844 100644
--- a/proto/rpki/rpki.h
+++ b/proto/rpki/rpki.h
@@ -13,7 +13,7 @@
#define _BIRD_RPKI_H_
#include "nest/bird.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/protocol.h"
#include "lib/socket.h"
#include "lib/ip.h"
@@ -83,6 +83,8 @@ const char *rpki_cache_state_to_str(enum rpki_cache_state state);
void rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr);
void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr);
+void rpki_start_refresh(struct rpki_proto *p);
+void rpki_stop_refresh(struct rpki_proto *p);
/*
* RPKI Protocol Logic
diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c
index 6333f367..425ad460 100644
--- a/proto/rpki/ssh_transport.c
+++ b/proto/rpki/ssh_transport.c
@@ -35,7 +35,7 @@ rpki_tr_ssh_open(struct rpki_tr_sock *tr)
sk->ssh->subsystem = "rpki-rtr";
sk->ssh->state = SK_SSH_CONNECT;
- if (sk_open(sk) != 0)
+ if (sk_open(sk, cache->p->p.loop) != 0)
return RPKI_TR_ERROR;
return RPKI_TR_SUCCESS;
diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c
index 132f8e2d..ebb8030f 100644
--- a/proto/rpki/tcp_transport.c
+++ b/proto/rpki/tcp_transport.c
@@ -28,7 +28,7 @@ rpki_tr_tcp_open(struct rpki_tr_sock *tr)
sk->type = SK_TCP_ACTIVE;
- if (sk_open(sk) != 0)
+ if (sk_open(sk, tr->cache->p->p.loop) != 0)
return RPKI_TR_ERROR;
return RPKI_TR_SUCCESS;
diff --git a/proto/static/static.c b/proto/static/static.c
index bb93305e..6bae827b 100644
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@ -38,7 +38,7 @@
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/cli.h"
#include "conf/conf.h"
#include "filter/filter.h"
@@ -47,91 +47,96 @@
#include "static.h"
-static linpool *static_lp;
-
static inline struct rte_src * static_get_source(struct static_proto *p, uint i)
{ return i ? rt_get_source(&p->p, i) : p->p.main_source; }
+static inline void static_free_source(struct rte_src *src, uint i)
+{ if (i) rt_unlock_source(src); }
+
static void
static_announce_rte(struct static_proto *p, struct static_route *r)
{
- rta *a = allocz(RTA_MAX_SIZE);
- struct rte_src *src = static_get_source(p, r->index);
- a->source = RTS_STATIC;
- a->scope = SCOPE_UNIVERSE;
- a->dest = r->dest;
- a->pref = p->p.main_channel->preference;
+ struct rte_src *src;
+ ea_list *ea = NULL;
+ ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference);
+ ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_STATIC);
if (r->dest == RTD_UNICAST)
{
- struct static_route *r2;
- struct nexthop *nhs = NULL;
+ uint sz = 0;
+ for (struct static_route *r2 = r; r2; r2 = r2->mp_next)
+ if (r2->active)
+ sz += NEXTHOP_SIZE_CNT(r2->mls ? r2->mls->length / sizeof(u32) : 0);
- for (r2 = r; r2; r2 = r2->mp_next)
+ if (!sz)
+ goto withdraw;
+
+ struct nexthop_adata *nhad = allocz(sz + sizeof *nhad);
+ struct nexthop *nh = &nhad->nh;
+
+ for (struct static_route *r2 = r; r2; r2 = r2->mp_next)
{
if (!r2->active)
continue;
- struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE);
- nh->gw = r2->via;
- nh->iface = r2->neigh->iface;
- nh->flags = r2->onlink ? RNF_ONLINK : 0;
- nh->weight = r2->weight;
+ *nh = (struct nexthop) {
+ .gw = r2->via,
+ .iface = r2->neigh->iface,
+ .flags = r2->onlink ? RNF_ONLINK : 0,
+ .weight = r2->weight,
+ };
+
if (r2->mls)
{
- nh->labels = r2->mls->len;
- memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32));
+ nh->labels = r2->mls->length / sizeof(u32);
+ memcpy(nh->label, r2->mls->data, r2->mls->length);
}
- nexthop_insert(&nhs, nh);
+ nh = NEXTHOP_NEXT(nh);
}
- if (!nhs)
- goto withdraw;
-
- nexthop_link(a, nhs);
+ ea_set_attr_data(&ea, &ea_gen_nexthop, 0,
+ nhad->ad.data, (void *) nh - (void *) nhad->ad.data);
}
- if (r->dest == RTDX_RECURSIVE)
+ else if (r->dest == RTDX_RECURSIVE)
{
rtable *tab = ipa_is_ip4(r->via) ? p->igp_table_ip4 : p->igp_table_ip6;
- rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls);
+ u32 *labels = r->mls ? (void *) r->mls->data : NULL;
+ u32 lnum = r->mls ? r->mls->length / sizeof(u32) : 0;
+
+ ea_set_hostentry(&ea, p->p.main_channel->table, tab,
+ r->via, IPA_NONE, lnum, labels);
}
+ else if (r->dest)
+ ea_set_dest(&ea, 0, r->dest);
+
/* Already announced */
if (r->state == SRS_CLEAN)
return;
/* We skip rta_lookup() here */
- rte *e = rte_get_temp(a, src);
+ src = static_get_source(p, r->index);
+ rte e0 = { .attrs = ea, .src = src, .net = r->net, }, *e = &e0;
+ /* Evaluate the filter */
if (r->cmds)
- {
- /* Create a temporary table node */
- e->net = alloca(sizeof(net) + r->net->length);
- memset(e->net, 0, sizeof(net) + r->net->length);
- net_copy(e->net->n.addr, r->net);
+ f_eval_rte(r->cmds, e);
- /* Evaluate the filter */
- f_eval_rte(r->cmds, &e, static_lp);
+ rte_update(p->p.main_channel, r->net, e, src);
+ static_free_source(src, r->index);
- /* Remove the temporary node */
- e->net = NULL;
- }
-
- rte_update2(p->p.main_channel, r->net, e, src);
r->state = SRS_CLEAN;
-
- if (r->cmds)
- lp_flush(static_lp);
-
return;
withdraw:
if (r->state == SRS_DOWN)
return;
- rte_update2(p->p.main_channel, r->net, NULL, src);
+ src = static_get_source(p, r->index);
+ rte_update(p->p.main_channel, r->net, NULL, src);
+ static_free_source(src, r->index);
r->state = SRS_DOWN;
}
@@ -208,7 +213,7 @@ static_update_bfd(struct static_proto *p, struct static_route *r)
// ip_addr local = ipa_nonzero(r->local) ? r->local : nb->ifa->ip;
r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip,
nb->iface, p->p.vrf,
- static_bfd_notify, r, NULL);
+ static_bfd_notify, r, p->p.loop, NULL);
}
if (!bfd_up && r->bfd_req)
@@ -297,7 +302,11 @@ static void
static_remove_rte(struct static_proto *p, struct static_route *r)
{
if (r->state)
- rte_update2(p->p.main_channel, r->net, NULL, static_get_source(p, r->index));
+ {
+ struct rte_src *src = static_get_source(p, r->index);
+ rte_update(p->p.main_channel, r->net, NULL, src);
+ static_free_source(src, r->index);
+ }
static_reset_rte(p, r);
}
@@ -320,31 +329,17 @@ static_same_dest(struct static_route *x, struct static_route *y)
(x->weight != y->weight) ||
(x->use_bfd != y->use_bfd) ||
(!x->mls != !y->mls) ||
- ((x->mls) && (y->mls) && (x->mls->len != y->mls->len)))
+ ((x->mls) && (y->mls) && adata_same(x->mls, y->mls)))
return 0;
-
- if (!x->mls)
- continue;
-
- for (uint i = 0; i < x->mls->len; i++)
- if (x->mls->stack[i] != y->mls->stack[i])
- return 0;
}
return !x && !y;
case RTDX_RECURSIVE:
if (!ipa_equal(x->via, y->via) ||
(!x->mls != !y->mls) ||
- ((x->mls) && (y->mls) && (x->mls->len != y->mls->len)))
+ ((x->mls) && (y->mls) && adata_same(x->mls, y->mls)))
return 0;
- if (!x->mls)
- return 1;
-
- for (uint i = 0; i < x->mls->len; i++)
- if (x->mls->stack[i] != y->mls->stack[i])
- return 0;
-
return 1;
default:
@@ -411,18 +406,18 @@ static_reload_routes(struct channel *C)
}
static int
-static_rte_better(rte *new, rte *old)
+static_rte_better(const rte *new, const rte *old)
{
- u32 n = ea_get_int(new->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
- u32 o = ea_get_int(old->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
+ u32 n = ea_get_int(new->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
+ u32 o = ea_get_int(old->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
return n < o;
}
static int
-static_rte_mergable(rte *pri, rte *sec)
+static_rte_mergable(const rte *pri, const rte *sec)
{
- u32 a = ea_get_int(pri->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
- u32 b = ea_get_int(sec->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN);
+ u32 a = ea_get_int(pri->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
+ u32 b = ea_get_int(sec->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN);
return a == b;
}
@@ -441,11 +436,11 @@ static_postconfig(struct proto_config *CF)
if (!cf->igp_table_ip4)
cf->igp_table_ip4 = (cc->table->addr_type == NET_IP4) ?
- cc->table : cf->c.global->def_tables[NET_IP4];
+ cc->table : rt_get_default_table(cf->c.global, NET_IP4);
if (!cf->igp_table_ip6)
cf->igp_table_ip6 = (cc->table->addr_type == NET_IP6) ?
- cc->table : cf->c.global->def_tables[NET_IP6];
+ cc->table : rt_get_default_table(cf->c.global, NET_IP6);
WALK_LIST(r, cf->routes)
if (r->net && (r->net->type != CF->net_type))
@@ -454,6 +449,8 @@ static_postconfig(struct proto_config *CF)
static_index_routes(cf);
}
+static struct rte_owner_class static_rte_owner_class;
+
static struct proto *
static_init(struct proto_config *CF)
{
@@ -463,10 +460,9 @@ static_init(struct proto_config *CF)
P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- P->neigh_notify = static_neigh_notify;
+ P->iface_sub.neigh_notify = static_neigh_notify;
P->reload_routes = static_reload_routes;
- P->rte_better = static_rte_better;
- P->rte_mergable = static_rte_mergable;
+ P->sources.class = &static_rte_owner_class;
if (cf->igp_table_ip4)
p->igp_table_ip4 = cf->igp_table_ip4->table;
@@ -484,9 +480,6 @@ static_start(struct proto *P)
struct static_config *cf = (void *) P->cf;
struct static_route *r;
- if (!static_lp)
- static_lp = lp_new(&root_pool);
-
if (p->igp_table_ip4)
rt_lock_table(p->igp_table_ip4);
@@ -501,7 +494,12 @@ static_start(struct proto *P)
proto_notify_state(P, PS_UP);
WALK_LIST(r, cf->routes)
+ {
+ struct lp_state lps;
+ lp_save(tmp_linpool, &lps);
static_add_rte(p, r);
+ lp_restore(tmp_linpool, &lps);
+ }
return PS_UP;
}
@@ -517,19 +515,13 @@ static_shutdown(struct proto *P)
WALK_LIST(r, cf->routes)
static_reset_rte(p, r);
- return PS_DOWN;
-}
-
-static void
-static_cleanup(struct proto *P)
-{
- struct static_proto *p = (void *) P;
-
if (p->igp_table_ip4)
rt_unlock_table(p->igp_table_ip4);
if (p->igp_table_ip6)
rt_unlock_table(p->igp_table_ip6);
+
+ return PS_DOWN;
}
static void
@@ -717,13 +709,14 @@ static_copy_config(struct proto_config *dest, struct proto_config *src)
}
static void
-static_get_route_info(rte *rte, byte *buf)
+static_get_route_info(const rte *rte, byte *buf)
{
- eattr *a = ea_find(rte->attrs->eattrs, EA_GEN_IGP_METRIC);
- if (a)
- buf += bsprintf(buf, " (%d/%u)", rte->attrs->pref, a->u.data);
+ eattr *a = ea_find(rte->attrs, &ea_gen_igp_metric);
+ u32 pref = rt_get_preference(rte);
+ if (a && (a->u.data < IGP_METRIC_UNKNOWN))
+ buf += bsprintf(buf, " (%d/%u)", pref, a->u.data);
else
- buf += bsprintf(buf, " (%d)", rte->attrs->pref);
+ buf += bsprintf(buf, " (%d)", pref);
}
static void
@@ -773,11 +766,15 @@ static_show(struct proto *P)
static_show_rt(r);
}
+static struct rte_owner_class static_rte_owner_class = {
+ .get_route_info = static_get_route_info,
+ .rte_better = static_rte_better,
+ .rte_mergable = static_rte_mergable,
+};
struct protocol proto_static = {
.name = "Static",
.template = "static%d",
- .class = PROTOCOL_STATIC,
.preference = DEF_PREF_STATIC,
.channel_mask = NB_ANY,
.proto_size = sizeof(struct static_proto),
@@ -787,10 +784,8 @@ struct protocol proto_static = {
.dump = static_dump,
.start = static_start,
.shutdown = static_shutdown,
- .cleanup = static_cleanup,
.reconfigure = static_reconfigure,
.copy_config = static_copy_config,
- .get_route_info = static_get_route_info,
};
void
diff --git a/proto/static/static.h b/proto/static/static.h
index fc91f71c..ea7ca33b 100644
--- a/proto/static/static.h
+++ b/proto/static/static.h
@@ -9,7 +9,7 @@
#ifndef _BIRD_STATIC_H_
#define _BIRD_STATIC_H_
-#include "nest/route.h"
+#include "nest/rt.h"
#include "nest/bfd.h"
#include "lib/buffer.h"
@@ -49,7 +49,7 @@ struct static_route {
byte weight; /* Multipath next hop weight */
byte use_bfd; /* Configured to use BFD */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
- mpls_label_stack *mls; /* MPLS label stack; may be NULL */
+ struct adata *mls; /* MPLS label stack; may be NULL */
};
/*