summaryrefslogtreecommitdiff
path: root/proto
diff options
context:
space:
mode:
Diffstat (limited to 'proto')
-rw-r--r--proto/Doc3
-rw-r--r--proto/babel/Makefile9
-rw-r--r--proto/babel/babel.c1455
-rw-r--r--proto/babel/babel.h156
-rw-r--r--proto/babel/config.Y20
-rw-r--r--proto/babel/packets.c368
-rw-r--r--proto/bfd/Makefile9
-rw-r--r--proto/bfd/bfd.c67
-rw-r--r--proto/bfd/bfd.h14
-rw-r--r--proto/bfd/io.c281
-rw-r--r--proto/bfd/io.h67
-rw-r--r--proto/bfd/packets.c13
-rw-r--r--proto/bgp/Makefile9
-rw-r--r--proto/bgp/attrs.c2756
-rw-r--r--proto/bgp/bgp.c1656
-rw-r--r--proto/bgp/bgp.h505
-rw-r--r--proto/bgp/config.Y159
-rw-r--r--proto/bgp/packets.c3405
-rw-r--r--proto/ospf/Makefile9
-rw-r--r--proto/ospf/config.Y139
-rw-r--r--proto/ospf/dbdes.c6
-rw-r--r--proto/ospf/hello.c29
-rw-r--r--proto/ospf/iface.c75
-rw-r--r--proto/ospf/lsalib.c32
-rw-r--r--proto/ospf/lsalib.h9
-rw-r--r--proto/ospf/lsupd.c6
-rw-r--r--proto/ospf/neighbor.c40
-rw-r--r--proto/ospf/ospf.c174
-rw-r--r--proto/ospf/ospf.h207
-rw-r--r--proto/ospf/packet.c23
-rw-r--r--proto/ospf/rt.c512
-rw-r--r--proto/ospf/rt.h6
-rw-r--r--proto/ospf/topology.c163
-rw-r--r--proto/ospf/topology.h8
-rw-r--r--proto/pipe/Makefile10
-rw-r--r--proto/pipe/config.Y25
-rw-r--r--proto/pipe/pipe.c266
-rw-r--r--proto/pipe/pipe.h16
-rw-r--r--proto/radv/Makefile9
-rw-r--r--proto/radv/config.Y35
-rw-r--r--proto/radv/packets.c21
-rw-r--r--proto/radv/radv.c166
-rw-r--r--proto/radv/radv.h21
-rw-r--r--proto/rip/Makefile9
-rw-r--r--proto/rip/config.Y32
-rw-r--r--proto/rip/packets.c106
-rw-r--r--proto/rip/rip.c233
-rw-r--r--proto/rip/rip.h51
-rw-r--r--proto/rpki/Doc5
-rw-r--r--proto/rpki/Makefile6
-rw-r--r--proto/rpki/config.Y144
-rw-r--r--proto/rpki/packets.c1073
-rw-r--r--proto/rpki/packets.h45
-rw-r--r--proto/rpki/rpki.c928
-rw-r--r--proto/rpki/rpki.h165
-rw-r--r--proto/rpki/ssh_transport.c75
-rw-r--r--proto/rpki/tcp_transport.c78
-rw-r--r--proto/rpki/transport.c135
-rw-r--r--proto/rpki/transport.h79
-rw-r--r--proto/static/Makefile10
-rw-r--r--proto/static/config.Y136
-rw-r--r--proto/static/static.c892
-rw-r--r--proto/static/static.h54
63 files changed, 11086 insertions, 6129 deletions
diff --git a/proto/Doc b/proto/Doc
index 04c25bc0..ef573d2a 100644
--- a/proto/Doc
+++ b/proto/Doc
@@ -4,7 +4,8 @@ C bfd
C bgp
C ospf
C pipe
-C rip
C radv
+C rip
+C rpki
C static
S ../nest/rt-dev.c
diff --git a/proto/babel/Makefile b/proto/babel/Makefile
index 400ffbac..a5b4a13b 100644
--- a/proto/babel/Makefile
+++ b/proto/babel/Makefile
@@ -1,5 +1,6 @@
-source=babel.c packets.c
-root-rel=../../
-dir-name=proto/babel
+src := babel.c packets.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
-include ../../Rules
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index 38be6909..aa7e8b68 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -2,6 +2,8 @@
* BIRD -- The Babel protocol
*
* Copyright (c) 2015--2016 Toke Hoiland-Jorgensen
+ * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2016--2017 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*
@@ -29,17 +31,14 @@
*
* The main route selection is done in babel_select_route(). This is called when
* an entry is updated by receiving updates from the network or when modified by
- * internal timers. It performs feasibility checks on the available routes for
- * the prefix and selects the one with the lowest metric to be announced to the
- * core.
+ * internal timers. The function selects from feasible and reachable routes the
+ * one with the lowest metric to be announced to the core.
*/
#include <stdlib.h>
#include "babel.h"
-#define OUR_ROUTE(r) (r->neigh == NULL)
-
/*
* Is one number greater than or equal to another mod 2^16? This is based on the
* definition of serial number space in RFC 1982. Note that arguments are of
@@ -48,47 +47,49 @@
static inline int ge_mod64k(uint a, uint b)
{ return (u16)(a - b) < 0x8000; }
-static void babel_dump_entry(struct babel_entry *e);
-static void babel_dump_route(struct babel_route *r);
-static void babel_select_route(struct babel_entry *e);
-static void babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n);
-static void babel_send_wildcard_request(struct babel_iface *ifa);
-static int babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen,
- u64 router_id, u16 seqno);
-static void babel_trigger_iface_update(struct babel_iface *ifa);
-static void babel_trigger_update(struct babel_proto *p);
-static void babel_send_seqno_request(struct babel_entry *e);
+static void babel_expire_requests(struct babel_proto *p, struct babel_entry *e);
+static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod);
+static inline void babel_announce_retraction(struct babel_proto *p, struct babel_entry *e);
+static void babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n);
+static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr);
+static void babel_update_cost(struct babel_neighbor *n);
static inline void babel_kick_timer(struct babel_proto *p);
static inline void babel_iface_kick_timer(struct babel_iface *ifa);
+static inline void babel_lock_neighbor(struct babel_neighbor *nbr)
+{ if (nbr) nbr->uc++; }
+
+static inline void babel_unlock_neighbor(struct babel_neighbor *nbr)
+{ if (nbr && !--nbr->uc) mb_free(nbr); }
+
/*
* Functions to maintain data structures
*/
static void
-babel_init_entry(struct fib_node *n)
+babel_init_entry(void *E)
{
- struct babel_entry *e = (void *) n;
- e->proto = NULL;
- e->selected_in = NULL;
- e->selected_out = NULL;
- e->updated = now;
+ struct babel_entry *e = E;
+
+ e->updated = current_time();
+ init_list(&e->requests);
init_list(&e->sources);
init_list(&e->routes);
}
static inline struct babel_entry *
-babel_find_entry(struct babel_proto *p, ip_addr prefix, u8 plen)
+babel_find_entry(struct babel_proto *p, const net_addr *n)
{
- return fib_find(&p->rtable, &prefix, plen);
+ struct fib *rtable = (n->type == NET_IP4) ? &p->ip4_rtable : &p->ip6_rtable;
+ return fib_find(rtable, n);
}
static struct babel_entry *
-babel_get_entry(struct babel_proto *p, ip_addr prefix, u8 plen)
+babel_get_entry(struct babel_proto *p, const net_addr *n)
{
- struct babel_entry *e = fib_get(&p->rtable, &prefix, plen);
- e->proto = p;
+ struct fib *rtable = (n->type == NET_IP4) ? &p->ip4_rtable : &p->ip6_rtable;
+ struct babel_entry *e = fib_get(rtable, n);
return e;
}
@@ -105,9 +106,8 @@ babel_find_source(struct babel_entry *e, u64 router_id)
}
static struct babel_source *
-babel_get_source(struct babel_entry *e, u64 router_id)
+babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id)
{
- struct babel_proto *p = e->proto;
struct babel_source *s = babel_find_source(e, router_id);
if (s)
@@ -115,7 +115,7 @@ babel_get_source(struct babel_entry *e, u64 router_id)
s = sl_alloc(p->source_slab);
s->router_id = router_id;
- s->expires = now + BABEL_GARBAGE_INTERVAL;
+ s->expires = current_time() + BABEL_GARBAGE_INTERVAL;
s->seqno = 0;
s->metric = BABEL_INFINITY;
add_tail(&e->sources, NODE s);
@@ -124,14 +124,14 @@ babel_get_source(struct babel_entry *e, u64 router_id)
}
static void
-babel_expire_sources(struct babel_entry *e)
+babel_expire_sources(struct babel_proto *p, struct babel_entry *e)
{
- struct babel_proto *p = e->proto;
struct babel_source *n, *nx;
+ btime now_ = current_time();
WALK_LIST_DELSAFE(n, nx, e->sources)
{
- if (n->expires && n->expires <= now)
+ if (n->expires && n->expires <= now_)
{
rem_node(NODE n);
sl_free(p->source_slab, n);
@@ -152,9 +152,8 @@ babel_find_route(struct babel_entry *e, struct babel_neighbor *n)
}
static struct babel_route *
-babel_get_route(struct babel_entry *e, struct babel_neighbor *nbr)
+babel_get_route(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *nbr)
{
- struct babel_proto *p = e->proto;
struct babel_route *r = babel_find_route(e, nbr);
if (r)
@@ -162,94 +161,91 @@ babel_get_route(struct babel_entry *e, struct babel_neighbor *nbr)
r = sl_alloc(p->route_slab);
memset(r, 0, sizeof(*r));
+
r->e = e;
+ r->neigh = nbr;
add_tail(&e->routes, NODE r);
-
- if (nbr)
- {
- r->neigh = nbr;
- r->expires = now + BABEL_GARBAGE_INTERVAL;
- add_tail(&nbr->routes, NODE &r->neigh_route);
- }
+ add_tail(&nbr->routes, NODE &r->neigh_route);
return r;
}
-static void
-babel_flush_route(struct babel_route *r)
+static inline void
+babel_retract_route(struct babel_proto *p, struct babel_route *r)
{
- struct babel_proto *p = r->e->proto;
+ r->metric = r->advert_metric = BABEL_INFINITY;
- DBG("Babel: Flush route %I/%d router_id %lR neigh %I\n",
- r->e->n.prefix, r->e->n.pxlen, r->router_id, r->neigh ? r->neigh->addr : IPA_NONE);
-
- rem_node(NODE r);
+ if (r == r->e->selected)
+ babel_select_route(p, r->e, r);
+}
- if (r->neigh)
- rem_node(&r->neigh_route);
+static void
+babel_flush_route(struct babel_proto *p, struct babel_route *r)
+{
+ DBG("Babel: Flush route %N router_id %lR neigh %I\n",
+ r->e->n.addr, r->router_id, r->neigh->addr);
- if (r->e->selected_in == r)
- r->e->selected_in = NULL;
+ rem_node(NODE r);
+ rem_node(&r->neigh_route);
- if (r->e->selected_out == r)
- r->e->selected_out = NULL;
+ if (r->e->selected == r)
+ r->e->selected = NULL;
sl_free(p->route_slab, r);
}
static void
-babel_expire_route(struct babel_route *r)
+babel_expire_route(struct babel_proto *p, struct babel_route *r)
{
- struct babel_proto *p = r->e->proto;
- struct babel_entry *e = r->e;
+ struct babel_config *cf = (void *) p->p.cf;
- TRACE(D_EVENTS, "Route expiry timer for %I/%d router-id %lR fired",
- e->n.prefix, e->n.pxlen, r->router_id);
+ TRACE(D_EVENTS, "Route expiry timer for %N router-id %lR fired",
+ r->e->n.addr, r->router_id);
if (r->metric < BABEL_INFINITY)
{
- r->metric = BABEL_INFINITY;
- r->expires = now + r->expiry_interval;
+ r->metric = r->advert_metric = BABEL_INFINITY;
+ r->expires = current_time() + cf->hold_time;
}
else
{
- babel_flush_route(r);
+ babel_flush_route(p, r);
}
}
static void
-babel_refresh_route(struct babel_route *r)
+babel_refresh_route(struct babel_proto *p, struct babel_route *r)
{
- if (!OUR_ROUTE(r) && (r == r->e->selected_in))
- babel_send_route_request(r->e, r->neigh);
+ if (r == r->e->selected)
+ babel_send_route_request(p, r->e, r->neigh);
r->refresh_time = 0;
}
static void
-babel_expire_routes(struct babel_proto *p)
+babel_expire_routes_(struct babel_proto *p, struct fib *rtable)
{
- struct babel_entry *e;
+ struct babel_config *cf = (void *) p->p.cf;
struct babel_route *r, *rx;
struct fib_iterator fit;
+ btime now_ = current_time();
- FIB_ITERATE_INIT(&fit, &p->rtable);
+ FIB_ITERATE_INIT(&fit, rtable);
loop:
- FIB_ITERATE_START(&p->rtable, &fit, n)
+ FIB_ITERATE_START(rtable, &fit, struct babel_entry, e)
{
- e = (struct babel_entry *) n;
int changed = 0;
WALK_LIST_DELSAFE(r, rx, e->routes)
{
- if (r->refresh_time && r->refresh_time <= now)
- babel_refresh_route(r);
+ if (r->refresh_time && r->refresh_time <= now_)
+ babel_refresh_route(p, r);
- if (r->expires && r->expires <= now)
+ if (r->expires && r->expires <= now_)
{
- babel_expire_route(r);
- changed = 1;
+ changed = changed || (r == e->selected);
+ babel_expire_route(p, r);
}
}
@@ -258,25 +254,148 @@ loop:
/*
* We have to restart the iteration because there may be a cascade of
* synchronous events babel_select_route() -> nest table change ->
- * babel_rt_notify() -> p->rtable change, invalidating hidden variables.
+ * babel_rt_notify() -> rtable change, invalidating hidden variables.
*/
+ FIB_ITERATE_PUT(&fit);
+ babel_select_route(p, e, NULL);
+ goto loop;
+ }
+
+ /* Clean up stale entries */
+ if ((e->valid == BABEL_ENTRY_STALE) && ((e->updated + cf->hold_time) <= now_))
+ e->valid = BABEL_ENTRY_DUMMY;
- FIB_ITERATE_PUT(&fit, n);
- babel_select_route(e);
+ /* Clean up unreachable route */
+ if (e->unreachable && (!e->valid || (e->router_id == p->router_id)))
+ {
+ FIB_ITERATE_PUT(&fit);
+ babel_announce_retraction(p, e);
goto loop;
}
- babel_expire_sources(e);
+ babel_expire_sources(p, e);
+ babel_expire_requests(p, e);
/* Remove empty entries */
- if (EMPTY_LIST(e->sources) && EMPTY_LIST(e->routes))
+ if (!e->valid && EMPTY_LIST(e->routes) && EMPTY_LIST(e->sources) && EMPTY_LIST(e->requests))
{
- FIB_ITERATE_PUT(&fit, n);
- fib_delete(&p->rtable, e);
+ FIB_ITERATE_PUT(&fit);
+ fib_delete(rtable, e);
goto loop;
}
}
- FIB_ITERATE_END(n);
+ FIB_ITERATE_END;
+}
+
+static void
+babel_expire_routes(struct babel_proto *p)
+{
+ babel_expire_routes_(p, &p->ip4_rtable);
+ babel_expire_routes_(p, &p->ip6_rtable);
+}
+
+static inline int seqno_request_valid(struct babel_seqno_request *sr)
+{ return !sr->nbr || sr->nbr->ifa; }
+
+/*
+ * Add seqno request to the table of pending requests (RFC 6126 3.2.6) and send
+ * it to network. Do nothing if it is already in the table.
+ */
+
+static void
+babel_add_seqno_request(struct babel_proto *p, struct babel_entry *e,
+ u64 router_id, u16 seqno, u8 hop_count,
+ struct babel_neighbor *nbr)
+{
+ struct babel_seqno_request *sr;
+
+ WALK_LIST(sr, e->requests)
+ if (sr->router_id == router_id)
+ {
+ /* Found matching or newer */
+ if (ge_mod64k(sr->seqno, seqno) && seqno_request_valid(sr))
+ return;
+
+ /* Found older */
+ babel_unlock_neighbor(sr->nbr);
+ rem_node(NODE sr);
+ goto found;
+ }
+
+ /* No entries found */
+ sr = sl_alloc(p->seqno_slab);
+
+found:
+ sr->router_id = router_id;
+ sr->seqno = seqno;
+ sr->hop_count = hop_count;
+ sr->count = 0;
+ sr->expires = current_time() + BABEL_SEQNO_REQUEST_EXPIRY;
+ babel_lock_neighbor(sr->nbr = nbr);
+ add_tail(&e->requests, NODE sr);
+
+ babel_send_seqno_request(p, e, sr);
+}
+
+static void
+babel_remove_seqno_request(struct babel_proto *p, struct babel_seqno_request *sr)
+{
+ babel_unlock_neighbor(sr->nbr);
+ rem_node(NODE sr);
+ sl_free(p->seqno_slab, sr);
+}
+
+static int
+babel_satisfy_seqno_request(struct babel_proto *p, struct babel_entry *e,
+ u64 router_id, u16 seqno)
+{
+ struct babel_seqno_request *sr;
+
+ WALK_LIST(sr, e->requests)
+ if ((sr->router_id == router_id) && ge_mod64k(seqno, sr->seqno))
+ {
+ /* Found the request, remove it */
+ babel_remove_seqno_request(p, sr);
+ return 1;
+ }
+
+ return 0;
+}
+
+static void
+babel_expire_requests(struct babel_proto *p, struct babel_entry *e)
+{
+ struct babel_seqno_request *sr, *srx;
+ btime now_ = current_time();
+
+ WALK_LIST_DELSAFE(sr, srx, e->requests)
+ {
+ /* Remove seqno requests sent to dead neighbors */
+ if (!seqno_request_valid(sr))
+ {
+ babel_remove_seqno_request(p, sr);
+ continue;
+ }
+
+ /* Handle expired requests - resend or remove */
+ if (sr->expires && sr->expires <= now_)
+ {
+ if (sr->count < BABEL_SEQNO_REQUEST_RETRY)
+ {
+ sr->count++;
+ sr->expires += (BABEL_SEQNO_REQUEST_EXPIRY << sr->count);
+ babel_send_seqno_request(p, e, sr);
+ }
+ else
+ {
+ TRACE(D_EVENTS, "Seqno request for %N router-id %lR expired",
+ e->n.addr, sr->router_id);
+
+ babel_remove_seqno_request(p, sr);
+ continue;
+ }
+ }
+ }
}
static struct babel_neighbor *
@@ -294,61 +413,79 @@ babel_find_neighbor(struct babel_iface *ifa, ip_addr addr)
static struct babel_neighbor *
babel_get_neighbor(struct babel_iface *ifa, ip_addr addr)
{
+ struct babel_proto *p = ifa->proto;
struct babel_neighbor *nbr = babel_find_neighbor(ifa, addr);
if (nbr)
return nbr;
+ TRACE(D_EVENTS, "New neighbor %I on %s", addr, ifa->iface->name);
+
nbr = mb_allocz(ifa->pool, sizeof(struct babel_neighbor));
nbr->ifa = ifa;
nbr->addr = addr;
+ nbr->rxcost = BABEL_INFINITY;
nbr->txcost = BABEL_INFINITY;
+ nbr->cost = BABEL_INFINITY;
init_list(&nbr->routes);
+ babel_lock_neighbor(nbr);
add_tail(&ifa->neigh_list, NODE nbr);
return nbr;
}
static void
-babel_flush_neighbor(struct babel_neighbor *nbr)
+babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr)
{
- struct babel_proto *p = nbr->ifa->proto;
+ struct babel_route *r;
node *n;
- TRACE(D_EVENTS, "Flushing neighbor %I", nbr->addr);
+ TRACE(D_EVENTS, "Removing neighbor %I on %s", nbr->addr, nbr->ifa->iface->name);
WALK_LIST_FIRST(n, nbr->routes)
{
- struct babel_route *r = SKIP_BACK(struct babel_route, neigh_route, n);
- struct babel_entry *e = r->e;
- int selected = (r == e->selected_in);
-
- babel_flush_route(r);
-
- if (selected)
- babel_select_route(e);
+ r = SKIP_BACK(struct babel_route, neigh_route, n);
+ babel_retract_route(p, r);
+ babel_flush_route(p, r);
}
+ nbr->ifa = NULL;
rem_node(NODE nbr);
- mb_free(nbr);
+ babel_unlock_neighbor(nbr);
}
static void
-babel_expire_ihu(struct babel_neighbor *nbr)
+babel_expire_ihu(struct babel_proto *p, struct babel_neighbor *nbr)
{
+ TRACE(D_EVENTS, "IHU from nbr %I on %s expired", nbr->addr, nbr->ifa->iface->name);
+
nbr->txcost = BABEL_INFINITY;
+ nbr->ihu_expiry = 0;
+ babel_update_cost(nbr);
}
static void
-babel_expire_hello(struct babel_neighbor *nbr)
+babel_expire_hello(struct babel_proto *p, struct babel_neighbor *nbr, btime now_)
{
+again:
nbr->hello_map <<= 1;
if (nbr->hello_cnt < 16)
nbr->hello_cnt++;
- if (!nbr->hello_map)
- babel_flush_neighbor(nbr);
+ nbr->hello_expiry += nbr->last_hello_int;
+
+ /* We may expire multiple hellos if last_hello_int is too short */
+ if (nbr->hello_map && nbr->hello_expiry <= now_)
+ goto again;
+
+ TRACE(D_EVENTS, "Hello from nbr %I on %s expired, %d left",
+ nbr->addr, nbr->ifa->iface->name, u32_popcount(nbr->hello_map));
+
+ if (nbr->hello_map)
+ babel_update_cost(nbr);
+ else
+ babel_flush_neighbor(p, nbr);
}
static void
@@ -356,16 +493,17 @@ babel_expire_neighbors(struct babel_proto *p)
{
struct babel_iface *ifa;
struct babel_neighbor *nbr, *nbx;
+ btime now_ = current_time();
WALK_LIST(ifa, p->interfaces)
{
WALK_LIST_DELSAFE(nbr, nbx, ifa->neigh_list)
{
- if (nbr->ihu_expiry && nbr->ihu_expiry <= now)
- babel_expire_ihu(nbr);
+ if (nbr->ihu_expiry && nbr->ihu_expiry <= now_)
+ babel_expire_ihu(p, nbr);
- if (nbr->hello_expiry && nbr->hello_expiry <= now)
- babel_expire_hello(nbr);
+ if (nbr->hello_expiry && nbr->hello_expiry <= now_)
+ babel_expire_hello(p, nbr, now_);
}
}
}
@@ -399,66 +537,81 @@ babel_is_feasible(struct babel_source *s, u16 seqno, u16 metric)
((seqno == s->seqno) && (metric < s->metric));
}
-static u16
-babel_compute_rxcost(struct babel_neighbor *n)
+/* Simple additive metric - Appendix 3.1 in the RFC */
+static inline u16
+babel_compute_metric(struct babel_neighbor *n, uint metric)
{
- struct babel_iface *ifa = n->ifa;
- u8 cnt, missed;
- u16 map=n->hello_map;
-
- if (!map) return BABEL_INFINITY;
- cnt = u32_popcount(map); // number of bits set
- missed = n->hello_cnt-cnt;
+ return MIN(metric + n->cost, BABEL_INFINITY);
+}
- if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS)
- {
- /* ETX - Appendix 2.2 in the RFC.
+static void
+babel_update_cost(struct babel_neighbor *nbr)
+{
+ struct babel_proto *p = nbr->ifa->proto;
+ struct babel_iface_config *cf = nbr->ifa->cf;
+ uint rcv = u32_popcount(nbr->hello_map); // number of bits set
+ uint max = nbr->hello_cnt;
+ uint rxcost = BABEL_INFINITY; /* Cost to announce in IHU */
+ uint txcost = BABEL_INFINITY; /* Effective cost for route selection */
- beta = prob. of successful transmission.
- rxcost = BABEL_RXCOST_WIRELESS/beta
+ if (!rcv || !nbr->ifa->up)
+ goto done;
- Since: beta = 1-missed/n->hello_cnt = cnt/n->hello_cnt
- Then: rxcost = BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt
- */
- if (!cnt) return BABEL_INFINITY;
- return BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt;
- }
- else
+ switch (cf->type)
{
+ case BABEL_IFACE_TYPE_WIRED:
/* k-out-of-j selection - Appendix 2.1 in the RFC. */
- DBG("Babel: Missed %d hellos from %I\n", missed, n->addr);
- /* Link is bad if more than half the expected hellos were lost */
- return (missed > n->hello_cnt/2) ? BABEL_INFINITY : ifa->cf->rxcost;
- }
-}
+ /* Link is bad if less than cf->limit/16 of expected hellos were received */
+ if (rcv * 16 < cf->limit * max)
+ break;
-static u16
-babel_compute_cost(struct babel_neighbor *n)
-{
- struct babel_iface *ifa = n->ifa;
- u16 rxcost = babel_compute_rxcost(n);
- if (rxcost == BABEL_INFINITY) return rxcost;
- else if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS)
- {
- /* ETX - Appendix 2.2 in the RFC */
- return (MAX(n->txcost, BABEL_RXCOST_WIRELESS) * rxcost)/BABEL_RXCOST_WIRELESS;
+ rxcost = cf->rxcost;
+ txcost = nbr->txcost;
+ break;
+
+ case BABEL_IFACE_TYPE_WIRELESS:
+ /*
+ * ETX - Appendix 2.2 in the RFC.
+ *
+ * alpha = prob. of successful transmission estimated by the neighbor
+ * beta = prob. of successful transmission estimated by the router
+ * rxcost = nominal rxcost of the router / beta
+ * txcost = nominal rxcost of the neighbor / (alpha * beta)
+ * = received txcost / beta
+ *
+ * Note that received txcost is just neighbor's rxcost. Beta is rcv/max,
+ * we use inverse values of beta (i.e. max/rcv) to stay in integers.
+ */
+ rxcost = MIN( cf->rxcost * max / rcv, BABEL_INFINITY);
+ txcost = MIN(nbr->txcost * max / rcv, BABEL_INFINITY);
+ break;
}
- else
+
+done:
+ /* If RX cost changed, send IHU with next Hello */
+ if (rxcost != nbr->rxcost)
{
- /* k-out-of-j selection - Appendix 2.1 in the RFC. */
- return n->txcost;
+ nbr->rxcost = rxcost;
+ nbr->ihu_cnt = 0;
}
-}
-/* Simple additive metric - Appendix 3.1 in the RFC */
-static u16
-babel_compute_metric(struct babel_neighbor *n, uint metric)
-{
- metric += babel_compute_cost(n);
- return MIN(metric, BABEL_INFINITY);
-}
+ /* If link cost changed, run route selection */
+ if (txcost != nbr->cost)
+ {
+ TRACE(D_EVENTS, "Cost of nbr %I on %s changed from %u to %u",
+ nbr->addr, nbr->ifa->iface->name, nbr->cost, txcost);
+ nbr->cost = txcost;
+
+ struct babel_route *r; node *n;
+ WALK_LIST2(r, n, nbr->routes, neigh_route)
+ {
+ r->metric = babel_compute_metric(nbr, r->advert_metric);
+ babel_select_route(p, r->e, r);
+ }
+ }
+}
/**
* babel_announce_rte - announce selected route to the core
@@ -466,123 +619,151 @@ babel_compute_metric(struct babel_neighbor *n, uint metric)
* @e: Babel route entry to announce
*
* This function announces a Babel entry to the core if it has a selected
- * incoming path, and retracts it otherwise. If the selected entry has infinite
- * metric, the route is announced as unreachable.
+ * incoming path, and retracts it otherwise. If there is no selected route but
+ * the entry is valid and ours, the unreachable route is announced instead.
*/
static void
babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
{
- struct babel_route *r = e->selected_in;
+ struct babel_route *r = e->selected;
+ struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
if (r)
{
- net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen);
- rta A = {
+ rta a0 = {
.src = p->p.main_source,
.source = RTS_BABEL,
.scope = SCOPE_UNIVERSE,
- .cast = RTC_UNICAST,
- .dest = r->metric == BABEL_INFINITY ? RTD_UNREACHABLE : RTD_ROUTER,
- .flags = 0,
+ .dest = RTD_UNICAST,
.from = r->neigh->addr,
- .iface = r->neigh->ifa->iface,
+ .nh.gw = r->next_hop,
+ .nh.iface = r->neigh->ifa->iface,
};
- if (r->metric < BABEL_INFINITY)
- A.gw = r->next_hop;
-
- rta *a = rta_lookup(&A);
+ rta *a = rta_lookup(&a0);
rte *rte = rte_get_temp(a);
+ rte->u.babel.seqno = r->seqno;
rte->u.babel.metric = r->metric;
rte->u.babel.router_id = r->router_id;
- rte->net = n;
rte->pflags = 0;
- rte_update(&p->p, n, rte);
+ e->unreachable = 0;
+ rte_update2(c, e->n.addr, rte, p->p.main_source);
+ }
+ else if (e->valid && (e->router_id != p->router_id))
+ {
+ /* Unreachable */
+ rta a0 = {
+ .src = p->p.main_source,
+ .source = RTS_BABEL,
+ .scope = SCOPE_UNIVERSE,
+ .dest = RTD_UNREACHABLE,
+ };
+
+ rta *a = rta_lookup(&a0);
+ rte *rte = rte_get_temp(a);
+ memset(&rte->u.babel, 0, sizeof(rte->u.babel));
+ rte->pflags = 0;
+ rte->pref = 1;
+
+ e->unreachable = 1;
+ rte_update2(c, e->n.addr, rte, p->p.main_source);
}
else
{
/* Retraction */
- net *n = net_find(p->p.table, e->n.prefix, e->n.pxlen);
- rte_update(&p->p, n, NULL);
+ e->unreachable = 0;
+ rte_update2(c, e->n.addr, NULL, p->p.main_source);
}
}
+/* Special case of babel_announce_rte() just for retraction */
+static inline void
+babel_announce_retraction(struct babel_proto *p, struct babel_entry *e)
+{
+ struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
+ e->unreachable = 0;
+ rte_update2(c, e->n.addr, NULL, p->p.main_source);
+}
+
+
/**
* babel_select_route - select best route for given route entry
+ * @p: Babel protocol instance
* @e: Babel entry to select the best route for
+ * @mod: Babel route that was modified or NULL if unspecified
*
- * Select the best feasible route for a given prefix among the routes received
- * from peers, and propagate it to the nest. This just selects the feasible
- * route with the lowest metric.
+ * Select the best reachable and feasible route for a given prefix among the
+ * routes received from peers, and propagate it to the nest. This just selects
+ * the reachable and feasible route with the lowest metric, but keeps the old
+ * one selected in case of a tie.
*
* If no feasible route is available for a prefix that previously had a route
- * selected, a seqno request is sent to try to get a valid route. In the
- * meantime, the route is marked as infeasible in the nest (to blackhole packets
- * going to it, as per the RFC).
+ * selected, a seqno request is sent to try to get a valid route. If the entry
+ * is valid and not owned by us, the unreachable route is announced to the nest
+ * (to blackhole packets going to it, as per section 2.8). It is later removed
+ * by babel_expire_routes(). Otherwise, the route is just removed from the nest.
+ *
+ * Argument @mod is used to optimize best route calculation. When specified, the
+ * function can assume that only the @mod route was modified to avoid full best
+ * route selection and announcement when non-best route was modified in minor
+ * way. The caller is advised to not call babel_select_route() when no change is
+ * done (e.g. periodic route updates) to avoid unnecessary announcements of the
+ * same best route. The caller is not required to call the function in case of a
+ * retraction of a non-best route.
*
- * If no feasible route is available, and no previous route is selected, the
- * route is removed from the nest entirely.
+ * Note that the function does not activate triggered updates. That is done by
+ * babel_rt_notify() when the change is propagated back to Babel.
*/
static void
-babel_select_route(struct babel_entry *e)
+babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod)
{
- struct babel_proto *p = e->proto;
- struct babel_route *r, *cur = e->selected_in;
+ struct babel_route *r, *best = e->selected;
- /* try to find the best feasible route */
- WALK_LIST(r, e->routes)
- if (!OUR_ROUTE(r) && /* prevent propagating our own routes back to core */
- (!cur || r->metric < cur->metric) &&
- babel_is_feasible(babel_find_source(e, r->router_id), r->seqno, r->advert_metric))
- cur = r;
-
- if (cur && !OUR_ROUTE(cur) &&
- ((!e->selected_in && cur->metric < BABEL_INFINITY) ||
- (e->selected_in && cur->metric < e->selected_in->metric)))
+ /* Shortcut if only non-best was modified */
+ if (mod && (mod != best))
{
- TRACE(D_EVENTS, "Picked new route for prefix %I/%d: router id %lR metric %d",
- e->n.prefix, e->n.pxlen, cur->router_id, cur->metric);
-
- e->selected_in = cur;
- e->updated = now;
- babel_announce_rte(p, e);
+ /* Either select modified route, or keep old best route */
+ if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible)
+ best = mod;
+ else
+ return;
}
- else if (!cur || cur->metric == BABEL_INFINITY)
+ else
{
- /* Couldn't find a feasible route. If we have a selected route, that means
- it just became infeasible; so set it's metric to infinite and install it
- (as unreachable), then send a seqno request.
-
- babel_build_rte() will set the unreachable flag if the metric is BABEL_INFINITY.*/
- if (e->selected_in)
- {
- TRACE(D_EVENTS, "Lost feasible route for prefix %I/%d",
- e->n.prefix, e->n.pxlen);
-
- e->selected_in->metric = BABEL_INFINITY;
- e->updated = now;
+ /* Selected route may be modified and no longer admissible */
+ if (!best || (best->metric == BABEL_INFINITY) || !best->feasible)
+ best = NULL;
+
+ /* Find the best feasible route from all routes */
+ WALK_LIST(r, e->routes)
+ if ((r->metric < (best ? best->metric : BABEL_INFINITY)) && r->feasible)
+ best = r;
+ }
- babel_send_seqno_request(e);
- babel_announce_rte(p, e);
+ if (best)
+ {
+ if (best != e->selected)
+ TRACE(D_EVENTS, "Picked new route for prefix %N: router-id %lR metric %d",
+ e->n.addr, best->router_id, best->metric);
+ }
+ else if (e->selected)
+ {
+ /*
+ * We have lost all feasible routes. We have to broadcast seqno request
+ * (Section 3.8.2.1) and keep unreachable route for a while (section 2.8).
+ * The latter is done automatically by babel_announce_rte().
+ */
- /* Section 3.6 of the RFC forbids an infeasible from being selected. This
- is cleared after announcing the route to the core to make sure an
- unreachable route is propagated first. */
- e->selected_in = NULL;
- }
- else
- {
- /* No route currently selected, and no new one selected; this means we
- don't have a route to this destination anymore (and were probably
- called from an expiry timer). Remove the route from the nest. */
- TRACE(D_EVENTS, "Flushing route for prefix %I/%d", e->n.prefix, e->n.pxlen);
-
- e->selected_in = NULL;
- e->updated = now;
- babel_announce_rte(p, e);
- }
+ TRACE(D_EVENTS, "Lost feasible route for prefix %N", e->n.addr);
+ if (e->valid && (e->selected->router_id == e->router_id))
+ babel_add_seqno_request(p, e, e->selected->router_id, e->selected->seqno + 1, 0, NULL);
}
+ else
+ return;
+
+ e->selected = best;
+ babel_announce_rte(p, e);
}
/*
@@ -610,11 +791,11 @@ babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neig
msg->type = BABEL_TLV_IHU;
msg->ihu.addr = n->addr;
- msg->ihu.rxcost = babel_compute_rxcost(n);
+ msg->ihu.rxcost = n->rxcost;
msg->ihu.interval = ifa->cf->ihu_interval;
- TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %d",
- msg->ihu.addr, msg->ihu.rxcost, msg->ihu.interval);
+ TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %t",
+ msg->ihu.addr, msg->ihu.rxcost, (btime) msg->ihu.interval);
}
static void
@@ -623,6 +804,7 @@ babel_send_ihu(struct babel_iface *ifa, struct babel_neighbor *n)
union babel_msg msg = {};
babel_build_ihu(&msg, ifa, n);
babel_send_unicast(&msg, ifa, n->addr);
+ n->ihu_cnt = BABEL_IHU_INTERVAL_FACTOR;
}
static void
@@ -631,14 +813,18 @@ babel_send_ihus(struct babel_iface *ifa)
struct babel_neighbor *n;
WALK_LIST(n, ifa->neigh_list)
{
- union babel_msg msg = {};
- babel_build_ihu(&msg, ifa, n);
- babel_enqueue(&msg, ifa);
+ if (n->hello_cnt && (--n->ihu_cnt <= 0))
+ {
+ union babel_msg msg = {};
+ babel_build_ihu(&msg, ifa, n);
+ babel_enqueue(&msg, ifa);
+ n->ihu_cnt = BABEL_IHU_INTERVAL_FACTOR;
+ }
}
}
static void
-babel_send_hello(struct babel_iface *ifa, u8 send_ihu)
+babel_send_hello(struct babel_iface *ifa)
{
struct babel_proto *p = ifa->proto;
union babel_msg msg = {};
@@ -647,30 +833,26 @@ babel_send_hello(struct babel_iface *ifa, u8 send_ihu)
msg.hello.seqno = ifa->hello_seqno++;
msg.hello.interval = ifa->cf->hello_interval;
- TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %d",
- ifa->ifname, msg.hello.seqno, msg.hello.interval);
+ TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t",
+ ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval);
babel_enqueue(&msg, ifa);
- if (send_ihu)
- babel_send_ihus(ifa);
+ babel_send_ihus(ifa);
}
static void
-babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n)
+babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n)
{
- struct babel_proto *p = e->proto;
- struct babel_iface *ifa = n->ifa;
union babel_msg msg = {};
- TRACE(D_PACKETS, "Sending route request for %I/%d to %I",
- e->n.prefix, e->n.pxlen, n->addr);
+ TRACE(D_PACKETS, "Sending route request for %N to %I",
+ e->n.addr, n->addr);
msg.type = BABEL_TLV_ROUTE_REQUEST;
- msg.route_request.prefix = e->n.prefix;
- msg.route_request.plen = e->n.pxlen;
+ net_copy(&msg.route_request.net, e->n.addr);
- babel_send_unicast(&msg, ifa, n->addr);
+ babel_send_unicast(&msg, n->ifa, n->addr);
}
static void
@@ -689,56 +871,32 @@ babel_send_wildcard_request(struct babel_iface *ifa)
}
static void
-babel_send_seqno_request(struct babel_entry *e)
+babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr)
{
- struct babel_proto *p = e->proto;
- struct babel_route *r = e->selected_in;
- struct babel_iface *ifa = NULL;
- struct babel_source *s = NULL;
union babel_msg msg = {};
- s = babel_find_source(e, r->router_id);
- if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1))
- return;
-
- TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d",
- e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1);
-
msg.type = BABEL_TLV_SEQNO_REQUEST;
- msg.seqno_request.plen = e->n.pxlen;
- msg.seqno_request.seqno = s->seqno + 1;
- msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT;
- msg.seqno_request.router_id = r->router_id;
- msg.seqno_request.prefix = e->n.prefix;
-
- WALK_LIST(ifa, p->interfaces)
- babel_enqueue(&msg, ifa);
-}
+ msg.seqno_request.hop_count = sr->hop_count ?: BABEL_INITIAL_HOP_COUNT;
+ msg.seqno_request.seqno = sr->seqno;
+ msg.seqno_request.router_id = sr->router_id;
+ net_copy(&msg.seqno_request.net, e->n.addr);
-static void
-babel_unicast_seqno_request(struct babel_route *r)
-{
- struct babel_entry *e = r->e;
- struct babel_proto *p = e->proto;
- struct babel_iface *ifa = r->neigh->ifa;
- struct babel_source *s = NULL;
- union babel_msg msg = {};
-
- s = babel_find_source(e, r->router_id);
- if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1))
- return;
-
- TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d",
- e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1);
+ if (sr->nbr)
+ {
+ TRACE(D_PACKETS, "Sending seqno request for %N router-id %lR seqno %d to %I on %s",
+ e->n.addr, sr->router_id, sr->seqno, sr->nbr->addr, sr->nbr->ifa->ifname);
- msg.type = BABEL_TLV_SEQNO_REQUEST;
- msg.seqno_request.plen = e->n.pxlen;
- msg.seqno_request.seqno = s->seqno + 1;
- msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT;
- msg.seqno_request.router_id = r->router_id;
- msg.seqno_request.prefix = e->n.prefix;
+ babel_send_unicast(&msg, sr->nbr->ifa, sr->nbr->addr);
+ }
+ else
+ {
+ TRACE(D_PACKETS, "Sending broadcast seqno request for %N router-id %lR seqno %d",
+ e->n.addr, sr->router_id, sr->seqno);
- babel_send_unicast(&msg, ifa, r->neigh->addr);
+ struct babel_iface *ifa;
+ WALK_LIST(ifa, p->interfaces)
+ babel_enqueue(&msg, ifa);
+ }
}
/**
@@ -752,49 +910,55 @@ babel_unicast_seqno_request(struct babel_route *r)
* transmitted entry is updated.
*/
static void
-babel_send_update(struct babel_iface *ifa, bird_clock_t changed)
+babel_send_update_(struct babel_iface *ifa, btime changed, struct fib *rtable)
{
struct babel_proto *p = ifa->proto;
- FIB_WALK(&p->rtable, n)
+ /* Update seqno increase was requested */
+ if (p->update_seqno_inc)
{
- struct babel_entry *e = (void *) n;
- struct babel_route *r = e->selected_out;
+ p->update_seqno++;
+ p->update_seqno_inc = 0;
+ }
- if (!r)
+ FIB_WALK(rtable, struct babel_entry, e)
+ {
+ if (!e->valid)
continue;
/* Our own seqno might have changed, in which case we update the routes we
originate. */
- if ((r->router_id == p->router_id) && (r->seqno < p->update_seqno))
+ if ((e->router_id == p->router_id) && (e->seqno < p->update_seqno))
{
- r->seqno = p->update_seqno;
- e->updated = now;
+ e->seqno = p->update_seqno;
+ e->updated = current_time();
}
/* Skip routes that weren't updated since 'changed' time */
if (e->updated < changed)
continue;
- TRACE(D_PACKETS, "Sending update for %I/%d router-id %lR seqno %d metric %d",
- e->n.prefix, e->n.pxlen, r->router_id, r->seqno, r->metric);
+ TRACE(D_PACKETS, "Sending update for %N router-id %lR seqno %d metric %d",
+ e->n.addr, e->router_id, e->seqno, e->metric);
union babel_msg msg = {};
msg.type = BABEL_TLV_UPDATE;
- msg.update.plen = e->n.pxlen;
msg.update.interval = ifa->cf->update_interval;
- msg.update.seqno = r->seqno;
- msg.update.metric = r->metric;
- msg.update.prefix = e->n.prefix;
- msg.update.router_id = r->router_id;
+ msg.update.seqno = e->seqno;
+ msg.update.metric = e->metric;
+ msg.update.router_id = e->router_id;
+ net_copy(&msg.update.net, e->n.addr);
+
+ msg.update.next_hop = ((e->n.addr->type == NET_IP4) ?
+ ifa->next_hop_ip4 : ifa->next_hop_ip6);
babel_enqueue(&msg, ifa);
/* Update feasibility distance for redistributed routes */
- if (!OUR_ROUTE(r))
+ if (e->router_id != p->router_id)
{
- struct babel_source *s = babel_get_source(e, r->router_id);
- s->expires = now + BABEL_GARBAGE_INTERVAL;
+ struct babel_source *s = babel_get_source(p, e, e->router_id);
+ s->expires = current_time() + BABEL_GARBAGE_INTERVAL;
if ((msg.update.seqno > s->seqno) ||
((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric)))
@@ -808,6 +972,15 @@ babel_send_update(struct babel_iface *ifa, bird_clock_t changed)
}
static void
+babel_send_update(struct babel_iface *ifa, btime changed)
+{
+ struct babel_proto *p = ifa->proto;
+
+ babel_send_update_(ifa, changed, &p->ip4_rtable);
+ babel_send_update_(ifa, changed, &p->ip6_rtable);
+}
+
+static void
babel_trigger_iface_update(struct babel_iface *ifa)
{
struct babel_proto *p = ifa->proto;
@@ -819,7 +992,7 @@ babel_trigger_iface_update(struct babel_iface *ifa)
TRACE(D_EVENTS, "Scheduling triggered updates for %s seqno %d",
ifa->iface->name, p->update_seqno);
- ifa->want_triggered = now;
+ ifa->want_triggered = current_time();
babel_iface_kick_timer(ifa);
}
@@ -839,20 +1012,18 @@ babel_trigger_update(struct babel_proto *p)
/* A retraction is an update with an infinite metric */
static void
-babel_send_retraction(struct babel_iface *ifa, ip_addr prefix, int plen)
+babel_send_retraction(struct babel_iface *ifa, net_addr *n)
{
struct babel_proto *p = ifa->proto;
union babel_msg msg = {};
- TRACE(D_PACKETS, "Sending retraction for %I/%d seqno %d",
- prefix, plen, p->update_seqno);
+ TRACE(D_PACKETS, "Sending retraction for %N seqno %d", n, p->update_seqno);
msg.type = BABEL_TLV_UPDATE;
- msg.update.plen = plen;
msg.update.interval = ifa->cf->update_interval;
msg.update.seqno = p->update_seqno;
msg.update.metric = BABEL_INFINITY;
- msg.update.prefix = prefix;
+ msg.update.net = *n;
babel_enqueue(&msg, ifa);
}
@@ -881,7 +1052,7 @@ babel_send_wildcard_retraction(struct babel_iface *ifa)
/* Update hello history according to Appendix A1 of the RFC */
static void
-babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval)
+babel_update_hello_history(struct babel_neighbor *n, u16 seqno, uint interval)
{
/*
* Compute the difference between expected and received seqno (modulo 2^16).
@@ -892,7 +1063,7 @@ babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval)
u16 delta = ((uint) seqno - (uint) n->next_hello_seqno);
- if (delta == 0)
+ if ((delta == 0) || (n->hello_cnt == 0))
{
/* Do nothing */
}
@@ -919,84 +1090,10 @@ babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval)
n->hello_map = (n->hello_map << 1) | 1;
n->next_hello_seqno = seqno+1;
if (n->hello_cnt < 16) n->hello_cnt++;
- n->hello_expiry = now + BABEL_HELLO_EXPIRY_FACTOR(interval);
-}
-
-static void
-babel_expire_seqno_requests(struct babel_proto *p)
-{
- struct babel_seqno_request *n, *nx;
- WALK_LIST_DELSAFE(n, nx, p->seqno_cache)
- {
- if ((n->updated + BABEL_SEQNO_REQUEST_EXPIRY) <= now)
- {
- rem_node(NODE n);
- sl_free(p->seqno_slab, n);
- }
- }
-}
-
-/*
- * Checks the seqno request cache for a matching request and returns failure if
- * found. Otherwise, a new entry is stored in the cache.
- */
-static int
-babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen,
- u64 router_id, u16 seqno)
-{
- struct babel_seqno_request *r;
-
- WALK_LIST(r, p->seqno_cache)
- {
- if (ipa_equal(r->prefix, prefix) && (r->plen == plen) &&
- (r->router_id == router_id) && (r->seqno == seqno))
- return 0;
- }
-
- /* no entries found */
- r = sl_alloc(p->seqno_slab);
- r->prefix = prefix;
- r->plen = plen;
- r->router_id = router_id;
- r->seqno = seqno;
- r->updated = now;
- add_tail(&p->seqno_cache, NODE r);
-
- return 1;
-}
-
-static void
-babel_forward_seqno_request(struct babel_entry *e,
- struct babel_msg_seqno_request *in,
- ip_addr sender)
-{
- struct babel_proto *p = e->proto;
- struct babel_route *r;
-
- TRACE(D_PACKETS, "Forwarding seqno request for %I/%d router-id %lR seqno %d",
- e->n.prefix, e->n.pxlen, in->router_id, in->seqno);
-
- WALK_LIST(r, e->routes)
- {
- if ((r->router_id == in->router_id) &&
- !OUR_ROUTE(r) &&
- !ipa_equal(r->neigh->addr, sender))
- {
- if (!babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, in->router_id, in->seqno))
- return;
- union babel_msg msg = {};
- msg.type = BABEL_TLV_SEQNO_REQUEST;
- msg.seqno_request.plen = in->plen;
- msg.seqno_request.seqno = in->seqno;
- msg.seqno_request.hop_count = in->hop_count-1;
- msg.seqno_request.router_id = in->router_id;
- msg.seqno_request.prefix = e->n.prefix;
-
- babel_send_unicast(&msg, r->neigh->ifa, r->neigh->addr);
- return;
- }
- }
+ /* Update expiration */
+ n->hello_expiry = current_time() + BABEL_HELLO_EXPIRY_FACTOR(interval);
+ n->last_hello_int = interval;
}
@@ -1010,8 +1107,8 @@ babel_handle_ack_req(union babel_msg *m, struct babel_iface *ifa)
struct babel_proto *p = ifa->proto;
struct babel_msg_ack_req *msg = &m->ack_req;
- TRACE(D_PACKETS, "Handling ACK request nonce %d interval %d",
- msg->nonce, msg->interval);
+ TRACE(D_PACKETS, "Handling ACK request nonce %d interval %t",
+ msg->nonce, (btime) msg->interval);
babel_send_ack(ifa, msg->sender, msg->nonce);
}
@@ -1022,12 +1119,17 @@ babel_handle_hello(union babel_msg *m, struct babel_iface *ifa)
struct babel_proto *p = ifa->proto;
struct babel_msg_hello *msg = &m->hello;
- TRACE(D_PACKETS, "Handling hello seqno %d interval %d",
- msg->seqno, msg->interval);
+ TRACE(D_PACKETS, "Handling hello seqno %d interval %t",
+ msg->seqno, (btime) msg->interval);
struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender);
+ int first_hello = !n->hello_cnt;
+
babel_update_hello_history(n, msg->seqno, msg->interval);
- if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS)
+ babel_update_cost(n);
+
+ /* Speed up session establishment by sending IHU immediately */
+ if (first_hello)
babel_send_ihu(ifa, n);
}
@@ -1041,12 +1143,13 @@ babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa)
if ((msg->ae != BABEL_AE_WILDCARD) && !ipa_equal(msg->addr, ifa->addr))
return;
- TRACE(D_PACKETS, "Handling IHU rxcost %d interval %d",
- msg->rxcost, msg->interval);
+ TRACE(D_PACKETS, "Handling IHU rxcost %d interval %t",
+ msg->rxcost, (btime) msg->interval);
struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender);
n->txcost = msg->rxcost;
- n->ihu_expiry = now + BABEL_IHU_EXPIRY_FACTOR(msg->interval);
+ n->ihu_expiry = current_time() + BABEL_IHU_EXPIRY_FACTOR(msg->interval);
+ babel_update_cost(n);
}
/**
@@ -1069,12 +1172,15 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
struct babel_neighbor *nbr;
struct babel_entry *e;
struct babel_source *s;
- struct babel_route *r;
+ struct babel_route *r, *best;
node *n;
- int feasible;
+ int feasible, metric;
- TRACE(D_PACKETS, "Handling update for %I/%d with seqno %d metric %d",
- msg->prefix, msg->plen, msg->seqno, msg->metric);
+ if (msg->wildcard)
+ TRACE(D_PACKETS, "Handling wildcard retraction", msg->seqno);
+ else
+ TRACE(D_PACKETS, "Handling update for %N with seqno %d metric %d",
+ &msg->net, msg->seqno, msg->metric);
nbr = babel_find_neighbor(ifa, msg->sender);
if (!nbr)
@@ -1089,38 +1195,12 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
return;
}
- /*
- * RFC section 3.5.4:
- *
- * When a Babel node receives an update (id, prefix, seqno, metric) from a
- * neighbour neigh with a link cost value equal to cost, it checks whether it
- * already has a routing table entry indexed by (neigh, id, prefix).
- *
- * If no such entry exists:
- *
- * o if the update is unfeasible, it is ignored;
- *
- * o if the metric is infinite (the update is a retraction), the update is
- * ignored;
- *
- * o otherwise, a new route table entry is created, indexed by (neigh, id,
- * prefix), with seqno equal to seqno and an advertised metric equal to the
- * metric carried by the update.
- *
- * If such an entry exists:
- *
- * o if the entry is currently installed and the update is unfeasible, then
- * the behaviour depends on whether the router-ids of the two entries match.
- * If the router-ids are different, the update is treated as though it were
- * a retraction (i.e., as though the metric were FFFF hexadecimal). If the
- * router-ids are equal, the update is ignored;
- *
- * o otherwise (i.e., if either the update is feasible or the entry is not
- * currently installed), then the entry's sequence number, advertised
- * metric, metric, and router-id are updated and, unless the advertised
- * metric is infinite, the route's expiry timer is reset to a small multiple
- * of the Interval value included in the update.
- */
+ struct channel *c = (msg->net.type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
+ if (!c || (c->channel_state != CS_UP))
+ {
+ DBG("Babel: Ignoring update for inactive address family.\n");
+ return;
+ }
/* Retraction */
if (msg->metric == BABEL_INFINITY)
@@ -1134,13 +1214,12 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
WALK_LIST(n, nbr->routes)
{
r = SKIP_BACK(struct babel_route, neigh_route, n);
- r->metric = BABEL_INFINITY;
- babel_select_route(r->e);
+ babel_retract_route(p, r);
}
}
else
{
- e = babel_find_entry(p, msg->prefix, msg->plen);
+ e = babel_find_entry(p, &msg->net);
if (!e)
return;
@@ -1151,68 +1230,56 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
if (!r)
return;
- r->metric = BABEL_INFINITY;
- babel_select_route(e);
+ /* Router-id, next-hop and seqno are ignored for retractions */
+ babel_retract_route(p, r);
}
/* Done with retractions */
return;
}
- e = babel_get_entry(p, msg->prefix, msg->plen);
- r = babel_find_route(e, nbr); /* the route entry indexed by neighbour */
+ /* Regular update */
+ e = babel_get_entry(p, &msg->net);
+ r = babel_get_route(p, e, nbr); /* the route entry indexed by neighbour */
s = babel_find_source(e, msg->router_id); /* for feasibility */
feasible = babel_is_feasible(s, msg->seqno, msg->metric);
+ metric = babel_compute_metric(nbr, msg->metric);
+ best = e->selected;
- if (!r)
- {
- if (!feasible)
- return;
+ /* RFC section 3.8.2.2 - Dealing with unfeasible updates */
+ if (!feasible && (metric != BABEL_INFINITY) &&
+ (!best || (r == best) || (metric < best->metric)))
+ babel_add_seqno_request(p, e, s->router_id, s->seqno + 1, 0, nbr);
- r = babel_get_route(e, nbr);
- r->advert_metric = msg->metric;
- r->router_id = msg->router_id;
- r->metric = babel_compute_metric(nbr, msg->metric);
- r->next_hop = msg->next_hop;
- r->seqno = msg->seqno;
- }
- else if (r == r->e->selected_in && !feasible)
- {
- /*
- * Route is installed and update is infeasible - we may lose the route,
- * so send a unicast seqno request (section 3.8.2.2 second paragraph).
- */
- babel_unicast_seqno_request(r);
+ /* Special case - ignore unfeasible update to best route */
+ if (r == best && !feasible && (msg->router_id == r->router_id))
+ return;
- if (msg->router_id == r->router_id)
- return;
+ r->expires = current_time() + BABEL_ROUTE_EXPIRY_FACTOR(msg->interval);
+ r->refresh_time = current_time() + BABEL_ROUTE_REFRESH_FACTOR(msg->interval);
- /* Treat as retraction */
- r->metric = BABEL_INFINITY;
- }
- else
+ /* No further processing if there is no change */
+ if ((r->feasible == feasible) && (r->seqno == msg->seqno) &&
+ (r->metric == metric) && (r->advert_metric == msg->metric) &&
+ (r->router_id == msg->router_id) && ipa_equal(r->next_hop, msg->next_hop))
+ return;
+
+ /* Update the route entry (RFC section 3.5.4, last paragraph) */
+ r->feasible = feasible;
+ r->seqno = msg->seqno;
+ r->metric = metric;
+ r->advert_metric = msg->metric;
+ r->router_id = msg->router_id;
+ r->next_hop = msg->next_hop;
+
+ /* If received update satisfies seqno request, we send triggered updates */
+ if (babel_satisfy_seqno_request(p, e, msg->router_id, msg->seqno))
{
- /* Last paragraph above - update the entry */
- r->advert_metric = msg->metric;
- r->metric = babel_compute_metric(nbr, msg->metric);
- r->next_hop = msg->next_hop;
-
- r->router_id = msg->router_id;
- r->seqno = msg->seqno;
-
- r->expiry_interval = BABEL_ROUTE_EXPIRY_FACTOR(msg->interval);
- r->expires = now + r->expiry_interval;
- if (r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL)
- r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL;
-
- /* If the route is not feasible at this point, it means it is from another
- neighbour than the one currently selected; so send a unicast seqno
- request to try to get a better route (section 3.8.2.2 last paragraph). */
- if (!feasible)
- babel_unicast_seqno_request(r);
+ babel_trigger_update(p);
+ e->updated = current_time();
}
- babel_select_route(e);
+ babel_select_route(p, e, r);
}
void
@@ -1231,23 +1298,22 @@ babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa)
return;
}
- TRACE(D_PACKETS, "Handling route request for %I/%d", msg->prefix, msg->plen);
+ TRACE(D_PACKETS, "Handling route request for %N", &msg->net);
/* Non-wildcard request - see if we have an entry for the route.
If not, send a retraction, otherwise send an update. */
- struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen);
+ struct babel_entry *e = babel_find_entry(p, &msg->net);
if (!e)
{
- babel_send_retraction(ifa, msg->prefix, msg->plen);
+ babel_send_retraction(ifa, &msg->net);
}
else
{
babel_trigger_iface_update(ifa);
- e->updated = now;
+ e->updated = current_time();
}
}
-
void
babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa)
{
@@ -1256,36 +1322,54 @@ babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa)
/* RFC 6126 3.8.1.2 */
- TRACE(D_PACKETS, "Handling seqno request for %I/%d router-id %lR seqno %d hop count %d",
- msg->prefix, msg->plen, msg->router_id, msg->seqno, msg->hop_count);
+ TRACE(D_PACKETS, "Handling seqno request for %N router-id %lR seqno %d hop count %d",
+ &msg->net, msg->router_id, msg->seqno, msg->hop_count);
/* Ignore if we have no such entry or entry has infinite metric */
- struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen);
- if (!e || !e->selected_out || (e->selected_out->metric == BABEL_INFINITY))
+ struct babel_entry *e = babel_find_entry(p, &msg->net);
+ if (!e || !e->valid || (e->metric == BABEL_INFINITY))
return;
/* Trigger update on incoming interface if we have a selected route with
different router id or seqno no smaller than requested */
- struct babel_route *r = e->selected_out;
- if ((r->router_id != msg->router_id) || ge_mod64k(r->seqno, msg->seqno))
+ if ((e->router_id != msg->router_id) || ge_mod64k(e->seqno, msg->seqno))
{
babel_trigger_iface_update(ifa);
- e->updated = now;
+ e->updated = current_time();
return;
}
/* Seqno is larger; check if we own the router id */
if (msg->router_id == p->router_id)
{
- /* Ours; update seqno and trigger global update */
- p->update_seqno++;
+ /* Ours; request seqno increase and trigger global update */
+ p->update_seqno_inc = 1;
babel_trigger_update(p);
}
- else
+ else if (msg->hop_count > 1)
{
/* Not ours; forward if TTL allows it */
- if (msg->hop_count > 1)
- babel_forward_seqno_request(e, msg, msg->sender);
+
+ /* Find best admissible route */
+ struct babel_route *r, *best1 = NULL, *best2 = NULL;
+ WALK_LIST(r, e->routes)
+ if ((r->router_id == msg->router_id) && !ipa_equal(r->neigh->addr, msg->sender))
+ {
+ /* Find best feasible route */
+ if ((!best1 || r->metric < best1->metric) && r->feasible)
+ best1 = r;
+
+ /* Find best not necessarily feasible route */
+ if (!best2 || r->metric < best2->metric)
+ best2 = r;
+ }
+
+ /* If no route is found, do nothing */
+ r = best1 ?: best2;
+ if (!r)
+ return;
+
+ babel_add_seqno_request(p, e, msg->router_id, msg->seqno, msg->hop_count-1, r->neigh);
}
}
@@ -1320,42 +1404,43 @@ babel_iface_timer(timer *t)
{
struct babel_iface *ifa = t->data;
struct babel_proto *p = ifa->proto;
- bird_clock_t hello_period = ifa->cf->hello_interval;
- bird_clock_t update_period = ifa->cf->update_interval;
+ btime hello_period = ifa->cf->hello_interval;
+ btime update_period = ifa->cf->update_interval;
+ btime now_ = current_time();
- if (now >= ifa->next_hello)
+ if (now_ >= ifa->next_hello)
{
- babel_send_hello(ifa, (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS ||
- ifa->hello_seqno % BABEL_IHU_INTERVAL_FACTOR == 0));
- ifa->next_hello += hello_period * (1 + (now - ifa->next_hello) / hello_period);
+ babel_send_hello(ifa);
+ ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period);
}
- if (now >= ifa->next_regular)
+ if (now_ >= ifa->next_regular)
{
TRACE(D_EVENTS, "Sending regular updates on %s", ifa->ifname);
babel_send_update(ifa, 0);
- ifa->next_regular += update_period * (1 + (now - ifa->next_regular) / update_period);
+ ifa->next_regular += update_period * (1 + (now_ - ifa->next_regular) / update_period);
ifa->want_triggered = 0;
p->triggered = 0;
}
- else if (ifa->want_triggered && (now >= ifa->next_triggered))
+ else if (ifa->want_triggered && (now_ >= ifa->next_triggered))
{
TRACE(D_EVENTS, "Sending triggered updates on %s", ifa->ifname);
babel_send_update(ifa, ifa->want_triggered);
- ifa->next_triggered = now + MIN(5, update_period / 2 + 1);
+ ifa->next_triggered = now_ + MIN(1 S, update_period / 2);
ifa->want_triggered = 0;
p->triggered = 0;
}
- bird_clock_t next_event = MIN(ifa->next_hello, ifa->next_regular);
- tm_start(ifa->timer, ifa->want_triggered ? 1 : (next_event - now));
+ btime next_event = MIN(ifa->next_hello, ifa->next_regular);
+ if (ifa->want_triggered) next_event = MIN(next_event, ifa->next_triggered);
+ tm_set(ifa->timer, next_event);
}
static inline void
babel_iface_kick_timer(struct babel_iface *ifa)
{
- if (ifa->timer->expires > (now + 1))
- tm_start(ifa->timer, 1);
+ if (ifa->timer->expires > (current_time() + 100 MS))
+ tm_start(ifa->timer, 100 MS);
}
static void
@@ -1365,14 +1450,14 @@ babel_iface_start(struct babel_iface *ifa)
TRACE(D_EVENTS, "Starting interface %s", ifa->ifname);
- ifa->next_hello = now + (random() % ifa->cf->hello_interval) + 1;
- ifa->next_regular = now + (random() % ifa->cf->update_interval) + 1;
- ifa->next_triggered = now + MIN(5, ifa->cf->update_interval / 2 + 1);
+ ifa->next_hello = current_time() + (random() % ifa->cf->hello_interval);
+ ifa->next_regular = current_time() + (random() % ifa->cf->update_interval);
+ ifa->next_triggered = current_time() + MIN(1 S, ifa->cf->update_interval / 2);
ifa->want_triggered = 0; /* We send an immediate update (below) */
- tm_start(ifa->timer, 1);
+ tm_start(ifa->timer, 100 MS);
ifa->up = 1;
- babel_send_hello(ifa, 0);
+ babel_send_hello(ifa);
babel_send_wildcard_retraction(ifa);
babel_send_wildcard_request(ifa);
babel_send_update(ifa, 0); /* Full update */
@@ -1398,9 +1483,7 @@ babel_iface_stop(struct babel_iface *ifa)
WALK_LIST(n, nbr->routes)
{
r = SKIP_BACK(struct babel_route, neigh_route, n);
- r->metric = BABEL_INFINITY;
- r->expires = now + r->expiry_interval;
- babel_select_route(r->e);
+ babel_retract_route(p, r);
}
}
@@ -1488,21 +1571,21 @@ babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_con
ifa->cf = ic;
ifa->pool = pool;
ifa->ifname = new->name;
+ ifa->addr = new->llv6->ip;
add_tail(&p->interfaces, NODE ifa);
- struct ifa *addr;
- WALK_LIST(addr, new->addrs)
- if (ipa_is_link_local(addr->ip))
- ifa->addr = addr->ip;
+ ip_addr addr4 = new->addr4 ? new->addr4->ip : IPA_NONE;
+ ifa->next_hop_ip4 = ipa_nonzero(ic->next_hop_ip4) ? ic->next_hop_ip4 : addr4;
+ ifa->next_hop_ip6 = ipa_nonzero(ic->next_hop_ip6) ? ic->next_hop_ip6 : ifa->addr;
- if (ipa_zero(ifa->addr))
- log(L_WARN "%s: Cannot find link-local addr on %s", p->p.name, new->name);
+ if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel)
+ log(L_WARN "%s: Cannot find IPv4 next hop addr on %s", p->p.name, new->name);
init_list(&ifa->neigh_list);
ifa->hello_seqno = 1;
- ifa->timer = tm_new_set(ifa->pool, babel_iface_timer, ifa, 0, 0);
+ ifa->timer = tm_new_init(ifa->pool, babel_iface_timer, ifa, 0, 0);
init_list(&ifa->msg_queue);
ifa->send_event = ev_new(ifa->pool);
@@ -1527,7 +1610,7 @@ babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa)
struct babel_neighbor *n;
WALK_LIST_FIRST(n, ifa->neigh_list)
- babel_flush_neighbor(n);
+ babel_flush_neighbor(p, n);
rem_node(NODE ifa);
@@ -1545,12 +1628,16 @@ babel_if_notify(struct proto *P, unsigned flags, struct iface *iface)
if (flags & IF_CHANGE_UP)
{
- struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, iface->addr);
+ struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL);
/* we only speak multicast */
if (!(iface->flags & IF_MULTICAST))
return;
+ /* Ignore ifaces without link-local address */
+ if (!iface->llv6)
+ return;
+
if (ic)
babel_add_iface(p, iface, ic);
@@ -1590,11 +1677,18 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b
ifa->cf = new;
- if (ifa->next_hello > (now + new->hello_interval))
- ifa->next_hello = now + (random() % new->hello_interval) + 1;
+ ip_addr addr4 = ifa->iface->addr4 ? ifa->iface->addr4->ip : IPA_NONE;
+ ifa->next_hop_ip4 = ipa_nonzero(new->next_hop_ip4) ? new->next_hop_ip4 : addr4;
+ ifa->next_hop_ip6 = ipa_nonzero(new->next_hop_ip6) ? new->next_hop_ip6 : ifa->addr;
+
+ if (ipa_zero(ifa->next_hop_ip4) && p->ip4_channel)
+ log(L_WARN "%s: Cannot find IPv4 next hop addr on %s", p->p.name, ifa->ifname);
- if (ifa->next_regular > (now + new->update_interval))
- ifa->next_regular = now + (random() % new->update_interval) + 1;
+ if (ifa->next_hello > (current_time() + new->hello_interval))
+ ifa->next_hello = current_time() + (random() % new->hello_interval);
+
+ if (ifa->next_regular > (current_time() + new->update_interval))
+ ifa->next_regular = current_time() + (random() % new->update_interval);
if ((new->tx_length != old->tx_length) || (new->rx_buffer != old->rx_buffer))
babel_iface_update_buffers(ifa);
@@ -1615,7 +1709,15 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf)
WALK_LIST(iface, iface_list)
{
- if (! (iface->flags & IF_UP))
+ if (!(iface->flags & IF_UP))
+ continue;
+
+ /* Ignore non-multicast ifaces */
+ if (!(iface->flags & IF_MULTICAST))
+ continue;
+
+ /* Ignore ifaces without link-local address */
+ if (!iface->llv6)
continue;
struct babel_iface *ifa = babel_find_iface(p, iface);
@@ -1648,18 +1750,17 @@ babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf)
static void
babel_dump_source(struct babel_source *s)
{
- debug("Source router_id %lR seqno %d metric %d expires %d\n",
- s->router_id, s->seqno, s->metric, s->expires ? s->expires-now : 0);
+ debug("Source router_id %lR seqno %d metric %d expires %t\n",
+ s->router_id, s->seqno, s->metric,
+ s->expires ? s->expires - current_time() : 0);
}
static void
babel_dump_route(struct babel_route *r)
{
- debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %d\n",
- r->neigh ? r->neigh->addr : IPA_NONE,
- r->neigh ? r->neigh->ifa->ifname : "(none)",
- r->seqno, r->advert_metric, r->metric,
- r->router_id, r->expires ? r->expires-now : 0);
+ debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %t\n",
+ r->neigh->addr, r->neigh->ifa->ifname, r->seqno, r->advert_metric, r->metric,
+ r->router_id, r->expires ? r->expires - current_time() : 0);
}
static void
@@ -1668,7 +1769,7 @@ babel_dump_entry(struct babel_entry *e)
struct babel_source *s;
struct babel_route *r;
- debug("Babel: Entry %I/%d:\n", e->n.prefix, e->n.pxlen);
+ debug("Babel: Entry %N:\n", e->n.addr);
WALK_LIST(s,e->sources)
{ debug(" "); babel_dump_source(s); }
@@ -1676,8 +1777,7 @@ babel_dump_entry(struct babel_entry *e)
WALK_LIST(r,e->routes)
{
debug(" ");
- if (r == e->selected_out) debug("*");
- if (r == e->selected_in) debug("+");
+ if (r == e->selected) debug("*");
babel_dump_route(r);
}
}
@@ -1685,10 +1785,10 @@ babel_dump_entry(struct babel_entry *e)
static void
babel_dump_neighbor(struct babel_neighbor *n)
{
- debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %d/%d\n",
+ debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %t/%t\n",
n->addr, n->txcost, n->hello_map, n->next_hello_seqno,
- n->hello_expiry ? n->hello_expiry - now : 0,
- n->ihu_expiry ? n->ihu_expiry - now : 0);
+ n->hello_expiry ? n->hello_expiry - current_time() : 0,
+ n->ihu_expiry ? n->ihu_expiry - current_time() : 0);
}
static void
@@ -1696,9 +1796,10 @@ babel_dump_iface(struct babel_iface *ifa)
{
struct babel_neighbor *n;
- debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %d %d\n",
+ debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %t %t",
ifa->ifname, ifa->addr, ifa->cf->rxcost, ifa->cf->type, ifa->hello_seqno,
ifa->cf->hello_interval, ifa->cf->update_interval);
+ debug(" next hop v4 %I next hop v6 %I\n", ifa->next_hop_ip4, ifa->next_hop_ip6);
WALK_LIST(n, ifa->neigh_list)
{ debug(" "); babel_dump_neighbor(n); }
@@ -1715,9 +1816,14 @@ babel_dump(struct proto *P)
WALK_LIST(ifa, p->interfaces)
babel_dump_iface(ifa);
- FIB_WALK(&p->rtable, n)
+ FIB_WALK(&p->ip4_rtable, struct babel_entry, e)
{
- babel_dump_entry((struct babel_entry *) n);
+ babel_dump_entry(e);
+ }
+ FIB_WALK_END;
+ FIB_WALK(&p->ip6_rtable, struct babel_entry, e)
+ {
+ babel_dump_entry(e);
}
FIB_WALK_END;
}
@@ -1765,8 +1871,9 @@ babel_show_interfaces(struct proto *P, char *iff)
}
cli_msg(-1023, "%s:", p->p.name);
- cli_msg(-1023, "%-10s %-6s %7s %6s %6s",
- "Interface", "State", "RX cost", "Nbrs", "Timer");
+ cli_msg(-1023, "%-10s %-6s %7s %6s %7s %-15s %s",
+ "Interface", "State", "RX cost", "Nbrs", "Timer",
+ "Next hop (v4)", "Next hop (v6)");
WALK_LIST(ifa, p->interfaces)
{
@@ -1777,9 +1884,11 @@ babel_show_interfaces(struct proto *P, char *iff)
WALK_LIST(nbr, ifa->neigh_list)
nbrs++;
- int timer = MIN(ifa->next_regular, ifa->next_hello) - now;
- cli_msg(-1023, "%-10s %-6s %7u %6u %6u",
- ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->rxcost, nbrs, MAX(timer, 0));
+ btime timer = MIN(ifa->next_regular, ifa->next_hello) - current_time();
+ cli_msg(-1023, "%-10s %-6s %7u %6u %7t %-15I %I",
+ ifa->iface->name, (ifa->up ? "Up" : "Down"),
+ ifa->cf->rxcost, nbrs, MAX(timer, 0),
+ ifa->next_hop_ip4, ifa->next_hop_ip6);
}
cli_msg(0, "");
@@ -1801,8 +1910,8 @@ babel_show_neighbors(struct proto *P, char *iff)
}
cli_msg(-1024, "%s:", p->p.name);
- cli_msg(-1024, "%-25s %-10s %6s %6s %10s",
- "IP address", "Interface", "Metric", "Routes", "Next hello");
+ cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s",
+ "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires");
WALK_LIST(ifa, p->interfaces)
{
@@ -1815,25 +1924,48 @@ babel_show_neighbors(struct proto *P, char *iff)
WALK_LIST(r, n->routes)
rts++;
- int timer = n->hello_expiry - now;
- cli_msg(-1024, "%-25I %-10s %6u %6u %10u",
- n->addr, ifa->iface->name, n->txcost, rts, MAX(timer, 0));
+ uint hellos = u32_popcount(n->hello_map);
+ btime timer = n->hello_expiry - current_time();
+ cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t",
+ n->addr, ifa->iface->name, n->cost, rts, hellos, MAX(timer, 0));
}
}
cli_msg(0, "");
}
+static void
+babel_show_entries_(struct babel_proto *p UNUSED, struct fib *rtable)
+{
+ FIB_WALK(rtable, struct babel_entry, e)
+ {
+ struct babel_route *r = NULL;
+ uint rts = 0, srcs = 0;
+ node *n;
+
+ WALK_LIST(n, e->routes)
+ rts++;
+
+ WALK_LIST(n, e->sources)
+ srcs++;
+
+ if (e->valid)
+ cli_msg(-1025, "%-24N %-23lR %6u %5u %7u %7u",
+ e->n.addr, e->router_id, e->metric, e->seqno, rts, srcs);
+ else if (r = e->selected)
+ cli_msg(-1025, "%-24N %-23lR %6u %5u %7u %7u",
+ e->n.addr, r->router_id, r->metric, r->seqno, rts, srcs);
+ else
+ cli_msg(-1025, "%-24N %-23s %6s %5s %7u %7u",
+ e->n.addr, "<none>", "-", "-", rts, srcs);
+ }
+ FIB_WALK_END;
+}
+
void
babel_show_entries(struct proto *P)
{
struct babel_proto *p = (void *) P;
- struct babel_entry *e = NULL;
- struct babel_source *s = NULL;
- struct babel_route *r = NULL;
-
- char ipbuf[STD_ADDRESS_P_LENGTH+5];
- char ridbuf[ROUTER_ID_64_LENGTH+1];
if (p->p.proto_state != PS_UP)
{
@@ -1843,37 +1975,51 @@ babel_show_entries(struct proto *P)
}
cli_msg(-1025, "%s:", p->p.name);
- cli_msg(-1025, "%-29s %-23s %6s %5s %7s %7s",
- "Prefix", "Router ID", "Metric", "Seqno", "Expires", "Sources");
-
- FIB_WALK(&p->rtable, n)
- {
- e = (struct babel_entry *) n;
- r = e->selected_in ? e->selected_in : e->selected_out;
-
- int srcs = 0;
- WALK_LIST(s, e->sources)
- srcs++;
+ cli_msg(-1025, "%-24s %-23s %6s %5s %7s %7s",
+ "Prefix", "Router ID", "Metric", "Seqno", "Routes", "Sources");
- bsprintf(ipbuf, "%I/%u", e->n.prefix, e->n.pxlen);
+ babel_show_entries_(p, &p->ip4_rtable);
+ babel_show_entries_(p, &p->ip6_rtable);
- if (r)
- {
- if (r->router_id == p->router_id)
- bsprintf(ridbuf, "%s", "<self>");
- else
- bsprintf(ridbuf, "%lR", r->router_id);
+ cli_msg(0, "");
+}
- int time = r->expires ? r->expires - now : 0;
- cli_msg(-1025, "%-29s %-23s %6u %5u %7u %7u",
- ipbuf, ridbuf, r->metric, r->seqno, MAX(time, 0), srcs);
- }
- else
+static void
+babel_show_routes_(struct babel_proto *p UNUSED, struct fib *rtable)
+{
+ FIB_WALK(rtable, struct babel_entry, e)
+ {
+ struct babel_route *r;
+ WALK_LIST(r, e->routes)
{
- cli_msg(-1025, "%-29s %-44s %7u", ipbuf, "<pending>", srcs);
+ char c = (r == e->selected) ? '*' : (r->feasible ? '+' : ' ');
+ btime time = r->expires ? r->expires - current_time() : 0;
+ cli_msg(-1025, "%-24N %-25I %-10s %5u %c %5u %7t",
+ e->n.addr, r->next_hop, r->neigh->ifa->ifname,
+ r->metric, c, r->seqno, MAX(time, 0));
}
}
FIB_WALK_END;
+}
+
+void
+babel_show_routes(struct proto *P)
+{
+ struct babel_proto *p = (void *) P;
+
+ if (p->p.proto_state != PS_UP)
+ {
+ cli_msg(-1025, "%s: is not up", p->p.name);
+ cli_msg(0, "");
+ return;
+ }
+
+ cli_msg(-1025, "%s:", p->p.name);
+ cli_msg(-1025, "%-24s %-25s %-9s %6s F %5s %7s",
+ "Prefix", "Nexthop", "Interface", "Metric", "Seqno", "Expires");
+
+ babel_show_routes_(p, &p->ip4_rtable);
+ babel_show_routes_(p, &p->ip6_rtable);
cli_msg(0, "");
}
@@ -1897,15 +2043,14 @@ babel_timer(timer *t)
struct babel_proto *p = t->data;
babel_expire_routes(p);
- babel_expire_seqno_requests(p);
babel_expire_neighbors(p);
}
static inline void
babel_kick_timer(struct babel_proto *p)
{
- if (p->timer->expires > (now + 1))
- tm_start(p->timer, 1);
+ if (p->timer->expires > (current_time() + 100 MS))
+ tm_start(p->timer, 100 MS);
}
@@ -1936,12 +2081,18 @@ babel_prepare_attrs(struct linpool *pool, ea_list *next, uint metric, u64 router
static int
-babel_import_control(struct proto *P, struct rte **rt, struct ea_list **attrs, struct linpool *pool)
+babel_import_control(struct proto *P, struct rte **new, struct ea_list **attrs, struct linpool *pool)
{
struct babel_proto *p = (void *) P;
+ rte *rt = *new;
+
+ /* Reject our own unreachable routes */
+ if ((rt->attrs->dest == RTD_UNREACHABLE) && (rt->attrs->src->proto == P))
+ return -1;
+
/* Prepare attributes with initial values */
- if ((*rt)->attrs->source != RTS_BABEL)
+ if (rt->attrs->source != RTS_BABEL)
*attrs = babel_prepare_attrs(pool, NULL, 0, p->router_id);
return 0;
@@ -1964,70 +2115,55 @@ babel_store_tmp_attrs(struct rte *rt, struct ea_list *attrs)
* so store it into our data structures.
*/
static void
-babel_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net,
+babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
struct rte *new, struct rte *old UNUSED, struct ea_list *attrs UNUSED)
{
struct babel_proto *p = (void *) P;
struct babel_entry *e;
- struct babel_route *r;
if (new)
{
/* Update */
- e = babel_get_entry(p, net->n.prefix, net->n.pxlen);
+ uint internal = (new->attrs->src->proto == P);
+ uint rt_seqno = internal ? new->u.babel.seqno : p->update_seqno;
+ uint rt_metric = ea_get_int(attrs, EA_BABEL_METRIC, 0);
+ u64 rt_router_id = internal ? new->u.babel.router_id : p->router_id; /* u64: must match e->router_id, uint would drop the upper 32 bits */
- if (new->attrs->src->proto != P)
+ if (rt_metric > BABEL_INFINITY)
{
- r = babel_get_route(e, NULL);
- r->seqno = p->update_seqno;
- r->router_id = p->router_id;
- r->metric = 0; /* FIXME: should be selectable */
+ log(L_WARN "%s: Invalid babel_metric value %u for route %N",
+ p->p.name, rt_metric, net->n.addr);
+ rt_metric = BABEL_INFINITY;
}
- else
- r = e->selected_in;
- if (r != e->selected_out)
+ e = babel_get_entry(p, net->n.addr);
+
+ /* Activate triggered updates only if the entry was not valid or its router ID changed */
+ if ((e->valid != BABEL_ENTRY_VALID) ||
+ (e->router_id != rt_router_id))
{
- e->selected_out = r;
- e->updated = now;
babel_trigger_update(p);
+ e->updated = current_time();
}
+
+ e->valid = BABEL_ENTRY_VALID;
+ e->seqno = rt_seqno;
+ e->metric = rt_metric;
+ e->router_id = rt_router_id;
}
else
{
/* Withdraw */
- e = babel_find_entry(p, net->n.prefix, net->n.pxlen);
- if (!e || !e->selected_out)
+ e = babel_find_entry(p, net->n.addr);
+
+ if (!e || e->valid != BABEL_ENTRY_VALID)
return;
- if (OUR_ROUTE(e->selected_out))
- {
- /*
- * We originate this route, so set its metric to infinity and set an
- * expiry time. This causes a retraction to be sent, and later the route
- * to be flushed once the hold time has passed.
- */
- e->selected_out->metric = BABEL_INFINITY;
- e->selected_out->expires = now + BABEL_HOLD_TIME;
- e->updated = now;
- babel_trigger_update(p);
- }
- else
- {
- /*
- * This is a route originating from someone else that was lost; presumably
- * because an export filter was updated to filter it. This means we can't
- * set the metric to infinity (it would be overridden on subsequent
- * updates from the peer originating the route), so just clear the
- * exported route.
- *
- * This causes peers to expire the route after a while (like if we just
- * shut down), but it's the best we can do in these circumstances; and
- * since export filters presumably aren't updated that often this is
- * acceptable.
- */
- e->selected_out = NULL;
- }
+ e->valid = BABEL_ENTRY_STALE;
+ e->metric = BABEL_INFINITY;
+
+ babel_trigger_update(p);
+ e->updated = current_time();
}
}
@@ -2040,17 +2176,21 @@ babel_rte_better(struct rte *new, struct rte *old)
static int
babel_rte_same(struct rte *new, struct rte *old)
{
- return ((new->u.babel.router_id == old->u.babel.router_id) &&
- (new->u.babel.metric == old->u.babel.metric));
+ return ((new->u.babel.seqno == old->u.babel.seqno) &&
+ (new->u.babel.metric == old->u.babel.metric) &&
+ (new->u.babel.router_id == old->u.babel.router_id));
}
static struct proto *
-babel_init(struct proto_config *cfg)
+babel_init(struct proto_config *CF)
{
- struct proto *P = proto_new(cfg, sizeof(struct babel_proto));
+ struct proto *P = proto_new(CF);
+ struct babel_proto *p = (void *) P;
+
+ proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4));
+ proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6));
- P->accept_ra_types = RA_OPTIMAL;
P->if_notify = babel_if_notify;
P->rt_notify = babel_rt_notify;
P->import_control = babel_import_control;
@@ -2068,10 +2208,14 @@ babel_start(struct proto *P)
struct babel_proto *p = (void *) P;
struct babel_config *cf = (void *) P->cf;
- fib_init(&p->rtable, P->pool, sizeof(struct babel_entry), 0, babel_init_entry);
+ fib_init(&p->ip4_rtable, P->pool, NET_IP4, sizeof(struct babel_entry),
+ OFFSETOF(struct babel_entry, n), 0, babel_init_entry);
+ fib_init(&p->ip6_rtable, P->pool, NET_IP6, sizeof(struct babel_entry),
+ OFFSETOF(struct babel_entry, n), 0, babel_init_entry);
+
init_list(&p->interfaces);
- p->timer = tm_new_set(P->pool, babel_timer, p, 0, 1);
- tm_start(p->timer, 2);
+ p->timer = tm_new_init(P->pool, babel_timer, p, 1 S, 0);
+ tm_start(p->timer, 1 S);
p->update_seqno = 1;
p->router_id = proto_get_router_id(&cf->c);
@@ -2079,7 +2223,6 @@ babel_start(struct proto *P)
p->source_slab = sl_new(P->pool, sizeof(struct babel_source));
p->msg_slab = sl_new(P->pool, sizeof(struct babel_msg_node));
p->seqno_slab = sl_new(P->pool, sizeof(struct babel_seqno_request));
- init_list(&p->seqno_cache);
p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 };
@@ -2111,14 +2254,18 @@ babel_shutdown(struct proto *P)
}
static int
-babel_reconfigure(struct proto *P, struct proto_config *c)
+babel_reconfigure(struct proto *P, struct proto_config *CF)
{
struct babel_proto *p = (void *) P;
- struct babel_config *new = (void *) c;
+ struct babel_config *new = (void *) CF;
TRACE(D_EVENTS, "Reconfiguring");
- p->p.cf = c;
+ if (!proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)) ||
+ !proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)))
+ return 0;
+
+ p->p.cf = CF;
babel_reconfigure_ifaces(p, new);
babel_trigger_update(p);
@@ -2133,6 +2280,8 @@ struct protocol proto_babel = {
.template = "babel%d",
.attr_class = EAP_BABEL,
.preference = DEF_PREF_BABEL,
+ .channel_mask = NB_IP,
+ .proto_size = sizeof(struct babel_proto),
.config_size = sizeof(struct babel_config),
.init = babel_init,
.dump = babel_dump,
diff --git a/proto/babel/babel.h b/proto/babel/babel.h
index 6a95d82f..1128d261 100644
--- a/proto/babel/babel.h
+++ b/proto/babel/babel.h
@@ -2,6 +2,8 @@
* BIRD -- The Babel protocol
*
* Copyright (c) 2015--2016 Toke Hoiland-Jorgensen
+ * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2016--2017 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*
@@ -23,10 +25,6 @@
#include "lib/string.h"
#include "lib/timer.h"
-#ifndef IPV6
-#error "The Babel protocol only speaks IPv6"
-#endif
-
#define EA_BABEL_METRIC EA_CODE(EAP_BABEL, 0)
#define EA_BABEL_ROUTER_ID EA_CODE(EAP_BABEL, 1)
@@ -36,27 +34,30 @@
#define BABEL_INFINITY 0xFFFF
-#define BABEL_HELLO_INTERVAL_WIRED 4 /* Default hello intervals in seconds */
-#define BABEL_HELLO_INTERVAL_WIRELESS 4
+#define BABEL_HELLO_INTERVAL_WIRED (4 S_) /* Default hello intervals in seconds */
+#define BABEL_HELLO_INTERVAL_WIRELESS (4 S_)
+#define BABEL_HELLO_LIMIT 12
#define BABEL_UPDATE_INTERVAL_FACTOR 4
#define BABEL_IHU_INTERVAL_FACTOR 3
-#define BABEL_IHU_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */
-#define BABEL_HELLO_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */
-#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((X)*7/2) /* 3.5 */
-#define BABEL_ROUTE_REFRESH_INTERVAL 2 /* Seconds before route expiry to send route request */
-#define BABEL_HOLD_TIME 10 /* Expiry time for our own routes */
+#define BABEL_HOLD_TIME_FACTOR 4 /* How long we keep unreachable route relative to update interval */
+#define BABEL_IHU_EXPIRY_FACTOR(X) ((btime)(X)*7/2) /* 3.5 */
+#define BABEL_HELLO_EXPIRY_FACTOR(X) ((btime)(X)*3/2) /* 1.5 */
+#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((btime)(X)*7/2) /* 3.5 */
+#define BABEL_ROUTE_REFRESH_FACTOR(X) ((btime)(X)*5/2) /* 2.5 */
+#define BABEL_SEQNO_REQUEST_RETRY 4
+#define BABEL_SEQNO_REQUEST_EXPIRY (2 S_)
+#define BABEL_GARBAGE_INTERVAL (300 S_)
#define BABEL_RXCOST_WIRED 96
#define BABEL_RXCOST_WIRELESS 256
#define BABEL_INITIAL_HOP_COUNT 255
-#define BABEL_MAX_SEND_INTERVAL 5
-#define BABEL_TIME_UNITS 100 /* On-wire times are counted in centiseconds */
-#define BABEL_SEQNO_REQUEST_EXPIRY 60
-#define BABEL_GARBAGE_INTERVAL 300
+#define BABEL_MAX_SEND_INTERVAL 5 /* Unused ? */
/* Max interval that will not overflow when carried as 16-bit centiseconds */
-#define BABEL_MAX_INTERVAL (0xFFFF/BABEL_TIME_UNITS)
+#define BABEL_TIME_UNITS 10000 /* On-wire times are counted in centiseconds */
+#define BABEL_MIN_INTERVAL (0x0001 * BABEL_TIME_UNITS)
+#define BABEL_MAX_INTERVAL (0xFFFF * BABEL_TIME_UNITS)
-#define BABEL_OVERHEAD (SIZE_OF_IP_HEADER+UDP_HEADER_LENGTH)
+#define BABEL_OVERHEAD (IP6_HEADER_LENGTH+UDP_HEADER_LENGTH)
#define BABEL_MIN_MTU (512 + BABEL_OVERHEAD)
@@ -82,6 +83,11 @@ enum babel_tlv_type {
BABEL_TLV_MAX
};
+enum babel_subtlv_type {
+ BABEL_SUBTLV_PAD1 = 0,
+ BABEL_SUBTLV_PADN = 1
+};
+
enum babel_iface_type {
/* In practice, UNDEF and WIRED give equivalent behaviour */
BABEL_IFACE_TYPE_UNDEF = 0,
@@ -101,8 +107,8 @@ enum babel_ae_type {
struct babel_config {
struct proto_config c;
-
- list iface_list; /* Patterns configured -- keep it first; see babel_reconfigure why */
+ list iface_list; /* List of iface configs (struct babel_iface_config) */
+ uint hold_time; /* Time to hold stale entries and unreachable routes */
};
struct babel_iface_config {
@@ -110,33 +116,41 @@ struct babel_iface_config {
u16 rxcost;
u8 type;
+ u8 limit; /* Minimum number of Hellos to keep link up */
u8 check_link;
uint port;
- u16 hello_interval;
- u16 ihu_interval;
- u16 update_interval;
+ uint hello_interval; /* Hello interval, in us */
+ uint ihu_interval; /* IHU interval, in us */
+ uint update_interval; /* Update interval, in us */
u16 rx_buffer; /* RX buffer size, 0 for MTU */
u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */
int tx_tos;
int tx_priority;
+
+ ip_addr next_hop_ip4;
+ ip_addr next_hop_ip6;
};
struct babel_proto {
struct proto p;
timer *timer;
- struct fib rtable;
+ struct fib ip4_rtable;
+ struct fib ip6_rtable;
+
+ struct channel *ip4_channel;
+ struct channel *ip6_channel;
+
list interfaces; /* Interfaces we really know about (struct babel_iface) */
u64 router_id;
u16 update_seqno; /* To be increased on request */
+ u8 update_seqno_inc; /* Request for update_seqno increase */
u8 triggered; /* For triggering global updates */
slab *route_slab;
slab *source_slab;
slab *msg_slab;
-
slab *seqno_slab;
- list seqno_cache; /* Seqno requests in the cache (struct babel_seqno_request) */
struct tbf log_pkt_tbf; /* TBF for packet messages */
};
@@ -155,16 +169,18 @@ struct babel_iface {
char *ifname;
sock *sk;
ip_addr addr;
+ ip_addr next_hop_ip4;
+ ip_addr next_hop_ip6;
int tx_length;
list neigh_list; /* List of neighbors seen on this iface (struct babel_neighbor) */
list msg_queue;
u16 hello_seqno; /* To be increased on each hello */
- bird_clock_t next_hello;
- bird_clock_t next_regular;
- bird_clock_t next_triggered;
- bird_clock_t want_triggered;
+ btime next_hello;
+ btime next_regular;
+ btime next_triggered;
+ btime want_triggered;
timer *timer;
event *send_event;
@@ -175,13 +191,18 @@ struct babel_neighbor {
struct babel_iface *ifa;
ip_addr addr;
- u16 txcost;
+ uint uc; /* Reference counter for seqno requests */
+ u16 rxcost; /* Sent in last IHU */
+ u16 txcost; /* Received in last IHU */
+ u16 cost; /* Computed neighbor cost */
+ s8 ihu_cnt; /* IHU countdown, 0 to send it */
u8 hello_cnt;
u16 hello_map;
u16 next_hello_seqno;
+ uint last_hello_int;
/* expiry timers */
- bird_clock_t hello_expiry;
- bird_clock_t ihu_expiry;
+ btime hello_expiry;
+ btime ihu_expiry;
list routes; /* Routes this neighbour has sent us (struct babel_route) */
};
@@ -192,7 +213,7 @@ struct babel_source {
u64 router_id;
u16 seqno;
u16 metric;
- bird_clock_t expires;
+ btime expires;
};
struct babel_route {
@@ -201,38 +222,47 @@ struct babel_route {
struct babel_entry *e;
struct babel_neighbor *neigh;
+ u8 feasible;
u16 seqno;
- u16 advert_metric;
u16 metric;
+ u16 advert_metric;
u64 router_id;
ip_addr next_hop;
- bird_clock_t refresh_time;
- bird_clock_t expires;
- u16 expiry_interval;
+ btime refresh_time;
+ btime expires;
};
-struct babel_entry {
- struct fib_node n;
- struct babel_proto *proto;
- struct babel_route *selected_in;
- struct babel_route *selected_out;
-
- bird_clock_t updated;
-
- list sources; /* Source entries for this prefix (struct babel_source). */
- list routes; /* Routes for this prefix (struct babel_route) */
-};
-
-/* Stores forwarded seqno requests for duplicate suppression. */
struct babel_seqno_request {
node n;
- ip_addr prefix;
- u8 plen;
u64 router_id;
u16 seqno;
- bird_clock_t updated;
+ u8 hop_count;
+ u8 count;
+ btime expires;
+ struct babel_neighbor *nbr;
+};
+
+struct babel_entry {
+ struct babel_route *selected;
+
+ list routes; /* Routes for this prefix (struct babel_route) */
+ list sources; /* Source entries for this prefix (struct babel_source). */
+ list requests;
+
+ u8 valid; /* Entry validity state (BABEL_ENTRY_*) */
+ u8 unreachable; /* Unreachable route is announced */
+ u16 seqno; /* Outgoing seqno */
+ u16 metric; /* Outgoing metric */
+ u64 router_id; /* Outgoing router ID */
+ btime updated; /* Last change of outgoing rte, for triggered updates */
+
+ struct fib_node n;
};
+#define BABEL_ENTRY_DUMMY 0 /* No outgoing route */
+#define BABEL_ENTRY_VALID 1 /* Valid outgoing route */
+#define BABEL_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */
+
/*
* Internal TLV messages
@@ -241,7 +271,7 @@ struct babel_seqno_request {
struct babel_msg_ack_req {
u8 type;
u16 nonce;
- u16 interval;
+ uint interval;
ip_addr sender;
};
@@ -253,7 +283,7 @@ struct babel_msg_ack {
struct babel_msg_hello {
u8 type;
u16 seqno;
- u16 interval;
+ uint interval;
ip_addr sender;
};
@@ -261,7 +291,7 @@ struct babel_msg_ihu {
u8 type;
u8 ae;
u16 rxcost;
- u16 interval;
+ uint interval;
ip_addr addr;
ip_addr sender;
};
@@ -269,12 +299,11 @@ struct babel_msg_ihu {
struct babel_msg_update {
u8 type;
u8 wildcard;
- u8 plen;
- u16 interval;
+ uint interval;
u16 seqno;
u16 metric;
- ip_addr prefix;
u64 router_id;
+ net_addr net;
ip_addr next_hop;
ip_addr sender;
};
@@ -282,17 +311,15 @@ struct babel_msg_update {
struct babel_msg_route_request {
u8 type;
u8 full;
- u8 plen;
- ip_addr prefix;
+ net_addr net;
};
struct babel_msg_seqno_request {
u8 type;
- u8 plen;
- u16 seqno;
u8 hop_count;
+ u16 seqno;
u64 router_id;
- ip_addr prefix;
+ net_addr net;
ip_addr sender;
};
@@ -326,6 +353,7 @@ void babel_handle_seqno_request(union babel_msg *msg, struct babel_iface *ifa);
void babel_show_interfaces(struct proto *P, char *iff);
void babel_show_neighbors(struct proto *P, char *iff);
void babel_show_entries(struct proto *P);
+void babel_show_routes(struct proto *P);
/* packets.c */
void babel_enqueue(union babel_msg *msg, struct babel_iface *ifa);
diff --git a/proto/babel/config.Y b/proto/babel/config.Y
index b6170852..25ce5ba0 100644
--- a/proto/babel/config.Y
+++ b/proto/babel/config.Y
@@ -2,6 +2,8 @@
* BIRD -- Babel Configuration
*
* Copyright (c) 2015-2016 Toke Hoiland-Jorgensen
+ * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2016--2017 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
@@ -21,7 +23,8 @@ CF_DEFINES
CF_DECLS
CF_KEYWORDS(BABEL, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, WIRED,
-WIRELESS, RX, TX, BUFFER, LENGTH, CHECK, LINK, BABEL_METRIC)
+ WIRELESS, RX, TX, BUFFER, LENGTH, CHECK, LINK, BABEL_METRIC, NEXT, HOP,
+ IPV4, IPV6)
CF_GRAMMAR
@@ -31,10 +34,12 @@ babel_proto_start: proto_start BABEL
{
this_proto = proto_config_new(&proto_babel, $1);
init_list(&BABEL_CFG->iface_list);
+ BABEL_CFG->hold_time = 1 S_;
};
babel_proto_item:
proto_item
+ | proto_channel
| INTERFACE babel_iface
;
@@ -54,6 +59,7 @@ babel_iface_start:
init_list(&this_ipatt->ipn_list);
BABEL_IFACE->port = BABEL_PORT;
BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED;
+ BABEL_IFACE->limit = BABEL_HELLO_LIMIT;
BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL;
BABEL_IFACE->tx_priority = sk_priority_control;
BABEL_IFACE->check_link = 1;
@@ -81,21 +87,26 @@ babel_iface_finish:
if (!BABEL_IFACE->update_interval)
BABEL_IFACE->update_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR, BABEL_MAX_INTERVAL);
BABEL_IFACE->ihu_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_IHU_INTERVAL_FACTOR, BABEL_MAX_INTERVAL);
+
+ BABEL_CFG->hold_time = MAX_(BABEL_CFG->hold_time, BABEL_IFACE->update_interval*BABEL_HOLD_TIME_FACTOR);
};
babel_iface_item:
| PORT expr { BABEL_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); }
| RXCOST expr { BABEL_IFACE->rxcost = $2; if (($2<1) || ($2>65535)) cf_error("Invalid rxcost"); }
- | HELLO INTERVAL expr { BABEL_IFACE->hello_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid hello interval"); }
- | UPDATE INTERVAL expr { BABEL_IFACE->update_interval = $3; if (($3<1) || ($3>BABEL_MAX_INTERVAL)) cf_error("Invalid update interval"); }
+ | LIMIT expr { BABEL_IFACE->limit = $2; if (($2<1) || ($2>16)) cf_error("Limit must be in range 1-16"); }
| TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; }
| TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; }
+ | HELLO INTERVAL expr_us { BABEL_IFACE->hello_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Hello interval must be in range 10 ms - 655 s"); }
+ | UPDATE INTERVAL expr_us { BABEL_IFACE->update_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Update interval must be in range 10 ms - 655 s"); }
| RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); }
| TX LENGTH expr { BABEL_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); }
| TX tos { BABEL_IFACE->tx_tos = $2; }
| TX PRIORITY expr { BABEL_IFACE->tx_priority = $3; }
| CHECK LINK bool { BABEL_IFACE->check_link = $3; }
+ | NEXT HOP IPV4 ipa { BABEL_IFACE->next_hop_ip4 = $4; if (!ipa_is_ip4($4)) cf_error("Must be an IPv4 address"); }
+ | NEXT HOP IPV6 ipa { BABEL_IFACE->next_hop_ip6 = $4; if (!ipa_is_ip6($4)) cf_error("Must be an IPv6 address"); }
;
babel_iface_opts:
@@ -125,6 +136,9 @@ CF_CLI(SHOW BABEL NEIGHBORS, optsym opttext, [<name>] [\"<interface>\"], [[Show
CF_CLI(SHOW BABEL ENTRIES, optsym opttext, [<name>], [[Show information about Babel prefix entries]])
{ babel_show_entries(proto_get_named($4, &proto_babel)); };
+CF_CLI(SHOW BABEL ROUTES, optsym opttext, [<name>], [[Show information about Babel route entries]])
+{ babel_show_routes(proto_get_named($4, &proto_babel)); };
+
CF_CODE
CF_END
diff --git a/proto/babel/packets.c b/proto/babel/packets.c
index 90421836..5b356fae 100644
--- a/proto/babel/packets.c
+++ b/proto/babel/packets.c
@@ -2,6 +2,8 @@
* BIRD -- The Babel protocol
*
* Copyright (c) 2015--2016 Toke Hoiland-Jorgensen
+ * (c) 2016--2017 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2016--2017 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*
@@ -112,13 +114,15 @@ struct babel_parse_state {
struct babel_proto *proto;
struct babel_iface *ifa;
ip_addr saddr;
- ip_addr next_hop;
+ ip_addr next_hop_ip4;
+ ip_addr next_hop_ip6;
u64 router_id; /* Router ID used in subsequent updates */
u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */
u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */
u8 router_id_seen; /* router_id field is valid */
u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */
u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */
+ u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */
};
enum parse_result {
@@ -130,7 +134,10 @@ enum parse_result {
struct babel_write_state {
u64 router_id;
u8 router_id_seen;
-// ip_addr next_hop;
+ ip_addr next_hop_ip4;
+ ip_addr next_hop_ip6;
+ u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */
+ u8 def_ip6_pxlen;
};
@@ -146,34 +153,58 @@ struct babel_write_state {
#define TLV_HDR(tlv,t,l) ({ tlv->type = t; tlv->length = l - sizeof(struct babel_tlv); })
#define TLV_HDR0(tlv,t) TLV_HDR(tlv, t, tlv_data[t].min_length)
-#define BYTES(n) ((((uint) n) + 7) / 8)
+#define NET_SIZE(n) BYTES(net_pxlen(n))
-static inline u16
+static inline uint
+bytes_equal(u8 *b1, u8 *b2, uint maxlen)
+{
+ uint i;
+ for (i = 0; (i < maxlen) && (*b1 == *b2); i++, b1++, b2++)
+ ;
+ return i;
+}
+
+static inline uint
get_time16(const void *p)
{
- u16 v = get_u16(p) / BABEL_TIME_UNITS;
- return MAX(1, v);
+ uint v = get_u16(p) * BABEL_TIME_UNITS;
+ return MAX(BABEL_MIN_INTERVAL, v);
}
static inline void
-put_time16(void *p, u16 v)
+put_time16(void *p, uint v)
{
- put_u16(p, v * BABEL_TIME_UNITS);
+ put_u16(p, v / BABEL_TIME_UNITS);
}
-static inline ip6_addr
-get_ip6_px(const void *p, uint plen)
+static inline void
+read_ip4_px(net_addr *n, const void *p, uint plen)
+{
+ ip4_addr addr = {0};
+ memcpy(&addr, p, BYTES(plen));
+ net_fill_ip4(n, ip4_ntoh(addr), plen);
+}
+
+static inline void
+put_ip4_px(void *p, net_addr *n)
+{
+ ip4_addr addr = ip4_hton(net4_prefix(n));
+ memcpy(p, &addr, NET_SIZE(n));
+}
+
+static inline void
+read_ip6_px(net_addr *n, const void *p, uint plen)
{
ip6_addr addr = IPA_NONE;
memcpy(&addr, p, BYTES(plen));
- return ip6_ntoh(addr);
+ net_fill_ip6(n, ip6_ntoh(addr), plen);
}
static inline void
-put_ip6_px(void *p, ip6_addr addr, uint plen)
+put_ip6_px(void *p, net_addr *n)
{
- addr = ip6_hton(addr);
- memcpy(p, &addr, BYTES(plen));
+ ip6_addr addr = ip6_hton(net6_prefix(n));
+ memcpy(p, &addr, NET_SIZE(n));
}
static inline ip6_addr
@@ -351,14 +382,33 @@ babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m,
if (msg->ae >= BABEL_AE_MAX)
return PARSE_IGNORE;
- // We handle link-local IPs. In every other case, the addr field will be 0 but
- // validation will succeed. The handler takes care of these cases.
- if (msg->ae == BABEL_AE_IP6_LL)
+ /*
+ * We only actually read link-local IPs. In every other case, the addr field
+ * will be 0 but validation will succeed. The handler takes care of these
+ * cases. We handle them here anyway because we need the length for parsing
+ * subtlvs.
+ */
+ switch (msg->ae)
{
+ case BABEL_AE_IP4:
+ if (TLV_OPT_LENGTH(tlv) < 4)
+ return PARSE_ERROR;
+ state->current_tlv_endpos += 4;
+ break;
+
+ case BABEL_AE_IP6:
+ if (TLV_OPT_LENGTH(tlv) < 16)
+ return PARSE_ERROR;
+ state->current_tlv_endpos += 16;
+ break;
+
+ case BABEL_AE_IP6_LL:
if (TLV_OPT_LENGTH(tlv) < 8)
return PARSE_ERROR;
msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr));
+ state->current_tlv_endpos += 8;
+ break;
}
return PARSE_SUCCESS;
@@ -431,21 +481,27 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED,
return PARSE_ERROR;
case BABEL_AE_IP4:
- /* TODO */
+ if (TLV_OPT_LENGTH(tlv) < sizeof(ip4_addr))
+ return PARSE_ERROR;
+
+ state->next_hop_ip4 = ipa_from_ip4(get_ip4(&tlv->addr));
+ state->current_tlv_endpos += sizeof(ip4_addr);
return PARSE_IGNORE;
case BABEL_AE_IP6:
if (TLV_OPT_LENGTH(tlv) < sizeof(ip6_addr))
return PARSE_ERROR;
- state->next_hop = ipa_from_ip6(get_ip6(&tlv->addr));
+ state->next_hop_ip6 = ipa_from_ip6(get_ip6(&tlv->addr));
+ state->current_tlv_endpos += sizeof(ip6_addr);
return PARSE_IGNORE;
case BABEL_AE_IP6_LL:
if (TLV_OPT_LENGTH(tlv) < 8)
return PARSE_ERROR;
- state->next_hop = ipa_from_ip6(get_ip6_ll(&tlv->addr));
+ state->next_hop_ip6 = ipa_from_ip6(get_ip6_ll(&tlv->addr));
+ state->current_tlv_endpos += 8;
return PARSE_IGNORE;
default:
@@ -455,6 +511,51 @@ babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED,
return PARSE_IGNORE;
}
+/* This is called directly from babel_write_update() and returns -1 if a next
+ hop should be written but there is not enough space. */
+static int
+babel_write_next_hop(struct babel_tlv *hdr, ip_addr addr,
+ struct babel_write_state *state, uint max_len)
+{
+ struct babel_tlv_next_hop *tlv = (void *) hdr;
+
+ if (ipa_zero(addr))
+ {
+ /* Should not happen */
+ return 0;
+ }
+ else if (ipa_is_ip4(addr) && !ipa_equal(addr, state->next_hop_ip4))
+ {
+ uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip4_addr);
+ if (len > max_len)
+ return -1;
+
+ TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len);
+
+ tlv->ae = BABEL_AE_IP4;
+ put_ip4(&tlv->addr, ipa_to_ip4(addr));
+ state->next_hop_ip4 = addr;
+
+ return len;
+ }
+ else if (ipa_is_ip6(addr) && !ipa_equal(addr, state->next_hop_ip6))
+ {
+ uint len = sizeof(struct babel_tlv_next_hop) + sizeof(ip6_addr);
+ if (len > max_len)
+ return -1;
+
+ TLV_HDR(tlv, BABEL_TLV_NEXT_HOP, len);
+
+ tlv->ae = BABEL_AE_IP6;
+ put_ip6(&tlv->addr, ipa_to_ip6(addr));
+ state->next_hop_ip6 = addr;
+
+ return len;
+ }
+
+ return 0;
+}
+
static int
babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
struct babel_parse_state *state)
@@ -480,15 +581,43 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
if (tlv->plen > 0)
return PARSE_ERROR;
+ if (msg->metric != 65535)
+ return PARSE_ERROR;
+
msg->wildcard = 1;
break;
case BABEL_AE_IP4:
- /* TODO */
- return PARSE_IGNORE;
+ if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
+ return PARSE_ERROR;
+
+ /* Cannot omit data if there is no saved prefix */
+ if (tlv->omitted && !state->def_ip4_prefix_seen)
+ return PARSE_ERROR;
+
+ /* Update must have next hop, unless it is retraction */
+ if (ipa_zero(state->next_hop_ip4) && (msg->metric != BABEL_INFINITY))
+ return PARSE_ERROR;
+
+ /* Merge saved prefix and received prefix parts */
+ memcpy(buf, state->def_ip4_prefix, tlv->omitted);
+ memcpy(buf + tlv->omitted, tlv->addr, len);
+
+ ip4_addr prefix4 = get_ip4(buf);
+ net_fill_ip4(&msg->net, prefix4, tlv->plen);
+
+ if (tlv->flags & BABEL_FLAG_DEF_PREFIX)
+ {
+ put_ip4(state->def_ip4_prefix, prefix4);
+ state->def_ip4_prefix_seen = 1;
+ }
+
+ msg->next_hop = state->next_hop_ip4;
+
+ break;
case BABEL_AE_IP6:
- if (tlv->plen > MAX_PREFIX_LENGTH)
+ if (tlv->plen > IP6_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
/* Cannot omit data if there is no saved prefix */
@@ -499,20 +628,23 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
memcpy(buf, state->def_ip6_prefix, tlv->omitted);
memcpy(buf + tlv->omitted, tlv->addr, len);
- msg->plen = tlv->plen;
- msg->prefix = ipa_from_ip6(get_ip6(buf));
+ ip6_addr prefix6 = get_ip6(buf);
+ net_fill_ip6(&msg->net, prefix6, tlv->plen);
if (tlv->flags & BABEL_FLAG_DEF_PREFIX)
{
- put_ip6(state->def_ip6_prefix, msg->prefix);
+ put_ip6(state->def_ip6_prefix, prefix6);
state->def_ip6_prefix_seen = 1;
}
if (tlv->flags & BABEL_FLAG_ROUTER_ID)
{
- state->router_id = ((u64) _I2(msg->prefix)) << 32 | _I3(msg->prefix);
+ state->router_id = ((u64) _I2(prefix6)) << 32 | _I3(prefix6);
state->router_id_seen = 1;
}
+
+ msg->next_hop = state->next_hop_ip6;
+
break;
case BABEL_AE_IP6_LL:
@@ -531,8 +663,8 @@ babel_read_update(struct babel_tlv *hdr, union babel_msg *m,
}
msg->router_id = state->router_id;
- msg->next_hop = state->next_hop;
msg->sender = state->saddr;
+ state->current_tlv_endpos += len;
return PARSE_SUCCESS;
}
@@ -541,7 +673,6 @@ static uint
babel_write_update(struct babel_tlv *hdr, union babel_msg *m,
struct babel_write_state *state, uint max_len)
{
- struct babel_tlv_update *tlv = (void *) hdr;
struct babel_msg_update *msg = &m->update;
uint len0 = 0;
@@ -550,16 +681,35 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m,
* both of them. There is enough space for the Router-ID TLV, because
* sizeof(struct babel_tlv_router_id) == sizeof(struct babel_tlv_update).
*
- * Router ID is not used for retractions, so do not us it in such case.
+ * Router ID is not used for retractions, so do not use it in such case.
*/
if ((msg->metric < BABEL_INFINITY) &&
(!state->router_id_seen || (msg->router_id != state->router_id)))
{
len0 = babel_write_router_id(hdr, msg->router_id, state, max_len);
- tlv = (struct babel_tlv_update *) NEXT_TLV(tlv);
+ hdr = NEXT_TLV(hdr);
}
- uint len = sizeof(struct babel_tlv_update) + BYTES(msg->plen);
+ /*
+ * We also may add Next Hop TLV for regular updates. It may fail for not
+ * enough space or it may be unnecessary as the next hop is the same as the
+ * last one already announced. So we handle all three cases.
+ */
+ if (msg->metric < BABEL_INFINITY)
+ {
+ int l = babel_write_next_hop(hdr, msg->next_hop, state, max_len - len0);
+ if (l < 0)
+ return 0;
+
+ if (l)
+ {
+ len0 += l;
+ hdr = NEXT_TLV(hdr);
+ }
+ }
+
+ struct babel_tlv_update *tlv = (void *) hdr;
+ uint len = sizeof(struct babel_tlv_update) + NET_SIZE(&msg->net);
if (len0 + len > max_len)
return 0;
@@ -572,11 +722,39 @@ babel_write_update(struct babel_tlv *hdr, union babel_msg *m,
tlv->ae = BABEL_AE_WILDCARD;
tlv->plen = 0;
}
+ else if (msg->net.type == NET_IP4)
+ {
+ tlv->ae = BABEL_AE_IP4;
+ tlv->plen = net4_pxlen(&msg->net);
+ put_ip4_px(tlv->addr, &msg->net);
+ }
else
{
tlv->ae = BABEL_AE_IP6;
- tlv->plen = msg->plen;
- put_ip6_px(tlv->addr, msg->prefix, msg->plen);
+ tlv->plen = net6_pxlen(&msg->net);
+
+ /* Address compression - omit initial matching bytes */
+ u8 buf[16], omit;
+ put_ip6(buf, net6_prefix(&msg->net));
+ omit = bytes_equal(buf, state->def_ip6_prefix,
+ MIN(tlv->plen, state->def_ip6_pxlen) / 8);
+
+ if (omit > 0)
+ {
+ memcpy(tlv->addr, buf + omit, NET_SIZE(&msg->net) - omit);
+
+ tlv->omitted = omit;
+ tlv->length -= omit;
+ len -= omit;
+ }
+ else
+ {
+ put_ip6_px(tlv->addr, &msg->net);
+ tlv->flags |= BABEL_FLAG_DEF_PREFIX;
+
+ put_ip6(state->def_ip6_prefix, net6_prefix(&msg->net));
+ state->def_ip6_pxlen = tlv->plen;
+ }
}
put_time16(&tlv->interval, msg->interval);
@@ -606,18 +784,25 @@ babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m,
return PARSE_SUCCESS;
case BABEL_AE_IP4:
- /* TODO */
- return PARSE_IGNORE;
+ if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
+ return PARSE_ERROR;
+
+ if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
+ return PARSE_ERROR;
+
+ read_ip4_px(&msg->net, tlv->addr, tlv->plen);
+ state->current_tlv_endpos += BYTES(tlv->plen);
+ return PARSE_SUCCESS;
case BABEL_AE_IP6:
- if (tlv->plen > MAX_PREFIX_LENGTH)
+ if (tlv->plen > IP6_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
return PARSE_ERROR;
- msg->plen = tlv->plen;
- msg->prefix = get_ip6_px(tlv->addr, tlv->plen);
+ read_ip6_px(&msg->net, tlv->addr, tlv->plen);
+ state->current_tlv_endpos += BYTES(tlv->plen);
return PARSE_SUCCESS;
case BABEL_AE_IP6_LL:
@@ -637,7 +822,7 @@ babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m,
struct babel_tlv_route_request *tlv = (void *) hdr;
struct babel_msg_route_request *msg = &m->route_request;
- uint len = sizeof(struct babel_tlv_route_request) + BYTES(msg->plen);
+ uint len = sizeof(struct babel_tlv_route_request) + NET_SIZE(&msg->net);
if (len > max_len)
return 0;
@@ -649,11 +834,17 @@ babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m,
tlv->ae = BABEL_AE_WILDCARD;
tlv->plen = 0;
}
+ else if (msg->net.type == NET_IP4)
+ {
+ tlv->ae = BABEL_AE_IP4;
+ tlv->plen = net4_pxlen(&msg->net);
+ put_ip4_px(tlv->addr, &msg->net);
+ }
else
{
tlv->ae = BABEL_AE_IP6;
- tlv->plen = msg->plen;
- put_ip6_px(tlv->addr, msg->prefix, msg->plen);
+ tlv->plen = net6_pxlen(&msg->net);
+ put_ip6_px(tlv->addr, &msg->net);
}
return len;
@@ -681,18 +872,25 @@ babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m,
return PARSE_ERROR;
case BABEL_AE_IP4:
- /* TODO */
- return PARSE_IGNORE;
+ if (tlv->plen > IP4_MAX_PREFIX_LENGTH)
+ return PARSE_ERROR;
+
+ if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
+ return PARSE_ERROR;
+
+ read_ip4_px(&msg->net, tlv->addr, tlv->plen);
+ state->current_tlv_endpos += BYTES(tlv->plen);
+ return PARSE_SUCCESS;
case BABEL_AE_IP6:
- if (tlv->plen > MAX_PREFIX_LENGTH)
+ if (tlv->plen > IP6_MAX_PREFIX_LENGTH)
return PARSE_ERROR;
if (TLV_OPT_LENGTH(tlv) < BYTES(tlv->plen))
return PARSE_ERROR;
- msg->plen = tlv->plen;
- msg->prefix = get_ip6_px(tlv->addr, tlv->plen);
+ read_ip6_px(&msg->net, tlv->addr, tlv->plen);
+ state->current_tlv_endpos += BYTES(tlv->plen);
return PARSE_SUCCESS;
case BABEL_AE_IP6_LL:
@@ -712,23 +910,70 @@ babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *m,
struct babel_tlv_seqno_request *tlv = (void *) hdr;
struct babel_msg_seqno_request *msg = &m->seqno_request;
- uint len = sizeof(struct babel_tlv_seqno_request) + BYTES(msg->plen);
+ uint len = sizeof(struct babel_tlv_seqno_request) + NET_SIZE(&msg->net);
if (len > max_len)
return 0;
TLV_HDR(tlv, BABEL_TLV_SEQNO_REQUEST, len);
- tlv->ae = BABEL_AE_IP6;
- tlv->plen = msg->plen;
+
+ if (msg->net.type == NET_IP4)
+ {
+ tlv->ae = BABEL_AE_IP4;
+ tlv->plen = net4_pxlen(&msg->net);
+ put_ip4_px(tlv->addr, &msg->net);
+ }
+ else
+ {
+ tlv->ae = BABEL_AE_IP6;
+ tlv->plen = net6_pxlen(&msg->net);
+ put_ip6_px(tlv->addr, &msg->net);
+ }
+
put_u16(&tlv->seqno, msg->seqno);
tlv->hop_count = msg->hop_count;
put_u64(&tlv->router_id, msg->router_id);
- put_ip6_px(tlv->addr, msg->prefix, msg->plen);
return len;
}
+/*
+ * babel_read_subtlvs - scan the sub-TLVs that follow the parsed body of a TLV
+ *
+ * Walks from offset state->current_tlv_endpos up to TLV_LENGTH(hdr). Padding
+ * sub-TLVs and unknown optional sub-TLVs are skipped. An unknown sub-TLV with
+ * the mandatory bit set (type 128..255) yields PARSE_IGNORE, so the caller
+ * drops the whole enclosing TLV; otherwise PARSE_SUCCESS is returned.
+ */
static inline int
+babel_read_subtlvs(struct babel_tlv *hdr,
+ union babel_msg *msg UNUSED,
+ struct babel_parse_state *state)
+{
+ struct babel_tlv *tlv;
+
+ for (tlv = (void *) hdr + state->current_tlv_endpos;
+ (void *) tlv < (void *) hdr + TLV_LENGTH(hdr);
+ tlv = NEXT_TLV(tlv))
+ {
+ /*
+ * The subtlv type space is non-contiguous (due to the mandatory bit), so
+ * use a switch for dispatch instead of the mapping array we use for TLVs
+ */
+ switch (tlv->type)
+ {
+ case BABEL_SUBTLV_PAD1:
+ case BABEL_SUBTLV_PADN:
+ /* FIXME: Framing errors in PADN are silently ignored, see babel_process_packet() */
+ break;
+
+ default:
+ /*
+ * Unknown mandatory subtlv; PARSE_IGNORE ignores the whole TLV. The
+ * mandatory bit is the top bit of the type, so type 128 itself counts.
+ */
+ if (tlv->type >= 128)
+ {
+ DBG("Babel: Mandatory subtlv %d found; skipping TLV\n", tlv->type);
+ return PARSE_IGNORE;
+ }
+ break;
+ }
+ }
+
+ return PARSE_SUCCESS;
+}
+
+static inline int
babel_read_tlv(struct babel_tlv *hdr,
union babel_msg *msg,
struct babel_parse_state *state)
@@ -741,8 +986,14 @@ babel_read_tlv(struct babel_tlv *hdr,
if (TLV_LENGTH(hdr) < tlv_data[hdr->type].min_length)
return PARSE_ERROR;
+ state->current_tlv_endpos = tlv_data[hdr->type].min_length;
memset(msg, 0, sizeof(*msg));
- return tlv_data[hdr->type].read_tlv(hdr, msg, state);
+
+ int res = tlv_data[hdr->type].read_tlv(hdr, msg, state);
+ if (res != PARSE_SUCCESS)
+ return res;
+
+ return babel_read_subtlvs(hdr, msg, state);
}
static uint
@@ -797,7 +1048,7 @@ static uint
babel_write_queue(struct babel_iface *ifa, list *queue)
{
struct babel_proto *p = ifa->proto;
- struct babel_write_state state = {};
+ struct babel_write_state state = { .next_hop_ip6 = ifa->addr };
if (EMPTY_LIST(*queue))
return 0;
@@ -933,10 +1184,10 @@ babel_process_packet(struct babel_pkt_header *pkt, int len,
byte *end = (byte *)pkt + plen;
struct babel_parse_state state = {
- .proto = p,
- .ifa = ifa,
- .saddr = saddr,
- .next_hop = saddr,
+ .proto = p,
+ .ifa = ifa,
+ .saddr = saddr,
+ .next_hop_ip6 = saddr,
};
if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION))
@@ -1045,7 +1296,7 @@ babel_rx_hook(sock *sk, uint len)
sk->iface->name, sk->faddr, sk->laddr);
/* Silently ignore my own packets */
- if (ipa_equal(ifa->iface->addr->ip, sk->faddr))
+ if (ipa_equal(sk->faddr, sk->saddr))
return 1;
if (!ipa_is_link_local(sk->faddr))
@@ -1080,6 +1331,7 @@ babel_open_socket(struct babel_iface *ifa)
sk->sport = ifa->cf->port;
sk->dport = ifa->cf->port;
sk->iface = ifa->iface;
+ sk->saddr = ifa->addr;
sk->vrf = p->p.vrf;
sk->rx_hook = babel_rx_hook;
diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile
index c28cedec..402122fc 100644
--- a/proto/bfd/Makefile
+++ b/proto/bfd/Makefile
@@ -1,5 +1,6 @@
-source=bfd.c packets.c io.c
-root-rel=../../
-dir-name=proto/bfd
+src := bfd.c io.c packets.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
-include ../../Rules
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c
index 79135fae..67ec2270 100644
--- a/proto/bfd/bfd.c
+++ b/proto/bfd/bfd.c
@@ -64,16 +64,15 @@
* ready, the protocol just creates a BFD request like any other protocol.
*
* The protocol uses a new generic event loop (structure &birdloop) from |io.c|,
- * which supports sockets, timers and events like the main loop. Timers
- * (structure &timer2) are new microsecond based timers, while sockets and
- * events are the same. A birdloop is associated with a thread (field @thread)
- * in which event hooks are executed. Most functions for setting event sources
- * (like sk_start() or tm2_start()) must be called from the context of that
- * thread. Birdloop allows to temporarily acquire the context of that thread for
- * the main thread by calling birdloop_enter() and then birdloop_leave(), which
- * also ensures mutual exclusion with all event hooks. Note that resources
- * associated with a birdloop (like timers) should be attached to the
- * independent resource pool, detached from the main resource tree.
+ * which supports sockets, timers and events like the main loop. A birdloop is
+ * associated with a thread (field @thread) in which event hooks are executed.
+ * Most functions for setting event sources (like sk_start() or tm_start()) must
+ * be called from the context of that thread. Birdloop allows the main thread
+ * to temporarily acquire the context of that thread by calling
+ * birdloop_enter() and then birdloop_leave(), which also ensures mutual
+ * exclusion with all event hooks. Note that resources associated with a
+ * birdloop (like timers) should be attached to the independent resource pool,
+ * detached from the main resource tree.
*
* There are two kinds of interaction between the BFD core (running in the BFD
* thread) and the rest of BFD (running in the main thread). The first kind are
@@ -112,7 +111,7 @@
#define HASH_IP_KEY(n) n->addr
#define HASH_IP_NEXT(n) n->next_ip
#define HASH_IP_EQ(a,b) ipa_equal(a,b)
-#define HASH_IP_FN(k) ipa_hash32(k)
+#define HASH_IP_FN(k) ipa_hash(k)
static list bfd_proto_list;
static list bfd_wait_list;
@@ -145,6 +144,7 @@ bfd_session_update_state(struct bfd_session *s, uint state, uint diag)
bfd_lock_sessions(p);
s->loc_state = state;
s->loc_diag = diag;
+ s->last_state_change = current_time();
notify = !NODE_VALID(&s->n);
if (notify)
@@ -176,7 +176,7 @@ bfd_session_update_tx_interval(struct bfd_session *s)
return;
/* Set timer relative to last tx_timer event */
- tm2_set(s->tx_timer, s->last_tx + tx_int_l);
+ tm_set(s->tx_timer, s->last_tx + tx_int_l);
}
static void
@@ -190,7 +190,7 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick)
if (!s->last_rx)
return;
- tm2_set(s->hold_timer, s->last_rx + timeout);
+ tm_set(s->hold_timer, s->last_rx + timeout);
}
static void
@@ -211,16 +211,16 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset)
goto stop;
/* So TX timer should run */
- if (reset || !tm2_active(s->tx_timer))
+ if (reset || !tm_active(s->tx_timer))
{
s->last_tx = 0;
- tm2_start(s->tx_timer, 0);
+ tm_start(s->tx_timer, 0);
}
return;
stop:
- tm2_stop(s->tx_timer);
+ tm_stop(s->tx_timer);
s->last_tx = 0;
}
@@ -379,7 +379,7 @@ bfd_find_session_by_addr(struct bfd_proto *p, ip_addr addr)
}
static void
-bfd_tx_timer_hook(timer2 *t)
+bfd_tx_timer_hook(timer *t)
{
struct bfd_session *s = t->data;
@@ -388,7 +388,7 @@ bfd_tx_timer_hook(timer2 *t)
}
static void
-bfd_hold_timer_hook(timer2 *t)
+bfd_hold_timer_hook(timer *t)
{
bfd_session_timeout(t->data);
}
@@ -432,13 +432,13 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *
s->passive = ifa->cf->passive;
s->tx_csn = random_u32();
- s->tx_timer = tm2_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0);
- s->hold_timer = tm2_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0);
+ s->tx_timer = tm_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0);
+ s->hold_timer = tm_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0);
bfd_session_update_tx_interval(s);
bfd_session_control_tx_timer(s, 1);
init_list(&s->request_list);
- s->last_state_change = now;
+ s->last_state_change = current_time();
TRACE(D_EVENTS, "Session to %I added", s->addr);
@@ -879,9 +879,6 @@ bfd_notify_hook(sock *sk, uint len UNUSED)
diag = s->loc_diag;
bfd_unlock_sessions(p);
- /* FIXME: convert to btime and move to bfd_session_update_state() */
- s->last_state_change = now;
-
s->notify_running = 1;
WALK_LIST_DELSAFE(n, nn, s->request_list)
bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag);
@@ -954,7 +951,7 @@ bfd_init_all(void)
static struct proto *
bfd_init(struct proto_config *c)
{
- struct proto *p = proto_new(c, sizeof(struct bfd_proto));
+ struct proto *p = proto_new(c);
p->neigh_notify = bfd_neigh_notify;
@@ -983,8 +980,10 @@ bfd_start(struct proto *P)
add_tail(&bfd_proto_list, &p->bfd_node);
birdloop_enter(p->loop);
- p->rx_1 = bfd_open_rx_sk(p, 0);
- p->rx_m = bfd_open_rx_sk(p, 1);
+ p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4);
+ p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4);
+ p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6);
+ p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6);
birdloop_leave(p->loop);
bfd_take_requests(p);
@@ -1078,7 +1077,7 @@ bfd_show_sessions(struct proto *P)
byte tbuf[TM_DATETIME_BUFFER_SIZE];
struct bfd_proto *p = (struct bfd_proto *) P;
uint state, diag UNUSED;
- u32 tx_int, timeout;
+ btime tx_int, timeout;
const char *ifname;
if (p->p.proto_state != PS_UP)
@@ -1099,15 +1098,14 @@ bfd_show_sessions(struct proto *P)
state = s->loc_state;
diag = s->loc_diag;
ifname = (s->ifa && s->ifa->iface) ? s->ifa->iface->name : "---";
- tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0;
- timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult;
+ tx_int = s->last_tx ? MAX(s->des_min_tx_int, s->rem_min_rx_int) : 0;
+ timeout = (btime) MAX(s->req_min_rx_int, s->rem_min_tx_int) * s->rem_detect_mult;
state = (state < 4) ? state : 0;
- tm_format_datetime(tbuf, &config->tf_proto, s->last_state_change);
+ tm_format_time(tbuf, &config->tf_proto, s->last_state_change);
- cli_msg(-1020, "%-25I %-10s %-10s %-10s %3u.%03u %3u.%03u",
- s->addr, ifname, bfd_state_names[state], tbuf,
- tx_int / 1000, tx_int % 1000, timeout / 1000, timeout % 1000);
+ cli_msg(-1020, "%-25I %-10s %-10s %-10s %7t %7t",
+ s->addr, ifname, bfd_state_names[state], tbuf, tx_int, timeout);
}
HASH_WALK_END;
@@ -1118,6 +1116,7 @@ bfd_show_sessions(struct proto *P)
struct protocol proto_bfd = {
.name = "BFD",
.template = "bfd%d",
+ .proto_size = sizeof(struct bfd_proto),
.config_size = sizeof(struct bfd_config),
.init = bfd_init,
.start = bfd_start,
diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h
index 46e09879..bc4fe969 100644
--- a/proto/bfd/bfd.h
+++ b/proto/bfd/bfd.h
@@ -87,8 +87,10 @@ struct bfd_proto
sock *notify_ws;
list notify_list;
- sock *rx_1;
- sock *rx_m;
+ sock *rx4_1;
+ sock *rx6_1;
+ sock *rx4_m;
+ sock *rx6_m;
list iface_list;
};
@@ -138,11 +140,11 @@ struct bfd_session
btime last_tx; /* Time of last sent periodic control packet */
btime last_rx; /* Time of last received valid control packet */
- timer2 *tx_timer; /* Periodic control packet timer */
- timer2 *hold_timer; /* Timer for session down detection time */
+ timer *tx_timer; /* Periodic control packet timer */
+ timer *hold_timer; /* Timer for session down detection time */
list request_list; /* List of client requests (struct bfd_request) */
- bird_clock_t last_state_change; /* Time of last state change */
+ btime last_state_change; /* Time of last state change */
u8 notify_running; /* 1 if notify hooks are running */
u8 rx_csn_known; /* Received crypto sequence number is known */
@@ -201,7 +203,7 @@ void bfd_show_sessions(struct proto *P);
/* packets.c */
void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final);
-sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop);
+sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version);
sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa);
diff --git a/proto/bfd/io.c b/proto/bfd/io.c
index 8f4f5007..b01cbfce 100644
--- a/proto/bfd/io.c
+++ b/proto/bfd/io.c
@@ -18,10 +18,10 @@
#include "proto/bfd/io.h"
#include "lib/buffer.h"
-#include "lib/heap.h"
#include "lib/lists.h"
#include "lib/resource.h"
#include "lib/event.h"
+#include "lib/timer.h"
#include "lib/socket.h"
@@ -31,16 +31,12 @@ struct birdloop
pthread_t thread;
pthread_mutex_t mutex;
- btime last_time;
- btime real_time;
- u8 use_monotonic_clock;
-
u8 stop_called;
u8 poll_active;
u8 wakeup_masked;
int wakeup_fds[2];
- BUFFER(timer2 *) timers;
+ struct timeloop time;
list event_list;
list sock_list;
uint sock_num;
@@ -57,6 +53,7 @@ struct birdloop
*/
static pthread_key_t current_loop_key;
+extern pthread_key_t current_time_key;
static inline struct birdloop *
birdloop_current(void)
@@ -68,6 +65,7 @@ static inline void
birdloop_set_current(struct birdloop *loop)
{
pthread_setspecific(current_loop_key, loop);
+ pthread_setspecific(current_time_key, loop ? &loop->time : &main_timeloop);
}
static inline void
@@ -78,98 +76,6 @@ birdloop_init_current(void)
/*
- * Time clock
- */
-
-static void times_update_alt(struct birdloop *loop);
-
-static void
-times_init(struct birdloop *loop)
-{
- struct timespec ts;
- int rv;
-
- rv = clock_gettime(CLOCK_MONOTONIC, &ts);
- if (rv < 0)
- {
- log(L_WARN "Monotonic clock is missing");
-
- loop->use_monotonic_clock = 0;
- loop->last_time = 0;
- loop->real_time = 0;
- times_update_alt(loop);
- return;
- }
-
- if ((ts.tv_sec < 0) || (((s64) ts.tv_sec) > ((s64) 1 << 40)))
- log(L_WARN "Monotonic clock is crazy");
-
- loop->use_monotonic_clock = 1;
- loop->last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
- loop->real_time = 0;
-}
-
-static void
-times_update_pri(struct birdloop *loop)
-{
- struct timespec ts;
- int rv;
-
- rv = clock_gettime(CLOCK_MONOTONIC, &ts);
- if (rv < 0)
- die("clock_gettime: %m");
-
- btime new_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
-
- if (new_time < loop->last_time)
- log(L_ERR "Monotonic clock is broken");
-
- loop->last_time = new_time;
- loop->real_time = 0;
-}
-
-static void
-times_update_alt(struct birdloop *loop)
-{
- struct timeval tv;
- int rv;
-
- rv = gettimeofday(&tv, NULL);
- if (rv < 0)
- die("gettimeofday: %m");
-
- btime new_time = ((s64) tv.tv_sec S) + tv.tv_usec;
- btime delta = new_time - loop->real_time;
-
- if ((delta < 0) || (delta > (60 S)))
- {
- if (loop->real_time)
- log(L_WARN "Time jump, delta %d us", (int) delta);
-
- delta = 100 MS;
- }
-
- loop->last_time += delta;
- loop->real_time = new_time;
-}
-
-static void
-times_update(struct birdloop *loop)
-{
- if (loop->use_monotonic_clock)
- times_update_pri(loop);
- else
- times_update_alt(loop);
-}
-
-btime
-current_time(void)
-{
- return birdloop_current()->last_time;
-}
-
-
-/*
* Wakeup code for birdloop
*/
@@ -238,7 +144,7 @@ wakeup_drain(struct birdloop *loop)
}
static inline void
-wakeup_do_kick(struct birdloop *loop)
+wakeup_do_kick(struct birdloop *loop)
{
pipe_kick(loop->wakeup_fds[1]);
}
@@ -252,6 +158,16 @@ wakeup_kick(struct birdloop *loop)
loop->wakeup_masked = 2;
}
+/*
+ * For notifications from outside: kick the loop returned by
+ * birdloop_current(), but only if there is one and its poll_active flag is set.
+ */
+void
+wakeup_kick_current(void)
+{
+ struct birdloop *cur = birdloop_current();
+
+ if (!cur || !cur->poll_active)
+ return;
+
+ wakeup_kick(cur);
+}
+
/*
* Events
@@ -272,7 +188,7 @@ events_init(struct birdloop *loop)
static void
events_fire(struct birdloop *loop)
{
- times_update(loop);
+ times_update(&loop->time);
ev_run_list(&loop->event_list);
}
@@ -292,154 +208,6 @@ ev2_schedule(event *e)
/*
- * Timers
- */
-
-#define TIMER_LESS(a,b) ((a)->expires < (b)->expires)
-#define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \
- heap[a]->index = (a), heap[b]->index = (b))
-
-static inline uint timers_count(struct birdloop *loop)
-{ return loop->timers.used - 1; }
-
-static inline timer2 *timers_first(struct birdloop *loop)
-{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; }
-
-
-static void
-tm2_free(resource *r)
-{
- timer2 *t = (timer2 *) r;
-
- tm2_stop(t);
-}
-
-static void
-tm2_dump(resource *r)
-{
- timer2 *t = (timer2 *) r;
-
- debug("(code %p, data %p, ", t->hook, t->data);
- if (t->randomize)
- debug("rand %d, ", t->randomize);
- if (t->recurrent)
- debug("recur %d, ", t->recurrent);
- if (t->expires)
- debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS);
- else
- debug("inactive)\n");
-}
-
-
-static struct resclass tm2_class = {
- "Timer",
- sizeof(timer2),
- tm2_free,
- tm2_dump,
- NULL,
- NULL
-};
-
-timer2 *
-tm2_new(pool *p)
-{
- timer2 *t = ralloc(p, &tm2_class);
- t->index = -1;
- return t;
-}
-
-void
-tm2_set(timer2 *t, btime when)
-{
- struct birdloop *loop = birdloop_current();
- uint tc = timers_count(loop);
-
- if (!t->expires)
- {
- t->index = ++tc;
- t->expires = when;
- BUFFER_PUSH(loop->timers) = t;
- HEAP_INSERT(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP);
- }
- else if (t->expires < when)
- {
- t->expires = when;
- HEAP_INCREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
- }
- else if (t->expires > when)
- {
- t->expires = when;
- HEAP_DECREASE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
- }
-
- if (loop->poll_active && (t->index == 1))
- wakeup_kick(loop);
-}
-
-void
-tm2_start(timer2 *t, btime after)
-{
- tm2_set(t, current_time() + MAX(after, 0));
-}
-
-void
-tm2_stop(timer2 *t)
-{
- if (!t->expires)
- return;
-
- struct birdloop *loop = birdloop_current();
- uint tc = timers_count(loop);
-
- HEAP_DELETE(loop->timers.data, tc, timer2 *, TIMER_LESS, TIMER_SWAP, t->index);
- BUFFER_POP(loop->timers);
-
- t->index = -1;
- t->expires = 0;
-}
-
-static void
-timers_init(struct birdloop *loop)
-{
- BUFFER_INIT(loop->timers, loop->pool, 4);
- BUFFER_PUSH(loop->timers) = NULL;
-}
-
-static void
-timers_fire(struct birdloop *loop)
-{
- btime base_time;
- timer2 *t;
-
- times_update(loop);
- base_time = loop->last_time;
-
- while (t = timers_first(loop))
- {
- if (t->expires > base_time)
- return;
-
- if (t->recurrent)
- {
- btime when = t->expires + t->recurrent;
-
- if (when <= loop->last_time)
- when = loop->last_time + t->recurrent;
-
- if (t->randomize)
- when += random() % (t->randomize + 1);
-
- tm2_set(t, when);
- }
- else
- tm2_stop(t);
-
- t->hook(t);
- }
-}
-
-
-/*
* Sockets
*/
@@ -586,7 +354,7 @@ sockets_fire(struct birdloop *loop)
sock **psk = loop->poll_sk.data;
int poll_num = loop->poll_fd.used - 1;
- times_update(loop);
+ times_update(&loop->time);
/* Last fd is internal wakeup fd */
if (pfd[poll_num].revents & POLLIN)
@@ -634,11 +402,10 @@ birdloop_new(void)
loop->pool = p;
pthread_mutex_init(&loop->mutex, NULL);
- times_init(loop);
wakeup_init(loop);
events_init(loop);
- timers_init(loop);
+ timers_init(&loop->time, p);
sockets_init(loop);
return loop;
@@ -710,7 +477,7 @@ static void *
birdloop_main(void *arg)
{
struct birdloop *loop = arg;
- timer2 *t;
+ timer *t;
int rv, timeout;
birdloop_set_current(loop);
@@ -719,13 +486,13 @@ birdloop_main(void *arg)
while (1)
{
events_fire(loop);
- timers_fire(loop);
+ timers_fire(&loop->time);
- times_update(loop);
+ times_update(&loop->time);
if (events_waiting(loop))
timeout = 0;
- else if (t = timers_first(loop))
- timeout = (tm2_remains(t) TO_MS) + 1;
+ else if (t = timers_first(&loop->time))
+ timeout = (tm_remains(t) TO_MS) + 1;
else
timeout = -1;
@@ -756,7 +523,7 @@ birdloop_main(void *arg)
if (rv)
sockets_fire(loop);
- timers_fire(loop);
+ timers_fire(&loop->time);
}
loop->stop_called = 0;
diff --git a/proto/bfd/io.h b/proto/bfd/io.h
index 641ee054..ec706e9a 100644
--- a/proto/bfd/io.h
+++ b/proto/bfd/io.h
@@ -11,80 +11,15 @@
#include "lib/lists.h"
#include "lib/resource.h"
#include "lib/event.h"
+#include "lib/timer.h"
#include "lib/socket.h"
-// #include "lib/timer.h"
-typedef struct timer2
-{
- resource r;
- void (*hook)(struct timer2 *);
- void *data;
-
- btime expires; /* 0=inactive */
- uint randomize; /* Amount of randomization */
- uint recurrent; /* Timer recurrence */
-
- int index;
-} timer2;
-
-
-btime current_time(void);
-
void ev2_schedule(event *e);
-
-timer2 *tm2_new(pool *p);
-void tm2_set(timer2 *t, btime when);
-void tm2_start(timer2 *t, btime after);
-void tm2_stop(timer2 *t);
-
-static inline int
-tm2_active(timer2 *t)
-{
- return t->expires != 0;
-}
-
-static inline btime
-tm2_remains(timer2 *t)
-{
- btime now = current_time();
- return (t->expires > now) ? (t->expires - now) : 0;
-}
-
-static inline timer2 *
-tm2_new_init(pool *p, void (*hook)(struct timer2 *), void *data, uint rec, uint rand)
-{
- timer2 *t = tm2_new(p);
- t->hook = hook;
- t->data = data;
- t->recurrent = rec;
- t->randomize = rand;
- return t;
-}
-
-static inline void
-tm2_set_max(timer2 *t, btime when)
-{
- if (when > t->expires)
- tm2_set(t, when);
-}
-
-/*
-static inline void
-tm2_start_max(timer2 *t, btime after)
-{
- btime rem = tm2_remains(t);
- tm2_start(t, MAX_(rem, after));
-}
-*/
-
-
void sk_start(sock *s);
void sk_stop(sock *s);
-
-
struct birdloop *birdloop_new(void);
void birdloop_start(struct birdloop *loop);
void birdloop_stop(struct birdloop *loop);
diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c
index 129db72f..b76efda6 100644
--- a/proto/bfd/packets.c
+++ b/proto/bfd/packets.c
@@ -248,7 +248,7 @@ bfd_check_authentication(struct bfd_proto *p, struct bfd_session *s, struct bfd_
/* BFD CSNs are in 32-bit circular number space */
u32 csn = ntohl(auth->csn);
if (s->rx_csn_known &&
- (((csn - s->rx_csn) > (3 * s->detect_mult)) ||
+ (((csn - s->rx_csn) > (3 * (uint) s->detect_mult)) ||
(meticulous && (csn == s->rx_csn))))
{
/* We want to report both new and old CSN */
@@ -405,10 +405,11 @@ bfd_err_hook(sock *sk, int err)
}
sock *
-bfd_open_rx_sk(struct bfd_proto *p, int multihop)
+bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af)
{
sock *sk = sk_new(p->tpool);
sk->type = SK_UDP;
+ sk->subtype = af;
sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT;
sk->data = p;
@@ -421,10 +422,6 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop)
sk->priority = sk_priority_control;
sk->flags = SKF_THREAD | SKF_LADDR_RX | (!multihop ? SKF_TTL_RX : 0);
-#ifdef IPV6
- sk->flags |= SKF_V6ONLY;
-#endif
-
if (sk_open(sk) < 0)
goto err;
@@ -456,10 +453,6 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa)
sk->ttl = ifa ? 255 : -1;
sk->flags = SKF_THREAD | SKF_BIND | SKF_HIGH_PORT;
-#ifdef IPV6
- sk->flags |= SKF_V6ONLY;
-#endif
-
if (sk_open(sk) < 0)
goto err;
diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile
index a634cf0d..00aaef5e 100644
--- a/proto/bgp/Makefile
+++ b/proto/bgp/Makefile
@@ -1,5 +1,6 @@
-source=bgp.c attrs.c packets.c
-root-rel=../../
-dir-name=proto/bgp
+src := attrs.c bgp.c packets.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
-include ../../Rules
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 73eb4040..882ba44e 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -2,6 +2,8 @@
* BIRD -- BGP Attributes
*
* (c) 2000 Martin Mares <mj@ucw.cz>
+ * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2008--2016 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
@@ -39,888 +41,1249 @@
* specifies that such updates should be ignored, but that is generally
* a bad idea.
*
- * Error checking of optional transitive attributes is done according to
- * draft-ietf-idr-optional-transitive-03, but errors are handled always
- * as withdraws.
+ * BGP attribute table has several hooks:
*
- * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
- * but unknown segments cause a session drop with Malformed AS_PATH
- * error (see validate_path()). The behavior in such case is not
- * explicitly specified by RFC 4271. RFC 5065 specifies that
- * inconsistent AS_CONFED_* segments should cause a session drop, but
- * implementations that pass invalid AS_CONFED_* segments are
- * widespread.
+ * export - Hook that validates and normalizes attribute during export phase.
+ * Receives eattr, may modify it (e.g., sort community lists for canonical
+ * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
+ * necessary. May assume that eattr has value valid w.r.t. its type, but may be
+ * invalid w.r.t. BGP constraints. Optional.
*
- * Error handling of AS4_* attributes is done as specified by RFC 6793. There
- * are several possible inconsistencies between AGGREGATOR and AS4_AGGREGATOR
- * that are not handled by that RFC, these are logged and ignored (see
- * bgp_reconstruct_4b_attrs()).
+ * encode - Hook that converts internal representation to external one during
+ * packet writing. Receives eattr and puts it in the buffer (including attribute
+ * header). Returns number of bytes, or -1 if not enough space. May assume that
+ * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
+ * for all known attributes that exist internally after export phase (i.e., all
+ * except pseudoattributes MP_(UN)REACH_NLRI).
+ *
+ * decode - Hook that converts external representation to internal one during
+ * packet parsing. Receives attribute data in buffer, validates it and adds
+ * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
+ * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
+ *
+ * format - Optional hook that converts eattr to textual representation.
*/
+// XXXX review pool usage : c->c.proto->pool
-static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH
-#ifndef IPV6
-,BA_NEXT_HOP
-#endif
-};
-struct attr_desc {
- char *name;
- int expected_length;
- int expected_flags;
- int type;
- int allow_in_ebgp;
- int (*validate)(struct bgp_proto *p, byte *attr, int len);
- void (*format)(eattr *ea, byte *buf, int buflen);
+struct bgp_attr_desc {
+ const char *name;
+ uint type;
+ uint flags;
+ void (*export)(struct bgp_export_state *s, eattr *a);
+ int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
+ void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
+ void (*format)(eattr *ea, byte *buf, uint size);
};
-#define IGNORE -1
-#define WITHDRAW -2
+static const struct bgp_attr_desc bgp_attr_table[];
+
+static inline int bgp_attr_known(uint code);
+
+/*
+ * Prepend a one-element ea_list carrying BGP attribute @code to *@attrs.
+ * The list node is allocated from @pool with lp_alloc(). @val is stored as
+ * embedded u32 data when the type taken from bgp_attr_table[] has EAF_EMBEDDED
+ * set, otherwise it is stored as a struct adata pointer. @code must satisfy
+ * bgp_attr_known(). Returns the freshly filled eattr.
+ */
+eattr *
+bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
+{
+ ASSERT(bgp_attr_known(code));
+
+ ea_list *node = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
+ eattr *attr = &node->attrs[0];
+
+ /* Describe the attribute itself */
+ attr->id = EA_CODE(EAP_BGP, code);
+ attr->type = bgp_attr_table[code].type;
+ attr->flags = flags;
+
+ if (!(attr->type & EAF_EMBEDDED))
+ attr->u.ptr = (struct adata *) val;
+ else
+ attr->u.data = (u32) val;
+
+ /* Push the single-attribute list onto the head of the chain */
+ node->flags = EALF_SORTED;
+ node->count = 1;
+ node->next = *attrs;
+ *attrs = node;
+
+ return attr;
+}
+
+
+
+/*
+ * Attribute-hook helper macros. All of them except UNSET() expect a variable
+ * named 's' in the calling scope (they read s->proto->p.name). DISCARD(),
+ * WITHDRAW() and UNSET() contain a bare 'return', so they leave the enclosing
+ * function and can only be used in functions returning void.
+ */
+
+/* Log a message at L_REMOTE level, prefixed with the protocol name */
+#define REPORT(msg, args...) \
+ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
+
+/* Report the problem and return from the enclosing function */
+#define DISCARD(msg, args...) \
+ ({ REPORT(msg, ## args); return; })
+
+/* Report the problem, set s->err_withdraw and return from the enclosing function */
+#define WITHDRAW(msg, args...) \
+ ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
+
+/* Mark eattr 'a' as undefined (EAF_TYPE_UNDEF) and return from the enclosing function */
+#define UNSET(a) \
+ ({ a->type = EAF_TYPE_UNDEF; return; })
+
+#define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor"
+#define BAD_EBGP "Discarding %s attribute received from EBGP neighbor"
+#define BAD_LENGTH "Malformed %s attribute - invalid length (%u)"
+#define BAD_VALUE "Malformed %s attribute - invalid value (%u)"
+#define NO_MANDATORY "Missing mandatory %s attribute"
+
+
+/* Write a 3-byte attribute header (flags, code, single-byte length); returns 3 */
+static inline int
+bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
+{
+ buf[0] = flags;
+ buf[1] = code;
+ buf[2] = len;
+
+ return 3;
+}
+
+/*
+ * Write a 4-byte attribute header: flags with BAF_EXT_LEN added, code, and
+ * the length stored by put_u16(); returns 4
+ */
+static inline int
+bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
+{
+ buf[0] = flags | BAF_EXT_LEN;
+ buf[1] = code;
+ put_u16(buf + 2, len);
+
+ return 4;
+}
+
+/*
+ * Write an attribute header, picking the 3-byte form when len < 256 and the
+ * 4-byte extended-length form otherwise; returns the header size written
+ */
+static inline int
+bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
+{
+ return (len < 256) ?
+ bgp_put_attr_hdr3(buf, code, flags, len) :
+ bgp_put_attr_hdr4(buf, code, flags, len);
+}
static int
-bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED)
+bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
- if (*a > 2)
- return 6;
- return 0;
+ if (size < (3+1))
+ return -1;
+
+ bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
+ buf[3] = a->u.data;
+
+ return 3+1;
}
-static void
-bgp_format_origin(eattr *a, byte *buf, int buflen UNUSED)
+static int
+bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
- static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
+ if (size < (3+4))
+ return -1;
+
+ bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
+ put_u32(buf+3, a->u.data);
- bsprintf(buf, bgp_origin_names[a->u.data]);
+ return 3+4;
}
static int
-path_segment_contains(byte *p, int bs, u32 asn)
+bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
{
- int i;
- int len = p[1];
- p += 2;
+ uint len = a->u.ptr->length;
- for(i=0; i<len; i++)
- {
- u32 asn2 = (bs == 4) ? get_u32(p) : get_u16(p);
- if (asn2 == asn)
- return 1;
- p += bs;
- }
+ if (size < (4+len))
+ return -1;
- return 0;
+ uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
+ put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
+
+ return hdr + len;
}
-/* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */
static int
-validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ilength)
+bgp_put_attr(byte *buf, uint size, uint code, uint flags, byte *data, uint len)
{
- int res = 0;
- u8 *a, *dst;
- int len, plen;
+ if (size < (4+len))
+ return -1;
- dst = a = idata;
- len = *ilength;
+ uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
+ memcpy(buf + hdr, data, len);
- while (len)
- {
- if (len < 2)
- return -1;
-
- plen = 2 + bs * a[1];
- if (len < plen)
- return -1;
-
- if (a[1] == 0)
- {
- log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it",
- p->p.name, as_path ? "AS" : "AS4");
- goto skip;
- }
-
- switch (a[0])
- {
- case AS_PATH_SET:
- res++;
- break;
-
- case AS_PATH_SEQUENCE:
- res += a[1];
- break;
-
- case AS_PATH_CONFED_SEQUENCE:
- case AS_PATH_CONFED_SET:
- if (as_path && path_segment_contains(a, bs, p->remote_as))
- {
- log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name);
- return -1;
- }
-
- log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment",
- p->p.name, as_path ? "AS" : "AS4");
- goto skip;
-
- default:
- return -1;
- }
-
- if (dst != a)
- memmove(dst, a, plen);
- dst += plen;
-
- skip:
- len -= plen;
- a += plen;
- }
+ return hdr + len;
+}
- *ilength = dst - idata;
- return res;
+static int
+bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
+{
+ return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
}
-static inline int
-validate_as_path(struct bgp_proto *p, byte *a, int *len)
+
+/*
+ * Attribute hooks
+ */
+
+static void
+bgp_export_origin(struct bgp_export_state *s, eattr *a)
{
- return validate_path(p, 1, p->as4_session ? 4 : 2, a, len);
+ if (a->u.data > 2)
+ WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
}
-static inline int
-validate_as4_path(struct bgp_proto *p, struct adata *path)
+static void
+bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- return validate_path(p, 0, 4, path->data, &path->length);
+ if (len != 1)
+ WITHDRAW(BAD_LENGTH, "ORIGIN", len);
+
+ if (data[0] > 2)
+ WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
+
+ bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
}
+/*
+ * Format the ORIGIN attribute as text into buf: "IGP", "EGP" or
+ * "Incomplete" for values 0-2, "?" for anything else.
+ *
+ * The name is passed as an argument to a constant "%s" format instead of
+ * being used as the format string itself.
+ */
+static void
+bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
+{
+  static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
+
+  bsprintf(buf, "%s", (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
+}
+
+
static int
-bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a UNUSED6, int len UNUSED6)
+bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
-#ifdef IPV6
- return IGNORE;
-#else
- ip_addr addr;
+ byte *data = a->u.ptr->data;
+ uint len = a->u.ptr->length;
+
+ if (!s->as4_session)
+ {
+ /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
+ byte *src = data;
+ data = alloca(len);
+ len = as_path_32to16(data, src, len);
+ }
+
+ return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
+}
- memcpy(&addr, a, len);
- ipa_ntoh(addr);
- if (ipa_classify(addr) & IADDR_HOST)
+/*
+ * Decode hook for AS_PATH: validate the received path, convert it to the
+ * 32-bit representation when the session is not AS4-aware, and attach it
+ * to the attribute list. Validation failures go through WITHDRAW.
+ */
+static void
+bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
+{
+  struct bgp_proto *p = s->proto;
+  int asn_size = s->as4_session ? 4 : 2;
+  int confed_ok = p->cf->confederation && p->is_interior;
+  char errbuf[128];
+
+  if (!as_path_valid(data, len, asn_size, confed_ok, errbuf, sizeof(errbuf)))
+    WITHDRAW("Malformed AS_PATH attribute - %s", errbuf);
+
+  /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
+  if (p->is_interior && !p->is_internal)
+  {
+    if ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE))
+      WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
+  }
+
+  if (!s->as4_session)
+  {
+    /* Build the 32-bit AS_PATH from the 16-bit one in a temporary buffer */
+    byte *wire = data;
+    data = alloca(2*len);
+    len = as_path_16to32(data, wire, len);
+  }
+
+  bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
+}
+
+
+static int
+bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
+{
+ /*
+ * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
+ * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
+ * store it and encode it later by AFI-specific hooks.
+ */
+
+ if (s->channel->afi == BGP_AF_IPV4)
+ {
+ ASSERT(a->u.ptr->length == sizeof(ip_addr));
+
+ if (size < (3+4))
+ return -1;
+
+ bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
+ put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
+
+ return 3+4;
+ }
+ else
+ {
+ s->mp_next_hop = a;
return 0;
+ }
+}
+
+/*
+ * Decode hook for NEXT_HOP: accept only a 4-byte value and remember where
+ * it lies in the packet. Nothing is added to the attribute list here.
+ */
+static void
+bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
+{
+  if (len != 4)
+    WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
+
+  /* Only record the raw bytes; semantic checks are done later */
+  s->ip_next_hop_data = data;
+  s->ip_next_hop_len = len;
+}
+
+/* TODO: This function should use AF-specific hook */
+static void
+bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
+{
+ ip_addr *nh = (void *) a->u.ptr->data;
+ uint len = a->u.ptr->length;
+
+ ASSERT((len == 16) || (len == 32));
+
+ /* in IPv6, we may have two addresses in NEXT HOP */
+ if ((len == 16) || ipa_zero(nh[1]))
+ bsprintf(buf, "%I", nh[0]);
else
- return 8;
-#endif
+ bsprintf(buf, "%I %I", nh[0], nh[1]);
}
+
static void
-bgp_format_next_hop(eattr *a, byte *buf, int buflen UNUSED)
+bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- ip_addr *ipp = (ip_addr *) a->u.ptr->data;
-#ifdef IPV6
- /* in IPv6, we might have two addresses in NEXT HOP */
- if ((a->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(ipp[1]))
- {
- bsprintf(buf, "%I %I", ipp[0], ipp[1]);
- return;
- }
-#endif
+ if (len != 4)
+ WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
- bsprintf(buf, "%I", ipp[0]);
+ u32 val = get_u32(data);
+ bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
}
-static int
-bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len)
+
+static void
+bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
{
- int exp_len = p->as4_session ? 8 : 6;
-
- return (len == exp_len) ? 0 : WITHDRAW;
+ if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
+ UNSET(a);
}
static void
-bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED)
+bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- struct adata *ad = a->u.ptr;
- byte *data = ad->data;
- u32 as;
+ if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
+ DISCARD(BAD_EBGP, "LOCAL_PREF");
- as = get_u32(data);
- data += 4;
+ if (len != 4)
+ WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
- bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as);
+ u32 val = get_u32(data);
+ bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
}
-static int
-bgp_check_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
+
+static void
+bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
{
- return ((len % 4) == 0) ? 0 : WITHDRAW;
+ if (len != 0)
+ DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
+
+ bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
}
static int
-bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
+bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
{
- return ((len % 4) == 0) ? 0 : 5;
+ byte *data = a->u.ptr->data;
+ uint len = a->u.ptr->length;
+
+ if (!s->as4_session)
+ {
+ /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
+ byte *src = data;
+ data = alloca(6);
+ len = aggregator_32to16(data, src);
+ }
+
+ return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
}
static void
-bgp_format_cluster_list(eattr *a, byte *buf, int buflen)
+bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- /* Truncates cluster lists larger than buflen, probably not a problem */
- int_set_format(a->u.ptr, 0, -1, buf, buflen);
+ if (len != (s->as4_session ? 8 : 6))
+ DISCARD(BAD_LENGTH, "AGGREGATOR", len);
+
+ if (!s->as4_session)
+ {
+ /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
+ byte *src = data;
+ data = alloca(8);
+ len = aggregator_16to32(data, src);
+ }
+
+ bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
}
-static int
-bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
+static void
+bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
{
-#ifdef IPV6
- p->mp_reach_start = a;
- p->mp_reach_len = len;
-#endif
- return IGNORE;
+ byte *data = a->u.ptr->data;
+
+ bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
}
-static int
-bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
+
+static void
+bgp_export_community(struct bgp_export_state *s, eattr *a)
{
-#ifdef IPV6
- p->mp_unreach_start = a;
- p->mp_unreach_len = len;
-#endif
- return IGNORE;
+ if (a->u.ptr->length == 0)
+ UNSET(a);
+
+ a->u.ptr = int_set_sort(s->pool, a->u.ptr);
}
-static int
-bgp_check_ext_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
+static void
+bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- return ((len % 8) == 0) ? 0 : WITHDRAW;
+ if (!len || (len % 4))
+ WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
+
+ struct adata *ad = lp_alloc_adata(s->pool, len);
+ get_u32s(data, (u32 *) ad->data, len / 4);
+ bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
}
-static int
-bgp_check_large_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
-{
- return ((len % 12) == 0) ? 0 : WITHDRAW;
-}
-
-
-static struct attr_desc bgp_attr_table[] = {
- { NULL, -1, 0, 0, 0, /* Undefined */
- NULL, NULL },
- { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */
- bgp_check_origin, bgp_format_origin },
- { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */
- NULL, NULL }, /* is checked by validate_as_path() as a special case */
- { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */
- bgp_check_next_hop, bgp_format_next_hop },
- { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */
- NULL, NULL },
- { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_LOCAL_PREF */
- NULL, NULL },
- { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */
- NULL, NULL },
- { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */
- bgp_check_aggregator, bgp_format_aggregator },
- { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */
- bgp_check_community, NULL },
- { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0, /* BA_ORIGINATOR_ID */
- NULL, NULL },
- { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */
- bgp_check_cluster_list, bgp_format_cluster_list },
- { .name = NULL }, /* BA_DPA */
- { .name = NULL }, /* BA_ADVERTISER */
- { .name = NULL }, /* BA_RCID_PATH */
- { "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_REACH_NLRI */
- bgp_check_reach_nlri, NULL },
- { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */
- bgp_check_unreach_nlri, NULL },
- { "ext_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_EC_SET, 1, /* BA_EXT_COMMUNITY */
- bgp_check_ext_community, NULL },
- { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
- NULL, NULL },
- { "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
- NULL, NULL },
- [BA_LARGE_COMMUNITY] =
- { "large_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_LC_SET, 1,
- bgp_check_large_community, NULL }
-};
-/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH.
- * It does not matter as this attribute does not appear on routes in the routing table.
- */
+/*
+ * Export hook for ORIGINATOR_ID: the attribute is kept only when the
+ * protocol's is_internal flag is set; otherwise it is marked undefined.
+ */
+static void
+bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
+{
+  if (s->proto->is_internal)
+    return;
+
+  UNSET(a);
+}
+
+static void
+bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
+{
+ if (!s->proto->is_internal)
+ DISCARD(BAD_EBGP, "ORIGINATOR_ID");
-#define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name)
+ if (len != 4)
+ WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
-static inline struct adata *
-bgp_alloc_adata(struct linpool *pool, unsigned len)
+ u32 val = get_u32(data);
+ bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
+}
+
+
+/*
+ * Export hook for CLUSTER_LIST: the attribute is marked undefined when the
+ * protocol's is_internal flag is not set, or when the list is empty.
+ * The parameter s is read here, so it carries no UNUSED marker.
+ */
+static void
+bgp_export_cluster_list(struct bgp_export_state *s, eattr *a)
{
- struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
- ad->length = len;
- return ad;
+  if (!s->proto->is_internal)
+    UNSET(a);
+
+  if (a->u.ptr->length == 0)
+    UNSET(a);
}
static void
-bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
+bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- ASSERT(ATTR_KNOWN(attr));
- e->id = EA_CODE(EAP_BGP, attr);
- e->type = bgp_attr_table[attr].type;
- e->flags = bgp_attr_table[attr].expected_flags;
- if (e->type & EAF_EMBEDDED)
- e->u.data = val;
- else
- e->u.ptr = (struct adata *) val;
+ if (!s->proto->is_internal)
+ DISCARD(BAD_EBGP, "CLUSTER_LIST");
+
+ if (!len || (len % 4))
+ WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
+
+ struct adata *ad = lp_alloc_adata(s->pool, len);
+ get_u32s(data, (u32 *) ad->data, len / 4);
+ bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
}
-static byte *
-bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
+static void
+bgp_format_cluster_list(eattr *a, byte *buf, uint size)
{
- struct adata *ad = bgp_alloc_adata(pool, len);
- bgp_set_attr(e, attr, (uintptr_t) ad);
- return ad->data;
+ /* Truncates cluster lists that do not fit into the buffer (size), probably not a problem */
+ int_set_format(a->u.ptr, 0, -1, buf, size);
}
-void
-bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
+
+static inline u32
+get_af3(byte *buf)
{
- ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
- a->next = *to;
- *to = a;
- a->flags = EALF_SORTED;
- a->count = 1;
- bgp_set_attr(a->attrs, attr, val);
+ return (get_u16(buf) << 16) | buf[2];
}
-byte *
-bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
+static void
+bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
{
- struct adata *ad = bgp_alloc_adata(pool, len);
- bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
- return ad->data;
+ /*
+ * 2 B MP_REACH_NLRI data - Address Family Identifier
+ * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
+ * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
+ * var MP_REACH_NLRI data - Network Address of Next Hop
+ * 1 B MP_REACH_NLRI data - Reserved (zero)
+ * var MP_REACH_NLRI data - Network Layer Reachability Information
+ */
+
+ if ((len < 5) || (len < (5 + (uint) data[3])))
+ bgp_parse_error(s, 9);
+
+ s->mp_reach_af = get_af3(data);
+ s->mp_next_hop_len = data[3];
+ s->mp_next_hop_data = data + 4;
+ s->mp_reach_len = len - 5 - s->mp_next_hop_len;
+ s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
}
-static int
-bgp_encode_attr_hdr(byte *dst, uint flags, unsigned code, int len)
+
+static void
+bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
{
- int wlen;
+ /*
+ * 2 B MP_UNREACH_NLRI data - Address Family Identifier
+ * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
+ * var MP_UNREACH_NLRI data - Network Layer Reachability Information
+ */
- DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
+ if (len < 3)
+ bgp_parse_error(s, 9);
+
+ s->mp_unreach_af = get_af3(data);
+ s->mp_unreach_len = len - 3;
+ s->mp_unreach_nlri = data + 3;
+}
- if (len < 256)
- {
- *dst++ = flags;
- *dst++ = code;
- *dst++ = len;
- wlen = 3;
- }
- else
- {
- *dst++ = flags | BAF_EXT_LEN;
- *dst++ = code;
- put_u16(dst, len);
- wlen = 4;
- }
- return wlen;
+/*
+ * Export hook for EXT_COMMUNITY: an empty set is marked undefined,
+ * a non-empty set is replaced by the result of ec_set_sort().
+ */
+static void
+bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
+{
+  struct adata *set = a->u.ptr;
+
+  if (!set->length)
+    UNSET(a);
+
+  a->u.ptr = ec_set_sort(s->pool, set);
}
static void
-aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
+bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- byte *src = aggr->data;
- *new_used = 0;
+ if (!len || (len % 8))
+ WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
- u32 as = get_u32(src);
- if (as > 0xFFFF)
- {
- as = AS_TRANS;
- *new_used = 1;
- }
- put_u16(dst, as);
+ struct adata *ad = lp_alloc_adata(s->pool, len);
+ get_u32s(data, (u32 *) ad->data, len / 4);
+ bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
+}
- /* Copy IPv4 address */
- memcpy(dst + 2, src + 4, 4);
+
+/*
+ * Decode hook for AS4_AGGREGATOR: the attribute is discarded (with a log
+ * message) on AS4-aware sessions or when its length is not 8 bytes;
+ * otherwise the raw data is attached to the attribute list.
+ */
+static void
+bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
+{
+  if (s->as4_session)
+    DISCARD(NEW_BGP, "AS4_AGGREGATOR");
+  else if (len != 8)
+    DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
+  else
+    bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
}
static void
-aggregator_convert_to_new(struct adata *aggr, byte *dst)
+bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
- byte *src = aggr->data;
+ char err[128];
- u32 as = get_u16(src);
- put_u32(dst, as);
+ if (s->as4_session)
+ DISCARD(NEW_BGP, "AS4_PATH");
- /* Copy IPv4 address */
- memcpy(dst + 4, src + 2, 4);
+ if (len < 6)
+ DISCARD(BAD_LENGTH, "AS4_PATH", len);
+
+ if (!as_path_valid(data, len, 4, 1, err, sizeof(err)))
+ DISCARD("Malformed AS4_PATH attribute - %s", err);
+
+ struct adata *a = lp_alloc_adata(s->pool, len);
+ memcpy(a->data, data, len);
+
+ /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
+ if (as_path_contains_confed(a))
+ {
+ REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
+ a = as_path_strip_confed(s->pool, a);
+ }
+
+ bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
+}
+
+/*
+ * Export hook for LARGE_COMMUNITY: an empty set is marked undefined,
+ * a non-empty set is replaced by the result of lc_set_sort().
+ */
+static void
+bgp_export_large_community(struct bgp_export_state *s, eattr *a)
+{
+  struct adata *set = a->u.ptr;
+
+  if (!set->length)
+    UNSET(a);
+
+  a->u.ptr = lc_set_sort(s->pool, set);
+}
+
+/*
+ * Decode hook for LARGE_COMMUNITY: the length must be a non-zero multiple
+ * of 12 bytes, otherwise WITHDRAW is triggered. The payload is copied into
+ * a freshly allocated adata as 32-bit words via get_u32s().
+ */
+static void
+bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
+{
+  if ((len == 0) || ((len % 12) != 0))
+    WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
+
+  struct adata *set = lp_alloc_adata(s->pool, len);
+  uint words = len / 4;
+
+  get_u32s(data, (u32 *) set->data, words);
+  bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, set);
+}
+
+/*
+ * Export hook for the MPLS label stack: checks that MPLS is expected
+ * (s->mpls), that the stack is non-empty, that the labels together with the
+ * prefix fit into 255 bits, and that every label is at most 0xfffff.
+ * Any violation goes through WITHDRAW.
+ */
+static void
+bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
+{
+  net_addr *net = s->route->net->n.addr;
+  u32 *stack = (u32 *) a->u.ptr->data;
+  uint depth = a->u.ptr->length / 4;
+
+  /* Perhaps we should just ignore it? */
+  if (!s->mpls)
+    WITHDRAW("Unexpected MPLS stack");
+
+  /* Empty MPLS stack is not allowed */
+  if (depth == 0)
+    WITHDRAW("Malformed MPLS stack - empty");
+
+  /* This is ugly, but we must ensure that labels fit into NLRI field */
+  uint nlri_bits = 24*depth + (net_is_vpn(net) ? 64 : 0) + net_pxlen(net);
+  if (nlri_bits > 255)
+    WITHDRAW("Malformed MPLS stack - too many labels (%u)", depth);
+
+  for (uint pos = 0; pos < depth; pos++)
+  {
+    u32 label = stack[pos];
+
+    if (label > 0xfffff)
+      WITHDRAW("Malformed MPLS stack - invalid label (%u)", label);
+
+    /* TODO: Check for special-purpose label values? */
+  }
}
static int
-bgp_get_attr_len(eattr *a)
+bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
{
- int len;
- if (ATTR_KNOWN(EA_ID(a->id)))
+ /*
+ * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
+ * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
+ */
+
+ s->mpls_labels = a->u.ptr;
+ return 0;
+}
+
+/*
+ * Decode hook for the MPLS label stack attribute. It only logs a message
+ * and returns; nothing is stored in the attribute list.
+ */
+static void
+bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
+{
+ DISCARD("Discarding received attribute #0");
+}
+
+static void
+bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
+{
+ u32 *labels = (u32 *) a->u.ptr->data;
+ uint lnum = a->u.ptr->length / 4;
+ char *pos = buf;
+
+ for (uint i = 0; i < lnum; i++)
+ {
+ if (size < 20)
{
- int code = EA_ID(a->id);
- struct attr_desc *desc = &bgp_attr_table[code];
- len = desc->expected_length;
- if (len < 0)
- {
- ASSERT(!(a->type & EAF_EMBEDDED));
- len = a->u.ptr->length;
- }
+ bsprintf(pos, "...");
+ return;
}
+
+ uint l = bsprintf(pos, "%d/", labels[i]);
+ ADVANCE(pos, size, l);
+ }
+
+ /* Clear last slash or terminate empty string */
+ pos[lnum ? -1 : 0] = 0;
+}
+
+/*
+ * Decode hook for attributes without an entry in the attribute table:
+ * the raw data is passed on unchanged with the received code and flags.
+ *
+ * NOTE(review): bgp_set_attr() asserts bgp_attr_known(code) and reads
+ * bgp_attr_table[code].type. If bgp_set_attr_data() is built on top of it
+ * (its definition is not visible here), an unknown code would trip that
+ * assertion or index past the table - confirm.
+ */
+static inline void
+bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
+{
+ bgp_set_attr_data(to, s->pool, code, flags, data, len);
+}
+
+
+/*
+ * Attribute table
+ */
+
+/*
+ * Table of known BGP attributes, indexed by attribute code. Each entry gives
+ * the attribute name, its extended-attribute type, the flags assigned on
+ * export and the optional export/encode/decode/format hooks. An entry with
+ * a NULL name is treated as unknown by bgp_attr_known().
+ *
+ * Entries for mp_reach_nlri and mp_unreach_nlri have no encode hook, and
+ * mpls_label_stack has no flags set.
+ */
+static const struct bgp_attr_desc bgp_attr_table[] = {
+ [BA_ORIGIN] = {
+ .name = "origin",
+ .type = EAF_TYPE_INT,
+ .flags = BAF_TRANSITIVE,
+ .export = bgp_export_origin,
+ .encode = bgp_encode_u8,
+ .decode = bgp_decode_origin,
+ .format = bgp_format_origin,
+ },
+ [BA_AS_PATH] = {
+ .name = "as_path",
+ .type = EAF_TYPE_AS_PATH,
+ .flags = BAF_TRANSITIVE,
+ .encode = bgp_encode_as_path,
+ .decode = bgp_decode_as_path,
+ },
+ [BA_NEXT_HOP] = {
+ .name = "next_hop",
+ .type = EAF_TYPE_IP_ADDRESS,
+ .flags = BAF_TRANSITIVE,
+ .encode = bgp_encode_next_hop,
+ .decode = bgp_decode_next_hop,
+ .format = bgp_format_next_hop,
+ },
+ [BA_MULTI_EXIT_DISC] = {
+ .name = "med",
+ .type = EAF_TYPE_INT,
+ .flags = BAF_OPTIONAL,
+ .encode = bgp_encode_u32,
+ .decode = bgp_decode_med,
+ },
+ [BA_LOCAL_PREF] = {
+ .name = "local_pref",
+ .type = EAF_TYPE_INT,
+ .flags = BAF_TRANSITIVE,
+ .export = bgp_export_local_pref,
+ .encode = bgp_encode_u32,
+ .decode = bgp_decode_local_pref,
+ },
+ [BA_ATOMIC_AGGR] = {
+ .name = "atomic_aggr",
+ .type = EAF_TYPE_OPAQUE,
+ .flags = BAF_TRANSITIVE,
+ .encode = bgp_encode_raw,
+ .decode = bgp_decode_atomic_aggr,
+ },
+ [BA_AGGREGATOR] = {
+ .name = "aggregator",
+ .type = EAF_TYPE_OPAQUE,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .encode = bgp_encode_aggregator,
+ .decode = bgp_decode_aggregator,
+ .format = bgp_format_aggregator,
+ },
+ [BA_COMMUNITY] = {
+ .name = "community",
+ .type = EAF_TYPE_INT_SET,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .export = bgp_export_community,
+ .encode = bgp_encode_u32s,
+ .decode = bgp_decode_community,
+ },
+ [BA_ORIGINATOR_ID] = {
+ .name = "originator_id",
+ .type = EAF_TYPE_ROUTER_ID,
+ .flags = BAF_OPTIONAL,
+ .export = bgp_export_originator_id,
+ .encode = bgp_encode_u32,
+ .decode = bgp_decode_originator_id,
+ },
+ [BA_CLUSTER_LIST] = {
+ .name = "cluster_list",
+ .type = EAF_TYPE_INT_SET,
+ .flags = BAF_OPTIONAL,
+ .export = bgp_export_cluster_list,
+ .encode = bgp_encode_u32s,
+ .decode = bgp_decode_cluster_list,
+ .format = bgp_format_cluster_list,
+ },
+ [BA_MP_REACH_NLRI] = {
+ .name = "mp_reach_nlri",
+ .type = EAF_TYPE_OPAQUE,
+ .flags = BAF_OPTIONAL,
+ .decode = bgp_decode_mp_reach_nlri,
+ },
+ [BA_MP_UNREACH_NLRI] = {
+ .name = "mp_unreach_nlri",
+ .type = EAF_TYPE_OPAQUE,
+ .flags = BAF_OPTIONAL,
+ .decode = bgp_decode_mp_unreach_nlri,
+ },
+ [BA_EXT_COMMUNITY] = {
+ .name = "ext_community",
+ .type = EAF_TYPE_EC_SET,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .export = bgp_export_ext_community,
+ .encode = bgp_encode_u32s,
+ .decode = bgp_decode_ext_community,
+ },
+ [BA_AS4_PATH] = {
+ .name = "as4_path",
+ .type = EAF_TYPE_AS_PATH,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .encode = bgp_encode_raw,
+ .decode = bgp_decode_as4_path,
+ },
+ [BA_AS4_AGGREGATOR] = {
+ .name = "as4_aggregator",
+ .type = EAF_TYPE_OPAQUE,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .encode = bgp_encode_raw,
+ .decode = bgp_decode_as4_aggregator,
+ .format = bgp_format_aggregator,
+ },
+ [BA_LARGE_COMMUNITY] = {
+ .name = "large_community",
+ .type = EAF_TYPE_LC_SET,
+ .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+ .export = bgp_export_large_community,
+ .encode = bgp_encode_u32s,
+ .decode = bgp_decode_large_community,
+ },
+ [BA_MPLS_LABEL_STACK] = {
+ .name = "mpls_label_stack",
+ .type = EAF_TYPE_INT_SET,
+ .export = bgp_export_mpls_label_stack,
+ .encode = bgp_encode_mpls_label_stack,
+ .decode = bgp_decode_mpls_label_stack,
+ .format = bgp_format_mpls_label_stack,
+ },
+};
+
+/*
+ * Tell whether code has an entry in bgp_attr_table: it must lie within the
+ * table and the entry must have a name. Returns 1 if known, 0 otherwise.
+ */
+static inline int
+bgp_attr_known(uint code)
+{
+  if (code >= ARRAY_SIZE(bgp_attr_table))
+    return 0;
+
+  return bgp_attr_table[code].name != NULL;
+}
+
+
+/*
+ * Attribute export
+ */
+
+/*
+ * Export one attribute a and, if it is kept, append a copy to the list to.
+ *
+ * Attributes of other protocols are skipped. For known BGP attributes the
+ * flags are reset to the table defaults (keeping only the Partial bit),
+ * the Partial bit is added for originated optional transitive attributes
+ * when s->src is set, and the type-specific export hook is invoked through
+ * CALL(); an attribute that ends up undefined is dropped. Unknown
+ * attributes are kept only when transitive and always get the Partial bit.
+ */
+static inline void
+bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
+{
+ if (EA_PROTO(a->id) != EAP_BGP)
+ return;
+
+ uint code = EA_ID(a->id);
+
+ if (bgp_attr_known(code))
+ {
+ const struct bgp_attr_desc *desc = &bgp_attr_table[code];
+
+ /* The flags might have been zero if the attr was added by filters */
+ a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
+
+ /* Set partial bit if new opt-trans attribute is attached to non-local route */
+ if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
+ (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
+ a->flags |= BAF_PARTIAL;
+
+ /* Call specific hook */
+ CALL(desc->export, s, a);
+
+ /* Attribute might become undefined in hook */
+ if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
+ return;
+ }
else
- {
- ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
- len = a->u.ptr->length;
- }
-
- return len;
+ {
+ /* Don't re-export unknown non-transitive attributes */
+ if (!(a->flags & BAF_TRANSITIVE))
+ return;
+
+ a->flags |= BAF_PARTIAL;
+ }
+
+ /* Append updated attribute */
+ to->attrs[to->count++] = *a;
+}
+
+/**
+ * bgp_export_attrs - export BGP attributes
+ * @s: BGP export state
+ * @attrs: a list of extended attributes
+ *
+ * The bgp_export_attrs() function takes a list of attributes and merges it to
+ * one newly allocated and sorted segment. Attributes are validated and
+ * normalized by type-specific export hooks and attribute flags are updated.
+ * Some attributes may be eliminated (e.g. unknown non-transitive attributes, or
+ * empty community sets).
+ *
+ * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
+ */
+static inline ea_list *
+bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
+{
+  /* Flatten all segments into one freshly allocated, sorted segment */
+  ea_list *merged = lp_alloc(s->pool, ea_scan(attrs));
+  ea_merge(attrs, merged);
+  ea_sort(merged);
+
+  /*
+   * Filter in place: the count is reset and bgp_export_attr() re-appends
+   * every attribute it keeps, so the write position never gets ahead of
+   * the attribute currently being read.
+   */
+  uint total = merged->count;
+  merged->count = 0;
+
+  for (uint idx = 0; idx < total; idx++)
+    bgp_export_attr(s, merged->attrs + idx, merged);
+
+  /* Any hook may have requested withdrawal of the whole route */
+  return s->err_withdraw ? NULL : merged;
+}
+
+
+/*
+ * Attribute encoding
+ */
+
+/*
+ * Encode one BGP attribute into buf (at most size bytes). Unknown
+ * attributes are written as raw data, known ones through the encode hook
+ * from bgp_attr_table. Returns whatever the selected encoder returns.
+ *
+ * NOTE(review): the encode hook is called without a NULL check, while the
+ * table entries for mp_reach_nlri and mp_unreach_nlri define none -
+ * presumably those attributes never reach this function; confirm.
+ */
+static inline int
+bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
+{
+  ASSERT(EA_PROTO(a->id) == EAP_BGP);
+
+  uint code = EA_ID(a->id);
+
+  if (!bgp_attr_known(code))
+    return bgp_encode_raw(s, a, buf, size);
+
+  return bgp_attr_table[code].encode(s, a, buf, size);
}
/**
* bgp_encode_attrs - encode BGP attributes
- * @p: BGP instance
- * @w: buffer
+ * @s: BGP write state
* @attrs: a list of extended attributes
- * @remains: remaining space in the buffer
+ * @buf: buffer
+ * @end: buffer end
*
* The bgp_encode_attrs() function takes a list of extended attributes
* and converts it to its BGP representation (a part of an Update message).
*
* Result: Length of the attribute block generated or -1 if not enough space.
*/
-uint
-bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
+int
+bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
{
- uint i, code, type, flags;
- byte *start = w;
- int len, rv;
+ byte *pos = buf;
+ int i, len;
- for(i=0; i<attrs->count; i++)
- {
- eattr *a = &attrs->attrs[i];
- ASSERT(EA_PROTO(a->id) == EAP_BGP);
- code = EA_ID(a->id);
-
-#ifdef IPV6
- /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */
- if (code == BA_NEXT_HOP)
- continue;
-#endif
-
- /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
- * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH
- * as optional AS4_PATH attribute.
- */
- if ((code == BA_AS_PATH) && (! p->as4_session))
- {
- len = a->u.ptr->length;
-
- if (remains < (len + 4))
- goto err_no_buffer;
-
- /* Using temporary buffer because don't know a length of created attr
- * and therefore a length of a header. Perhaps i should better always
- * use BAF_EXT_LEN. */
-
- byte buf[len];
- int new_used;
- int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
-
- DBG("BGP: Encoding old AS_PATH\n");
- rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
- ADVANCE(w, remains, rv);
- memcpy(w, buf, nl);
- ADVANCE(w, remains, nl);
-
- if (! new_used)
- continue;
-
- if (remains < (len + 4))
- goto err_no_buffer;
-
- /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments
- * here but we don't support confederations and such paths we already
- * discarded in bgp_check_as_path().
- */
-
- DBG("BGP: Encoding AS4_PATH\n");
- rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
- ADVANCE(w, remains, rv);
- memcpy(w, a->u.ptr->data, len);
- ADVANCE(w, remains, len);
-
- continue;
- }
-
- /* The same issue with AGGREGATOR attribute */
- if ((code == BA_AGGREGATOR) && (! p->as4_session))
- {
- int new_used;
-
- len = 6;
- if (remains < (len + 3))
- goto err_no_buffer;
-
- rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
- ADVANCE(w, remains, rv);
- aggregator_convert_to_old(a->u.ptr, w, &new_used);
- ADVANCE(w, remains, len);
-
- if (! new_used)
- continue;
-
- len = 8;
- if (remains < (len + 3))
- goto err_no_buffer;
-
- rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
- ADVANCE(w, remains, rv);
- memcpy(w, a->u.ptr->data, len);
- ADVANCE(w, remains, len);
-
- continue;
- }
-
- /* Standard path continues here ... */
-
- type = a->type & EAF_TYPE_MASK;
- flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
- len = bgp_get_attr_len(a);
-
- /* Skip empty sets */
- if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET) || (type == EAF_TYPE_LC_SET)) && (len == 0))
- continue;
-
- if (remains < len + 4)
- goto err_no_buffer;
-
- rv = bgp_encode_attr_hdr(w, flags, code, len);
- ADVANCE(w, remains, rv);
-
- switch (type)
- {
- case EAF_TYPE_INT:
- case EAF_TYPE_ROUTER_ID:
- if (len == 4)
- put_u32(w, a->u.data);
- else
- *w = a->u.data;
- break;
- case EAF_TYPE_IP_ADDRESS:
- {
- ip_addr ip = *(ip_addr *)a->u.ptr->data;
- ipa_hton(ip);
- memcpy(w, &ip, len);
- break;
- }
- case EAF_TYPE_INT_SET:
- case EAF_TYPE_LC_SET:
- case EAF_TYPE_EC_SET:
- {
- u32 *z = int_set_get_data(a->u.ptr);
- int i;
- for(i=0; i<len; i+=4)
- put_u32(w+i, *z++);
- break;
- }
- case EAF_TYPE_OPAQUE:
- case EAF_TYPE_AS_PATH:
- memcpy(w, a->u.ptr->data, len);
- break;
- default:
- bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
- }
- ADVANCE(w, remains, len);
- }
- return w - start;
+ for (i = 0; i < attrs->count; i++)
+ {
+ len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
+
+ if (len < 0)
+ return -1;
- err_no_buffer:
- return -1;
+ pos += len;
+ }
+
+ return pos - buf;
}
+
/*
-static void
-bgp_init_prefix(struct fib_node *N)
+ * Attribute decoding
+ */
+
+static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
+
+static inline int
+bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
{
- struct bgp_prefix *p = (struct bgp_prefix *) N;
- p->bucket_node.next = NULL;
+ eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
+ int num = p->cf->allow_local_as + 1;
+ return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
}
-*/
-static int
-bgp_compare_u32(const u32 *x, const u32 *y)
+static inline int
+bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
{
- return (*x < *y) ? -1 : (*x > *y) ? 1 : 0;
+ eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
+ return (e && (e->u.data == p->local_id));
}
-static inline void
-bgp_normalize_int_set(u32 *dest, u32 *src, unsigned cnt)
+static inline int
+bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
{
- memcpy(dest, src, sizeof(u32) * cnt);
- qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32);
+ eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
+ return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
}
-static int
-bgp_compare_ec(const u32 *xp, const u32 *yp)
+static inline void
+bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
{
- u64 x = ec_get(xp, 0);
- u64 y = ec_get(yp, 0);
- return (x < y) ? -1 : (x > y) ? 1 : 0;
+ /* Handle duplicate attributes; RFC 7606 3 (g) */
+ if (BIT32_TEST(s->attrs_seen, code))
+ {
+ if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
+ bgp_parse_error(s, 1);
+ else
+ DISCARD("Discarding duplicate attribute (code %u)", code);
+ }
+ BIT32_SET(s->attrs_seen, code);
+
+ if (bgp_attr_known(code))
+ {
+ const struct bgp_attr_desc *desc = &bgp_attr_table[code];
+
+ /* Handle conflicting flags; RFC 7606 3 (c) */
+ if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
+ WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
+
+ desc->decode(s, code, flags, data, len, to);
+ }
+ else /* Unknown attribute */
+ {
+ if (!(flags & BAF_OPTIONAL))
+ WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
+
+ bgp_decode_unknown(s, code, flags, data, len, to);
+ }
}
-static inline void
-bgp_normalize_ec_set(struct adata *ad, u32 *src, int internal)
+/**
+ * bgp_decode_attrs - check and decode BGP attributes
+ * @s: BGP parse state
+ * @data: start of attribute block
+ * @len: length of attribute block
+ *
+ * This function takes a BGP attribute block (a part of an Update message), checks
+ * its consistency and converts it to an &ea_list of BIRD route attributes. It returns
+ * NULL when there is no reachability NLRI or when the update is handled as a withdraw.
+ */
+ea_list *
+bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
{
- u32 *dst = int_set_get_data(ad);
+ struct bgp_proto *p = s->proto;
+ ea_list *attrs = NULL;
+ uint code, flags, alen;
+ byte *pos = data;
- /* Remove non-transitive communities (EC_TBIT active) on external sessions */
- if (! internal)
+ /* Parse the attributes */
+ while (len)
+ {
+ alen = 0;
+
+ /* Read attribute type */
+ if (len < 2)
+ goto framing_error;
+ flags = pos[0];
+ code = pos[1];
+ ADVANCE(pos, len, 2);
+
+ /* Read attribute length */
+ if (flags & BAF_EXT_LEN)
{
- int len = int_set_get_size(ad);
- u32 *t = dst;
- int i;
-
- for (i=0; i < len; i += 2)
- {
- if (src[i] & EC_TBIT)
- continue;
-
- *t++ = src[i];
- *t++ = src[i+1];
- }
-
- ad->length = (t - dst) * 4;
+ if (len < 2)
+ goto framing_error;
+ alen = get_u16(pos);
+ ADVANCE(pos, len, 2);
+ }
+ else
+ {
+ if (len < 1)
+ goto framing_error;
+ alen = *pos;
+ ADVANCE(pos, len, 1);
}
- else
- memcpy(dst, src, ad->length);
- qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec);
-}
+ if (alen > len)
+ goto framing_error;
-static int
-bgp_compare_lc(const u32 *x, const u32 *y)
-{
- if (x[0] != y[0])
- return (x[0] > y[0]) ? 1 : -1;
- if (x[1] != y[1])
- return (x[1] > y[1]) ? 1 : -1;
- if (x[2] != y[2])
- return (x[2] > y[2]) ? 1 : -1;
- return 0;
+ DBG("Attr %02x %02x %u\n", code, flags, alen);
+
+ bgp_decode_attr(s, code, flags, pos, alen, &attrs);
+ ADVANCE(pos, len, alen);
+ }
+
+ if (s->err_withdraw)
+ goto withdraw;
+
+ /* If there is no reachability NLRI, we are finished */
+ if (!s->ip_reach_len && !s->mp_reach_len)
+ return NULL;
+
+
+ /* Handle missing mandatory attributes; RFC 7606 3 (d) */
+ if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
+ { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
+
+ if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
+ { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
+
+ /* When receiving attributes from non-AS4-aware BGP speaker, we have to
+ reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
+ if (!p->as4_session)
+ bgp_process_as4_attrs(&attrs, s->pool);
+
+ /* Reject routes with our ASN in AS_PATH attribute */
+ if (bgp_as_path_loopy(p, attrs, p->local_as))
+ goto withdraw;
+
+ /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
+ if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
+ goto withdraw;
+
+ /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
+ if (p->is_internal && bgp_originator_id_loopy(p, attrs))
+ goto withdraw;
+
+ /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
+ if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
+ goto withdraw;
+
+ /* If there is no local preference, define one */
+ if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
+ bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
+
+ return attrs;
+
+
+framing_error:
+ /* RFC 7606 4 - handle attribute framing errors */
+ REPORT("Malformed attribute list - framing error (%u/%u) at %d",
+ alen, len, (int) (pos - s->attrs));
+
+withdraw:
+ /* RFC 7606 5.2 - handle missing NLRI during errors */
+ if (!s->ip_reach_len && !s->mp_reach_len)
+ bgp_parse_error(s, 1);
+
+ s->err_withdraw = 1;
+ return NULL;
}
-static inline void
-bgp_normalize_lc_set(u32 *dest, u32 *src, unsigned cnt)
+
+/*
+ * Route bucket hash table
+ */
+
+#define RBH_KEY(b) b->eattrs, b->hash
+#define RBH_NEXT(b) b->next
+#define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
+#define RBH_FN(a,h) h
+
+#define RBH_REHASH bgp_rbh_rehash
+#define RBH_PARAMS /8, *2, 2, 2, 8, 20
+
+
+HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
+
+void
+bgp_init_bucket_table(struct bgp_channel *c)
{
- memcpy(dest, src, LCOMM_LENGTH * cnt);
- qsort(dest, cnt, LCOMM_LENGTH, (int(*)(const void *, const void *)) bgp_compare_lc);
-}
+ HASH_INIT(c->bucket_hash, c->pool, 8);
-static void
-bgp_rehash_buckets(struct bgp_proto *p)
-{
- struct bgp_bucket **old = p->bucket_hash;
- struct bgp_bucket **new;
- unsigned oldn = p->hash_size;
- unsigned i, e, mask;
- struct bgp_bucket *b;
-
- p->hash_size = p->hash_limit;
- DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size);
- p->hash_limit *= 4;
- if (p->hash_limit >= 65536)
- p->hash_limit = ~0;
- new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
- mask = p->hash_size - 1;
- for (i=0; i<oldn; i++)
- while (b = old[i])
- {
- old[i] = b->hash_next;
- e = b->hash & mask;
- b->hash_next = new[e];
- if (b->hash_next)
- b->hash_next->hash_prev = b;
- b->hash_prev = NULL;
- new[e] = b;
- }
- mb_free(old);
+ init_list(&c->bucket_queue);
+ c->withdraw_bucket = NULL;
}
static struct bgp_bucket *
-bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash)
+bgp_get_bucket(struct bgp_channel *c, ea_list *new)
{
- struct bgp_bucket *b;
- unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
- unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
- unsigned size = sizeof(struct bgp_bucket) + ea_size_aligned;
- unsigned i;
+ /* Hash and lookup */
+ u32 hash = ea_hash(new);
+ struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
+
+ if (b)
+ return b;
+
+ uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
+ uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
+ uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
+ uint i;
byte *dest;
- unsigned index = hash & (p->hash_size - 1);
/* Gather total size of non-inline attributes */
- for (i=0; i<new->count; i++)
- {
- eattr *a = &new->attrs[i];
- if (!(a->type & EAF_EMBEDDED))
- size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
- }
+ for (i = 0; i < new->count; i++)
+ {
+ eattr *a = &new->attrs[i];
- /* Create the bucket and hash it */
- b = mb_alloc(p->p.pool, size);
- b->hash_next = p->bucket_hash[index];
- if (b->hash_next)
- b->hash_next->hash_prev = b;
- p->bucket_hash[index] = b;
- b->hash_prev = NULL;
- b->hash = hash;
- add_tail(&p->bucket_queue, &b->send_node);
+ if (!(a->type & EAF_EMBEDDED))
+ size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
+ }
+
+ /* Create the bucket */
+ b = mb_alloc(c->pool, size);
init_list(&b->prefixes);
+ b->hash = hash;
+
+ /* Copy list of extended attributes */
memcpy(b->eattrs, new, ea_size);
- dest = ((byte *)b->eattrs) + ea_size_aligned;
+ dest = ((byte *) b->eattrs) + ea_size_aligned;
/* Copy values of non-inline attributes */
- for (i=0; i<new->count; i++)
+ for (i = 0; i < new->count; i++)
+ {
+ eattr *a = &b->eattrs->attrs[i];
+
+ if (!(a->type & EAF_EMBEDDED))
{
- eattr *a = &b->eattrs->attrs[i];
- if (!(a->type & EAF_EMBEDDED))
- {
- struct adata *oa = a->u.ptr;
- struct adata *na = (struct adata *) dest;
- memcpy(na, oa, sizeof(struct adata) + oa->length);
- a->u.ptr = na;
- dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
- }
+ struct adata *oa = a->u.ptr;
+ struct adata *na = (struct adata *) dest;
+ memcpy(na, oa, sizeof(struct adata) + oa->length);
+ a->u.ptr = na;
+ dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
}
+ }
- /* If needed, rehash */
- p->hash_count++;
- if (p->hash_count > p->hash_limit)
- bgp_rehash_buckets(p);
+ /* Insert the bucket into the send queue and the bucket hash */
+ add_tail(&c->bucket_queue, &b->send_node);
+ HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
return b;
}
static struct bgp_bucket *
-bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate)
+bgp_get_withdraw_bucket(struct bgp_channel *c)
{
- ea_list *new;
- unsigned i, cnt, hash, code;
- eattr *a, *d;
- u32 seen = 0;
- struct bgp_bucket *b;
-
- /* Merge the attribute list */
- new = alloca(ea_scan(attrs));
- ea_merge(attrs, new);
- ea_sort(new);
+ if (!c->withdraw_bucket)
+ {
+ c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
+ init_list(&c->withdraw_bucket->prefixes);
+ }
- /* Normalize attributes */
- d = new->attrs;
- cnt = new->count;
- new->count = 0;
- for(i=0; i<cnt; i++)
- {
- a = &new->attrs[i];
- if (EA_PROTO(a->id) != EAP_BGP)
- continue;
- code = EA_ID(a->id);
- if (ATTR_KNOWN(code))
- {
- if (!p->is_internal)
- {
- if (!bgp_attr_table[code].allow_in_ebgp)
- continue;
- if ((code == BA_LOCAL_PREF) && !p->cf->allow_local_pref)
- continue;
- }
- /* The flags might have been zero if the attr was added by filters */
- a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags;
- if (code < 32)
- seen |= 1 << code;
- }
- else
- {
- /* Don't re-export unknown non-transitive attributes */
- if (!(a->flags & BAF_TRANSITIVE))
- continue;
- }
- *d = *a;
- if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL))
- d->flags |= BAF_PARTIAL;
- switch (d->type & EAF_TYPE_MASK)
- {
- case EAF_TYPE_INT_SET:
- {
- struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
- z->length = d->u.ptr->length;
- bgp_normalize_int_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4);
- d->u.ptr = z;
- break;
- }
- case EAF_TYPE_EC_SET:
- {
- struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
- z->length = d->u.ptr->length;
- bgp_normalize_ec_set(z, (u32 *) d->u.ptr->data, p->is_internal);
- d->u.ptr = z;
- break;
- }
- case EAF_TYPE_LC_SET:
- {
- struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
- z->length = d->u.ptr->length;
- bgp_normalize_lc_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / LCOMM_LENGTH);
- d->u.ptr = z;
- break;
- }
- default: ;
- }
- d++;
- new->count++;
- }
+ return c->withdraw_bucket;
+}
- /* Hash */
- hash = ea_hash(new);
- for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next)
- if (b->hash == hash && ea_same(b->eattrs, new))
- {
- DBG("Found bucket.\n");
- return b;
- }
-
- /* Ensure that there are all mandatory attributes */
- for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++)
- if (!(seen & (1 << bgp_mandatory_attrs[i])))
- {
- log(L_ERR "%s: Mandatory attribute %s missing in route %I/%d", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name, n->n.prefix, n->n.pxlen);
- return NULL;
- }
-
- /* Check if next hop is valid */
- a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data))
- {
- log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen);
- return NULL;
- }
+void
+bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+{
+ rem_node(&b->send_node);
+ HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
+ mb_free(b);
+}
- /* Create new bucket */
- DBG("Creating bucket.\n");
- return bgp_new_bucket(p, new, hash);
+void
+bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+{
+ rem_node(&b->send_node);
+ add_tail(&c->bucket_queue, &b->send_node);
}
void
-bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
+bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
{
- if (buck->hash_next)
- buck->hash_next->hash_prev = buck->hash_prev;
- if (buck->hash_prev)
- buck->hash_prev->hash_next = buck->hash_next;
- else
- p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next;
- mb_free(buck);
+ struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
+
+ log(L_ERR "%s: Attribute list too long", p->p.name);
+ while (!EMPTY_LIST(b->prefixes))
+ {
+ struct bgp_prefix *px = HEAD(b->prefixes);
+
+ log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
+ rem_node(&px->buck_node);
+ add_tail(&wb->prefixes, &px->buck_node);
+ }
}
-/* Prefix hash table */
+/*
+ * Prefix hash table
+ */
-#define PXH_KEY(n1) n1->n.prefix, n1->n.pxlen, n1->path_id
-#define PXH_NEXT(n) n->next
-#define PXH_EQ(p1,l1,i1,p2,l2,i2) ipa_equal(p1, p2) && l1 == l2 && i1 == i2
-#define PXH_FN(p,l,i) ipa_hash32(p) ^ u32_hash((l << 16) ^ i)
+#define PXH_KEY(px) px->net, px->path_id, px->hash
+#define PXH_NEXT(px) px->next
+#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
+#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
#define PXH_PARAMS /8, *2, 2, 2, 8, 20
@@ -929,308 +1292,282 @@ bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
void
-bgp_init_prefix_table(struct bgp_proto *p, u32 order)
+bgp_init_prefix_table(struct bgp_channel *c)
{
- HASH_INIT(p->prefix_hash, p->p.pool, order);
+ HASH_INIT(c->prefix_hash, c->pool, 8);
- p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix));
+ uint alen = net_addr_length[c->c.net_type];
+ c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
void
-bgp_free_prefix_table(struct bgp_proto *p)
+bgp_free_prefix_table(struct bgp_channel *c)
{
- HASH_FREE(p->prefix_hash);
+ HASH_FREE(c->prefix_hash);
- rfree(p->prefix_slab);
- p->prefix_slab = NULL;
+ rfree(c->prefix_slab);
+ c->prefix_slab = NULL;
}
static struct bgp_prefix *
-bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id)
+bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
{
- struct bgp_prefix *bp = HASH_FIND(p->prefix_hash, PXH, prefix, pxlen, path_id);
+ u32 hash = net_hash(net) ^ u32_hash(path_id);
+ struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
+
+ if (px)
+ {
+ rem_node(&px->buck_node);
+ return px;
+ }
- if (bp)
- return bp;
+ if (c->prefix_slab)
+ px = sl_alloc(c->prefix_slab);
+ else
+ px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
- bp = sl_alloc(p->prefix_slab);
- bp->n.prefix = prefix;
- bp->n.pxlen = pxlen;
- bp->path_id = path_id;
- bp->bucket_node.next = NULL;
+ px->buck_node.next = NULL;
+ px->buck_node.prev = NULL;
+ px->hash = hash;
+ px->path_id = path_id;
+ net_copy(px->net, net);
- HASH_INSERT2(p->prefix_hash, PXH, p->p.pool, bp);
+ HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
- return bp;
+ return px;
}
void
-bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp)
+bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
{
- HASH_REMOVE2(p->prefix_hash, PXH, p->p.pool, bp);
- sl_free(p->prefix_slab, bp);
+ rem_node(&px->buck_node);
+ HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
+
+ if (c->prefix_slab)
+ sl_free(c->prefix_slab, px);
+ else
+ mb_free(px);
}
-void
-bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
+/*
+ * BGP protocol glue
+ */
+
+int
+bgp_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED)
{
+ rte *e = *new;
+ struct proto *SRC = e->attrs->src->proto;
struct bgp_proto *p = (struct bgp_proto *) P;
- struct bgp_bucket *buck;
- struct bgp_prefix *px;
- rte *key;
- u32 path_id;
+ struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
- DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
+ /* Reject our routes */
+ if (src == p)
+ return -1;
- if (new)
- {
- key = new;
- buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP);
- if (!buck) /* Inconsistent attribute list */
- return;
- }
- else
- {
- key = old;
- if (!(buck = p->withdraw_bucket))
- {
- buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
- init_list(&buck->prefixes);
- }
- }
- path_id = p->add_path_tx ? key->attrs->src->global_id : 0;
- px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id);
- if (px->bucket_node.next)
- {
- DBG("\tRemoving old entry.\n");
- rem_node(&px->bucket_node);
- }
- add_tail(&buck->prefixes, &px->bucket_node);
- bgp_schedule_packet(p->conn, PKT_UPDATE);
-}
+ /* Accept non-BGP routes */
+ if (src == NULL)
+ return 0;
-static int
-bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
-{
- ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr));
- rta *rta = e->attrs;
- byte *z;
+ // XXXX: Check next hop AF
- ea->next = *attrs;
- *attrs = ea;
- ea->flags = EALF_SORTED;
- ea->count = 4;
+ /* IBGP route reflection, RFC 4456 */
+ if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
+ {
+ /* Rejected unless configured as route reflector */
+ if (!p->rr_client && !src->rr_client)
+ return -1;
+
+ /* Generally, this should be handled when path is received, but we check it
+ also here as rr_cluster_id may be undefined or different in src. */
+ if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
+ return -1;
+ }
- bgp_set_attr(ea->attrs, BA_ORIGIN,
- ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
+ /* Handle well-known communities, RFC 1997 */
+ struct eattr *c;
+ if (p->cf->interpret_communities &&
+ (c = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY))))
+ {
+ struct adata *d = c->u.ptr;
- if (p->is_internal)
- bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
- else
- {
- z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6);
- z[0] = AS_PATH_SEQUENCE;
- z[1] = 1; /* 1 AS */
- put_u32(z+2, p->local_as);
- }
+ /* Do not export anywhere */
+ if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
+ return -1;
- /* iBGP -> use gw, eBGP multi-hop -> use source_addr,
- eBGP single-hop -> use gw if on the same iface */
- z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
- if (p->cf->next_hop_self ||
- rta->dest != RTD_ROUTER ||
- ipa_equal(rta->gw, IPA_NONE) ||
- ipa_is_link_local(rta->gw) ||
- (!p->is_internal && !p->cf->next_hop_keep &&
- (!p->neigh || (rta->iface != p->neigh->iface))))
- set_next_hop(z, p->source_addr);
- else
- set_next_hop(z, rta->gw);
+ /* Do not export outside of AS (or member-AS) */
+ if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
+ return -1;
- bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref);
+ /* Do not export outside of AS (or confederation) */
+ if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
+ return -1;
+ }
- return 0; /* Leave decision to the filters */
+ return 0;
}
-static inline int
-bgp_as_path_loopy(struct bgp_proto *p, rta *a)
-{
- int num = p->cf->allow_local_as + 1;
- eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num));
-}
+static adata null_adata; /* adata of length 0 */
-static inline int
-bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
-{
- eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
- return (e && (e->u.data == p->local_id));
-}
-
-static inline int
-bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
+static ea_list *
+bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
{
- eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
- return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
-}
+ struct proto *SRC = e->attrs->src->proto;
+ struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
+ struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
+ ea_list *attrs = attrs0;
+ eattr *a;
+ adata *ad;
+ /* ORIGIN attribute - mandatory, attach if missing */
+ if (! bgp_find_attr(attrs0, BA_ORIGIN))
+ bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
-static inline void
-bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
-{
- eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
-}
+ /* AS_PATH attribute - mandatory */
+ a = bgp_find_attr(attrs0, BA_AS_PATH);
+ ad = a ? a->u.ptr : &null_adata;
-static inline void
-bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
-{
- eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
- bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_prepend(pool, a ? a->u.ptr : NULL, cid));
-}
+ /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
+ if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
+ ad = as_path_strip_confed(pool, ad);
-static int
-bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
-{
- eattr *a;
+ /* AS_PATH attribute - keep or prepend ASN */
+ if (p->is_internal ||
+ (p->rs_client && src && src->rs_client))
+ {
+ /* IBGP or route server -> just ensure there is one */
+ if (!a)
+ bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
+ }
+ else if (p->is_interior)
+ {
+ /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
+ ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
+ bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+ }
+ else /* Regular EBGP (no RS, no confederation) */
+ {
+ /* Regular EBGP -> prepend ASN as regular sequence */
+ ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
+ bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
+
+ /* MULTI_EXIT_DISC attribute - accept only if set in export filter */
+ a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
+ if (a && !(a->type & EAF_FRESH))
+ bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
+ }
- if (!p->is_internal && !p->rs_client)
- {
- bgp_path_prepend(e, attrs, pool, p->local_as);
-
- /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
- * propagated to other neighboring ASes.
- * Perhaps it would be better to undefine it.
- */
- a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- if (a)
- bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
- }
+ /* NEXT_HOP attribute - delegated to AF-specific hook */
+ a = bgp_find_attr(attrs0, BA_NEXT_HOP);
+ bgp_update_next_hop(&s, a, &attrs);
- /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
- * eBGP single-hop -> keep next_hop if on the same iface.
- * If the next_hop is zero (i.e. link-local), keep only if on the same iface.
- *
- * Note that same-iface-check uses iface from route, which is based on gw.
- */
- a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- if (a && !p->cf->next_hop_self &&
- (p->cf->next_hop_keep ||
- (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) ||
- (p->neigh && (e->attrs->iface == p->neigh->iface))))
- {
- /* Leave the original next hop attribute, will check later where does it point */
- }
- else
- {
- /* Need to create new one */
- byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
- set_next_hop(b, p->source_addr);
- }
+ /* LOCAL_PREF attribute - required for IBGP, attach if missing */
+ if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
+ bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
- if (rr)
- {
- /* Handling route reflection, RFC 4456 */
- struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto;
+ /* IBGP route reflection, RFC 4456 */
+ if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
+ {
+ /* ORIGINATOR_ID attribute - attach if not already set */
+ if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
+ bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
- a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
- if (!a)
- bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
+ /* CLUSTER_LIST attribute - prepend cluster ID */
+ a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
+ ad = a ? a->u.ptr : NULL;
- /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
- bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
+ /* Prepend src cluster ID */
+ if (src->rr_cluster_id)
+ ad = int_set_prepend(pool, ad, src->rr_cluster_id);
- /* Two RR clients with different cluster ID, hmmm */
- if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
- bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
- }
+ /* Prepend dst cluster ID if src and dst clusters are different */
+ if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
+ ad = int_set_prepend(pool, ad, p->rr_cluster_id);
- return 0; /* Leave decision to the filters */
-}
+ /* Should be at least one prepended cluster ID */
+ bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
+ }
-static int
-bgp_community_filter(struct bgp_proto *p, rte *e)
-{
- eattr *a;
- struct adata *d;
+ /* AS4_* transition attributes, RFC 6793 4.2.2 */
+ if (! p->as4_session)
+ {
+ a = bgp_find_attr(attrs, BA_AS_PATH);
+ if (a && as_path_contains_as4(a->u.ptr))
+ {
+ bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
+ }
- /* Check if we aren't forbidden to export the route by communities */
- a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
- if (a)
+ a = bgp_find_attr(attrs, BA_AGGREGATOR);
+ if (a && aggregator_contains_as4(a->u.ptr))
{
- d = a->u.ptr;
- if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
- {
- DBG("\tNO_ADVERTISE\n");
- return 1;
- }
- if (!p->is_internal &&
- (int_set_contains(d, BGP_COMM_NO_EXPORT) ||
- int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)))
- {
- DBG("\tNO_EXPORT\n");
- return 1;
- }
+ bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
+ bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
}
+ }
- return 0;
+ /*
+ * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
+ * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
+ * should be checked in AF-specific hooks.
+ */
+
+ /* Apply per-attribute export hooks for validation and normalization */
+ return bgp_export_attrs(&s, attrs);
}
-int
-bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool)
+void
+bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs)
{
- rte *e = *new;
- struct bgp_proto *p = (struct bgp_proto *) P;
- struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ?
- (struct bgp_proto *) e->attrs->src->proto : NULL;
+ struct bgp_proto *p = (void *) P;
+ struct bgp_channel *c = (void *) C;
+ struct bgp_bucket *buck;
+ struct bgp_prefix *px;
+ u32 path;
- if (p == new_bgp) /* Poison reverse updates */
- return -1;
- if (new_bgp)
- {
- /* We should check here for cluster list loop, because the receiving BGP instance
- might have different cluster ID */
- if (bgp_cluster_list_loopy(p, e->attrs))
- return -1;
-
- if (p->cf->interpret_communities && bgp_community_filter(p, e))
- return -1;
-
- if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
- {
- /* Redistribution of internal routes with IBGP */
- if (p->rr_client || new_bgp->rr_client)
- /* Route reflection, RFC 4456 */
- return bgp_update_attrs(p, e, attrs, pool, 1);
- else
- return -1;
- }
- else
- return bgp_update_attrs(p, e, attrs, pool, 0);
- }
+ if (new)
+ {
+ attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2);
+
+ /* If attributes are invalid, we fall back to withdraw */
+ buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
+ path = new->attrs->src->global_id;
+
+ lp_flush(bgp_linpool2);
+ }
else
- return bgp_create_attrs(p, e, attrs, pool);
+ {
+ buck = bgp_get_withdraw_bucket(c);
+ path = old->attrs->src->global_id;
+ }
+
+ px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
+ add_tail(&buck->prefixes, &px->buck_node);
+
+ bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
+
static inline u32
bgp_get_neighbor(rte *r)
{
eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
u32 as;
- if (e && as_path_get_first(e->u.ptr, &as))
+ if (e && as_path_get_first_regular(e->u.ptr, &as))
return as;
- else
- return ((struct bgp_proto *) r->attrs->src->proto)->remote_as;
+
+ /* If AS_PATH is not defined, we treat rte as locally originated */
+ struct bgp_proto *p = (void *) r->attrs->src->proto;
+ return p->cf->confederation ?: p->local_as;
}
static inline int
rte_resolvable(rte *rt)
{
- int rd = rt->attrs->dest;
- return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH);
+ return rt->attrs->dest == RTD_UNICAST;
}
int
@@ -1269,16 +1606,16 @@ bgp_rte_better(rte *new, rte *old)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
- {
- x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
- o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
- if (n < o)
- return 1;
- if (n > o)
- return 0;
- }
+ {
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
+ o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
+ if (n < o)
+ return 1;
+ if (n > o)
+ return 0;
+ }
/* RFC 4271 9.1.2.2. b) Use origins */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
@@ -1303,21 +1640,21 @@ bgp_rte_better(rte *new, rte *old)
*/
if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
(bgp_get_neighbor(new) == bgp_get_neighbor(old)))
- {
- x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- n = x ? x->u.data : new_bgp->cf->default_med;
- o = y ? y->u.data : old_bgp->cf->default_med;
- if (n < o)
- return 1;
- if (n > o)
- return 0;
- }
+ {
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ n = x ? x->u.data : new_bgp->cf->default_med;
+ o = y ? y->u.data : old_bgp->cf->default_med;
+ if (n < o)
+ return 1;
+ if (n > o)
+ return 0;
+ }
/* RFC 4271 9.1.2.2. d) Prefer external peers */
- if (new_bgp->is_internal > old_bgp->is_internal)
+ if (new_bgp->is_interior > old_bgp->is_interior)
return 0;
- if (new_bgp->is_internal < old_bgp->is_internal)
+ if (new_bgp->is_interior < old_bgp->is_interior)
return 1;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
@@ -1329,7 +1666,7 @@ bgp_rte_better(rte *new, rte *old)
return 0;
/* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
- /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */
+ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
n = x ? x->u.data : new_bgp->remote_id;
@@ -1388,18 +1725,18 @@ bgp_rte_mergable(rte *pri, rte *sec)
/* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
- {
- x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
- s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
+ {
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
+ s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
- if (p != s)
- return 0;
+ if (p != s)
+ return 0;
-// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
-// return 0;
- }
+// if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
+// return 0;
+ }
/* RFC 4271 9.1.2.2. b) Use origins */
x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
@@ -1412,17 +1749,17 @@ bgp_rte_mergable(rte *pri, rte *sec)
/* RFC 4271 9.1.2.2. c) Compare MED's */
if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
(bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
- {
- x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- p = x ? x->u.data : pri_bgp->cf->default_med;
- s = y ? y->u.data : sec_bgp->cf->default_med;
- if (p != s)
- return 0;
- }
+ {
+ x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ p = x ? x->u.data : pri_bgp->cf->default_med;
+ s = y ? y->u.data : sec_bgp->cf->default_med;
+ if (p != s)
+ return 0;
+ }
/* RFC 4271 9.1.2.2. d) Prefer external peers */
- if (pri_bgp->is_internal != sec_bgp->is_internal)
+ if (pri_bgp->is_interior != sec_bgp->is_interior)
return 0;
/* RFC 4271 9.1.2.2. e) Compare IGP metrics */
@@ -1437,7 +1774,6 @@ bgp_rte_mergable(rte *pri, rte *sec)
}
-
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
@@ -1482,7 +1818,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
* that this fn is not called for them.
*
* The idea is simple, the implementation is more problematic,
- * mostly because of optimizations in rte_recalculate() that
+ * mostly because of optimizations in rte_recalculate() that
* avoids full recalculation in most cases.
*
* We can assume that at least one of new, old is non-NULL and both
@@ -1494,14 +1830,14 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
/* If new and old are from different groups, we just process that
as two independent events */
if (new && old && !same_group(old, lpref, lasn))
- {
- int i1, i2;
- i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
- i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
- return i1 || i2;
- }
+ {
+ int i1, i2;
+ i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
+ i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
+ return i1 || i2;
+ }
- /*
+ /*
* We could find the best-in-group and then make some shortcuts like
* in rte_recalculate, but as we would have to walk through all
* net->routes just to find it, it is probably not worth. So we
@@ -1513,35 +1849,35 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
new->u.bgp.suppressed = 1;
if (old)
+ {
+ old_is_group_best = !old->u.bgp.suppressed;
+ old->u.bgp.suppressed = 1;
+ int new_is_better = new && bgp_rte_better(new, old);
+
+ /* The first case - replace not best with worse (or remove not best) */
+ if (!old_is_group_best && !new_is_better)
+ return 0;
+
+ /* The second case - replace the best with better */
+ if (old_is_group_best && new_is_better)
{
- old_is_group_best = !old->u.bgp.suppressed;
- old->u.bgp.suppressed = 1;
- int new_is_better = new && bgp_rte_better(new, old);
-
- /* The first case - replace not best with worse (or remove not best) */
- if (!old_is_group_best && !new_is_better)
- return 0;
-
- /* The second case - replace the best with better */
- if (old_is_group_best && new_is_better)
- {
- /* new is best-in-group, the see discussion below - this is
- a special variant of NBG && OBG. From OBG we can deduce
- that same_group(old_best) iff (old == old_best) */
- new->u.bgp.suppressed = 0;
- return (old == old_best);
- }
+ /* new is best-in-group, see the discussion below - this is
+ a special variant of NBG && OBG. From OBG we can deduce
+ that same_group(old_best) iff (old == old_best) */
+ new->u.bgp.suppressed = 0;
+ return (old == old_best);
}
+ }
/* The default case - find a new best-in-group route */
r = new; /* new may not be in the list */
for (s=net->routes; rte_is_valid(s); s=s->next)
if (use_deterministic_med(s) && same_group(s, lpref, lasn))
- {
- s->u.bgp.suppressed = 1;
- if (!r || bgp_rte_better(s, r))
- r = s;
- }
+ {
+ s->u.bgp.suppressed = 1;
+ if (!r || bgp_rte_better(s, r))
+ r = s;
+ }
/* Simple case - the last route in group disappears */
if (!r)
@@ -1580,397 +1916,77 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
return old_is_group_best;
}
-static struct adata *
-bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
-{
- struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
- newa->length = 8;
- aggregator_convert_to_new(old, newa->data);
- return newa;
-}
-
-/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
- * and append path old4 (in 4B format).
+/*
+ * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
*/
-static struct adata *
-bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
-{
- byte buf[old2->length * 2];
-
- int ol = as_path_convert_to_new(old2, buf, req_as);
- int nl = ol + (old4 ? old4->length : 0);
-
- struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
- newa->length = nl;
- memcpy(newa->data, buf, ol);
- if (old4) memcpy(newa->data + ol, old4->data, old4->length);
-
- return newa;
-}
-
-static int
-as4_aggregator_valid(struct adata *aggr)
-{
- return aggr->length == 8;
-}
-
-
-/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
-static void
-bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
-{
- eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
- eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
- eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
- int a4_removed = 0;
-
- if (a4 && !as4_aggregator_valid(a4->u.ptr))
- {
- log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name);
- a4 = NULL;
- a4_removed = 1;
- }
-
- if (a2)
- {
- u32 a2_as = get_u16(a2->u.ptr->data);
-
- if (a4)
- {
- if (a2_as != AS_TRANS)
- {
- /* Routes were aggregated by old router and therefore AS4_PATH
- * and AS4_AGGREGATOR is invalid
- *
- * Convert AS_PATH and AGGREGATOR to 4B format and finish.
- */
-
- a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
- p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
-
- return;
- }
- else
- {
- /* Common case, use AS4_AGGREGATOR attribute */
- a2->u.ptr = a4->u.ptr;
- }
- }
- else
- {
- /* Common case, use old AGGREGATOR attribute */
- a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
-
- if ((a2_as == AS_TRANS) && !a4_removed)
- log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name);
- }
- }
- else
- if (a4)
- log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name);
-
- int p2_len = as_path_getlen_int(p2->u.ptr, 2);
- int p4_len = p4 ? validate_as4_path(p, p4->u.ptr) : -1;
-
- if (p4 && (p4_len < 0))
- log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name);
-
- if ((p4_len <= 0) || (p2_len < p4_len))
- p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
- else
- p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
-}
-
static void
-bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
+bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
{
- unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
- unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
- ea_list **el = &(a->eattrs);
+ eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
+ eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
+ eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
+ eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
- /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */
- while (*el != NULL)
- {
- unsigned fid = (*el)->attrs[0].id;
-
- if ((fid == id1) || (fid == id2))
- {
- *el = (*el)->next;
- if (p->as4_session)
- log(L_WARN "%s: Unexpected AS4_* attributes received", p->p.name);
- }
- else
- el = &((*el)->next);
- }
-}
+ /* First, unset AS4_* attributes */
+ if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
+ if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
-/**
- * bgp_decode_attrs - check and decode BGP attributes
- * @conn: connection
- * @attr: start of attribute block
- * @len: length of attribute block
- * @pool: linear pool to make all the allocations in
- * @mandatory: 1 iff presence of mandatory attributes has to be checked
- *
- * This function takes a BGP attribute block (a part of an Update message), checks
- * its consistency and converts it to a list of BIRD route attributes represented
- * by a &rta.
- */
-struct rta *
-bgp_decode_attrs(struct bgp_conn *conn, byte *attr, uint len, struct linpool *pool, int mandatory)
-{
- struct bgp_proto *bgp = conn->bgp;
- rta *a = lp_alloc(pool, sizeof(struct rta));
- uint flags, code, l, i, type;
- int errcode;
- byte *z, *attr_start;
- byte seen[256/8];
- ea_list *ea;
- struct adata *ad;
- int withdraw = 0;
-
- bzero(a, sizeof(rta));
- a->source = RTS_BGP;
- a->scope = SCOPE_UNIVERSE;
- a->cast = RTC_UNICAST;
- /* a->dest = RTD_ROUTER; -- set in bgp_set_next_hop() */
- a->from = bgp->cf->remote_ip;
-
- /* Parse the attributes */
- bzero(seen, sizeof(seen));
- DBG("BGP: Parsing attributes\n");
- while (len)
- {
- if (len < 2)
- goto malformed;
- attr_start = attr;
- flags = *attr++;
- code = *attr++;
- len -= 2;
- if (flags & BAF_EXT_LEN)
- {
- if (len < 2)
- goto malformed;
- l = get_u16(attr);
- attr += 2;
- len -= 2;
- }
- else
- {
- if (len < 1)
- goto malformed;
- l = *attr++;
- len--;
- }
- if (l > len)
- goto malformed;
- len -= l;
- z = attr;
- attr += l;
- DBG("Attr %02x %02x %d\n", code, flags, l);
- if (seen[code/8] & (1 << (code%8)))
- goto malformed;
- if (ATTR_KNOWN(code))
- {
- struct attr_desc *desc = &bgp_attr_table[code];
- if (desc->expected_length >= 0 && desc->expected_length != (int) l)
- { errcode = 5; goto err; }
- if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
- { errcode = 4; goto err; }
- if (!bgp->is_internal)
- {
- if (!desc->allow_in_ebgp)
- continue;
- if ((code == BA_LOCAL_PREF) && !bgp->cf->allow_local_pref)
- continue;
- }
- if (desc->validate)
- {
- errcode = desc->validate(bgp, z, l);
- if (errcode > 0)
- goto err;
- if (errcode == IGNORE)
- continue;
- if (errcode <= WITHDRAW)
- {
- log(L_WARN "%s: Attribute %s is malformed, withdrawing update",
- bgp->p.name, desc->name);
- withdraw = 1;
- }
- }
- else if (code == BA_AS_PATH)
- {
- /* Special case as it might also trim the attribute */
- if (validate_as_path(bgp, z, &l) < 0)
- { errcode = 11; goto err; }
- }
- type = desc->type;
- }
- else /* Unknown attribute */
- {
- if (!(flags & BAF_OPTIONAL))
- { errcode = 2; goto err; }
- type = EAF_TYPE_OPAQUE;
- }
-
- // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag
- // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL))
- // { errcode = 4; goto err; }
-
- seen[code/8] |= (1 << (code%8));
- ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
- ea->next = a->eattrs;
- a->eattrs = ea;
- ea->flags = 0;
- ea->count = 1;
- ea->attrs[0].id = EA_CODE(EAP_BGP, code);
- ea->attrs[0].flags = flags;
- ea->attrs[0].type = type;
- if (type & EAF_EMBEDDED)
- ad = NULL;
- else
- {
- ad = lp_alloc(pool, sizeof(struct adata) + l);
- ea->attrs[0].u.ptr = ad;
- ad->length = l;
- memcpy(ad->data, z, l);
- }
- switch (type)
- {
- case EAF_TYPE_ROUTER_ID:
- case EAF_TYPE_INT:
- if (l == 1)
- ea->attrs[0].u.data = *z;
- else
- ea->attrs[0].u.data = get_u32(z);
- break;
- case EAF_TYPE_IP_ADDRESS:
- ipa_ntoh(*(ip_addr *)ad->data);
- break;
- case EAF_TYPE_INT_SET:
- case EAF_TYPE_LC_SET:
- case EAF_TYPE_EC_SET:
- {
- u32 *z = (u32 *) ad->data;
- for(i=0; i<ad->length/4; i++)
- z[i] = ntohl(z[i]);
- break;
- }
- }
- }
-
- if (withdraw)
- goto withdraw;
-
-#ifdef IPV6
- /* If we received MP_REACH_NLRI we should check mandatory attributes */
- if (bgp->mp_reach_len != 0)
- mandatory = 1;
-#endif
-
- /* If there is no (reachability) NLRI, we should exit now */
- if (! mandatory)
- return a;
-
- /* Check if all mandatory attributes are present */
- for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++)
- {
- code = bgp_mandatory_attrs[i];
- if (!(seen[code/8] & (1 << (code%8))))
- {
- bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1);
- return NULL;
- }
- }
-
- /* When receiving attributes from non-AS4-aware BGP speaker,
- * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
- */
- if (! bgp->as4_session)
- bgp_reconstruct_4b_atts(bgp, a, pool);
-
- bgp_remove_as4_attrs(bgp, a);
-
- /* If the AS path attribute contains our AS, reject the routes */
- if (bgp_as_path_loopy(bgp, a))
- goto withdraw;
-
- /* Two checks for IBGP loops caused by route reflection, RFC 4456 */
- if (bgp_originator_id_loopy(bgp, a) ||
- bgp_cluster_list_loopy(bgp, a))
- goto withdraw;
+ /* Handle AGGREGATOR attribute */
+ if (a2 && a4)
+ {
+ u32 a2_asn = get_u32(a2->u.ptr->data);
- /* If there's no local preference, define one */
- if (!(seen[0] & (1 << BA_LOCAL_PREF)))
- bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, bgp->cf->default_local_pref);
+ /* If routes were aggregated by an old router, then AS4_PATH and
+ AS4_AGGREGATOR are invalid. In that case we give up. */
+ if (a2_asn != AS_TRANS)
+ return;
- return a;
+ /* Use AS4_AGGREGATOR instead of AGGREGATOR */
+ a2->u.ptr = a4->u.ptr;
+ }
-withdraw:
- return NULL;
+ /* Handle AS_PATH attribute */
+ if (p2 && p4)
+ {
+ /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
+ int p2_len = as_path_getlen(p2->u.ptr);
+ int p4_len = as_path_getlen(p4->u.ptr);
-malformed:
- bgp_error(conn, 3, 1, NULL, 0);
- return NULL;
+ /* AS_PATH is too short, give up */
+ if (p2_len < p4_len)
+ return;
-err:
- bgp_error(conn, 3, errcode, attr_start, z+l-attr_start);
- return NULL;
+ /* Merge AS_PATH and AS4_PATH */
+ as_path_cut(p2->u.ptr, p2_len - p4_len);
+ p2->u.ptr = as_path_merge(pool, p2->u.ptr, p4->u.ptr);
+ }
}
int
bgp_get_attr(eattr *a, byte *buf, int buflen)
{
uint i = EA_ID(a->id);
- struct attr_desc *d;
+ const struct bgp_attr_desc *d;
int len;
- if (ATTR_KNOWN(i))
+ if (bgp_attr_known(i))
+ {
+ d = &bgp_attr_table[i];
+ len = bsprintf(buf, "%s", d->name);
+ buf += len;
+ if (d->format)
{
- d = &bgp_attr_table[i];
- len = bsprintf(buf, "%s", d->name);
- buf += len;
- if (d->format)
- {
- *buf++ = ':';
- *buf++ = ' ';
- d->format(a, buf, buflen - len - 2);
- return GA_FULL;
- }
- return GA_NAME;
+ *buf++ = ':';
+ *buf++ = ' ';
+ d->format(a, buf, buflen - len - 2);
+ return GA_FULL;
}
- bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
- return GA_NAME;
-}
-
-void
-bgp_init_bucket_table(struct bgp_proto *p)
-{
- p->hash_size = 256;
- p->hash_limit = p->hash_size * 4;
- p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
- init_list(&p->bucket_queue);
- p->withdraw_bucket = NULL;
- // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
-}
-
-void
-bgp_free_bucket_table(struct bgp_proto *p)
-{
- mb_free(p->bucket_hash);
- p->bucket_hash = NULL;
-
- struct bgp_bucket *b;
- WALK_LIST_FIRST(b, p->bucket_queue)
- {
- rem_node(&b->send_node);
- mb_free(b);
+ return GA_NAME;
}
- mb_free(p->withdraw_bucket);
- p->withdraw_bucket = NULL;
+ bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
+ return GA_NAME;
}
void
@@ -1986,14 +2002,14 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
buf += bsprintf(buf, "-");
if (e->attrs->hostentry)
- {
- if (!rte_resolvable(e))
- buf += bsprintf(buf, "/-");
- else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
- buf += bsprintf(buf, "/?");
- else
- buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
- }
+ {
+ if (!rte_resolvable(e))
+ buf += bsprintf(buf, "/-");
+ else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
+ buf += bsprintf(buf, "/?");
+ else
+ buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
+ }
buf += bsprintf(buf, ") [");
if (p && as_path_get_last(p->u.ptr, &origas))
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 8a6b2f02..b0814791 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -2,6 +2,8 @@
* BIRD -- The Border Gateway Protocol
*
* (c) 2000 Martin Mares <mj@ucw.cz>
+ * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2008--2016 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
@@ -9,48 +11,52 @@
/**
* DOC: Border Gateway Protocol
*
- * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
- * connection and most of the interface with BIRD core, |packets.c| handling
+ * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
+ * the connection and most of the interface with BIRD core, |packets.c| handling
* both incoming and outgoing BGP packets and |attrs.c| containing functions for
* manipulation with BGP attribute lists.
*
- * As opposed to the other existing routing daemons, BIRD has a sophisticated core
- * architecture which is able to keep all the information needed by BGP in the
- * primary routing table, therefore no complex data structures like a central
- * BGP table are needed. This increases memory footprint of a BGP router with
- * many connections, but not too much and, which is more important, it makes
- * BGP much easier to implement.
+ * As opposed to the other existing routing daemons, BIRD has a sophisticated
+ * core architecture which is able to keep all the information needed by BGP in
+ * the primary routing table, therefore no complex data structures like a
+ * central BGP table are needed. This increases memory footprint of a BGP router
+ * with many connections, but not too much and, which is more important, it
+ * makes BGP much easier to implement.
*
- * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
- * structure to which are attached individual connections represented by &bgp_connection
- * (usually, there exists only one connection, but during BGP session setup, there
- * can be more of them). The connections are handled according to the BGP state machine
- * defined in the RFC with all the timers and all the parameters configurable.
+ * Each instance of BGP (corresponding to a single BGP peer) is described by a
+ * &bgp_proto structure to which are attached individual connections represented
+ * by &bgp_connection (usually, there exists only one connection, but during BGP
+ * session setup, there can be more of them). The connections are handled
+ * according to the BGP state machine defined in the RFC with all the timers and
+ * all the parameters configurable.
*
- * In incoming direction, we listen on the connection's socket and each time we receive
- * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
- * passes complete packets to bgp_rx_packet() which distributes the packet according
- * to its type.
+ * In incoming direction, we listen on the connection's socket and each time we
+ * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
+ * markers and passes complete packets to bgp_rx_packet() which distributes the
+ * packet according to its type.
*
- * In outgoing direction, we gather all the routing updates and sort them to buckets
- * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
- * of &rta's and a &fib which helps us to find if we already have another route for
- * the same destination queued for sending, so that we can replace it with the new one
- * immediately instead of sending both updates). There also exists a special bucket holding
- * all the route withdrawals which cannot be queued anywhere else as they don't have any
- * attributes. If we have any packet to send (due to either new routes or the connection
- * tracking code wanting to send a Open, Keepalive or Notification message), we call
- * bgp_schedule_packet() which sets the corresponding bit in a @packet_to_send
- * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
- * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
- * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
- * type if we have more data of the same type to send.
+ * In outgoing direction, we gather all the routing updates and sort them to
+ * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
+ * fast comparison of &rta's and a &fib which helps us to find if we already
+ * have another route for the same destination queued for sending, so that we
+ * can replace it with the new one immediately instead of sending both
+ * updates). There also exists a special bucket holding all the route
+ * withdrawals which cannot be queued anywhere else as they don't have any
+ * attributes. If we have any packet to send (due to either new routes or the
+ * connection tracking code wanting to send an Open, Keepalive or Notification
+ * message), we call bgp_schedule_packet() which sets the corresponding bit in a
+ * @packet_to_send bit field in &bgp_conn and as soon as the transmit socket
+ * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
+ * packet type bits and calls the corresponding bgp_create_xx() functions,
+ * eventually rescheduling the same packet type if we have more data of the same
+ * type to send.
*
- * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
- * of the attribute blocks and translating them to the language of BIRD's extended attributes
- * and bgp_encode_attrs() which does the converse. Both functions are built around a
- * @bgp_attr_table array describing all important characteristics of all known attributes.
- * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
+ * The processing of attributes consists of two functions: bgp_decode_attrs()
+ * for checking of the attribute blocks and translating them to the language of
+ * BIRD's extended attributes and bgp_encode_attrs() which does the
+ * converse. Both functions are built around a @bgp_attr_table array describing
+ * all important characteristics of all known attributes. Unknown transitive
+ * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
*
* BGP protocol implements graceful restart in both restarting (local restart)
* and receiving (neighbor restart) roles. The first is handled mostly by the
@@ -61,10 +67,44 @@
* point of view and therefore maintaining received routes. Routing table
* refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
* stale routes after reestablishment of BGP session during graceful restart.
- */
+ *
+ * Supported standards:
+ * <itemize>
+ * <item> <rfc id="4271"> - Border Gateway Protocol 4 (BGP)
+ * <item> <rfc id="1997"> - BGP Communities Attribute
+ * <item> <rfc id="2385"> - Protection of BGP Sessions via TCP MD5 Signature
+ * <item> <rfc id="2545"> - Use of BGP Multiprotocol Extensions for IPv6
+ * <item> <rfc id="2918"> - Route Refresh Capability
+ * <item> <rfc id="3107"> - Carrying Label Information in BGP
+ * <item> <rfc id="4360"> - BGP Extended Communities Attribute
+ * <item> <rfc id="4364"> - BGP/MPLS IPv4 Virtual Private Networks
+ * <item> <rfc id="4456"> - BGP Route Reflection
+ * <item> <rfc id="4486"> - Subcodes for BGP Cease Notification Message
+ * <item> <rfc id="4659"> - BGP/MPLS IPv6 Virtual Private Networks
+ * <item> <rfc id="4724"> - Graceful Restart Mechanism for BGP
+ * <item> <rfc id="4760"> - Multiprotocol extensions for BGP
+ * <item> <rfc id="4798"> - Connecting IPv6 Islands over IPv4 MPLS
+ * <item> <rfc id="5065"> - AS confederations for BGP
+ * <item> <rfc id="5082"> - Generalized TTL Security Mechanism
+ * <item> <rfc id="5492"> - Capabilities Advertisement with BGP
+ * <item> <rfc id="5549"> - Advertising IPv4 NLRI with an IPv6 Next Hop
+ * <item> <rfc id="5575"> - Dissemination of Flow Specification Rules
+ * <item> <rfc id="5668"> - 4-Octet AS Specific BGP Extended Community
+ * <item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
+ * <item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
+ * <item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
+ * <item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
+ * <item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
+ * <item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
+ * <item> <rfc id="7947"> - Internet Exchange BGP Route Server
+ * <item> <rfc id="8092"> - BGP Large Communities Attribute
+ * </itemize>
+*/
#undef LOCAL_DEBUG
+#include <stdlib.h>
+
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
@@ -80,70 +120,150 @@
struct linpool *bgp_linpool; /* Global temporary pool */
-static sock *bgp_listen_sk; /* Global listening socket */
-static int bgp_counter; /* Number of protocol instances using the listening socket */
+struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */
+static list bgp_sockets; /* Global list of listening sockets */
+
-static void bgp_close(struct bgp_proto *p, int apply_md5);
static void bgp_connect(struct bgp_proto *p);
static void bgp_active(struct bgp_proto *p);
-static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
+static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
+static void bgp_listen_sock_err(sock *sk UNUSED, int err);
/**
* bgp_open - open a BGP instance
* @p: BGP instance
*
- * This function allocates and configures shared BGP resources.
- * Should be called as the last step during initialization
- * (when lock is acquired and neighbor is ready).
- * When error, state changed to PS_DOWN, -1 is returned and caller
- * should return immediately.
+ * This function allocates and configures shared BGP resources, mainly listening
+ * sockets. Should be called as the last step during initialization (when lock
+ * is acquired and neighbor is ready). On error, -1 is returned and the caller
+ * should change state to PS_DOWN and return immediately.
*/
static int
bgp_open(struct bgp_proto *p)
{
- struct config *cfg = p->cf->c.global;
- int errcode;
+ struct bgp_socket *bs = NULL;
+ struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL; /* interface is only part of the lookup key with strict bind */
+ ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
+ (ipa_is_ip4(p->cf->remote_ip) ? IPA_NONE4 : IPA_NONE6); /* without strict bind: IPA_NONE4 or IPA_NONE6, chosen by the family of remote_ip */
+ uint port = p->cf->local_port;
- if (!bgp_listen_sk)
- bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
+ /* FIXME: Add some global init? */
+ if (!bgp_linpool) /* bgp_linpool is only created below, after a socket was opened, so NULL doubles as the "no listening sockets yet" marker */
+ init_list(&bgp_sockets);
+
+ /* We assume that cf->iface is defined iff cf->local_ip is link-local */
- if (!bgp_listen_sk)
+ WALK_LIST(bs, bgp_sockets) /* reuse an already open listening socket with the same address, interface and port */
+ if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->iface == ifa) && (bs->sk->sport == port))
{
- errcode = BEM_NO_SOCKET;
- goto err;
+ bs->uc++; /* one more instance uses this shared socket */
+ p->sock = bs;
+ return 0;
}
- if (!bgp_linpool)
- bgp_linpool = lp_new(&root_pool, 4080);
+ sock *sk = sk_new(proto_pool); /* no matching socket found: set up a new passive TCP socket */
+ sk->type = SK_TCP_PASSIVE;
+ sk->ttl = 255;
+ sk->saddr = addr;
+ sk->sport = port; /* NOTE(review): sk->iface is never assigned here although the lookup above compares bs->sk->iface with ifa - confirm this is intended */
+ sk->flags = 0;
+ sk->tos = IP_PREC_INTERNET_CONTROL;
+ sk->rbsize = BGP_RX_BUFFER_SIZE;
+ sk->tbsize = BGP_TX_BUFFER_SIZE;
+ sk->rx_hook = bgp_incoming_connection;
+ sk->err_hook = bgp_listen_sock_err;
+
+ if (sk_open(sk) < 0) /* on failure the socket is logged and freed at err */
+ goto err;
- bgp_counter++;
+ bs = mb_allocz(proto_pool, sizeof(struct bgp_socket)); /* wrapper carrying the socket and its use count */
+ bs->sk = sk;
+ bs->uc = 1;
+ p->sock = bs;
- if (p->cf->password)
- if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
- p->cf->iface, p->cf->password, p->cf->setkey) < 0)
- {
- sk_log_error(bgp_listen_sk, p->p.name);
- bgp_close(p, 0);
- errcode = BEM_INVALID_MD5;
- goto err;
- }
+ add_tail(&bgp_sockets, &bs->n);
+
+ if (!bgp_linpool) /* the two global linpools are created together with the first listening socket */
+ {
+ bgp_linpool = lp_new_default(proto_pool);
+ bgp_linpool2 = lp_new_default(proto_pool);
+ }
return 0;
err:
- p->p.disabled = 1;
- bgp_store_error(p, NULL, BE_MISC, errcode);
- proto_notify_state(&p->p, PS_DOWN);
+ sk_log_error(sk, p->p.name); /* protocol state is no longer changed here; only logging and cleanup remain */
+ log(L_ERR "%s: Cannot open listening socket", p->p.name);
+ rfree(sk);
return -1;
}
+/**
+ * bgp_close - close a BGP instance
+ * @p: BGP instance
+ *
+ * This function drops @p's use of its listening socket; the socket is freed with its last user and the global linpools with the last socket.
+ */
+static void
+bgp_close(struct bgp_proto *p)
+{
+ struct bgp_socket *bs = p->sock;
+
+ ASSERT(bs && bs->uc); /* must be paired with an earlier successful bgp_open() that set p->sock */
+
+ if (--bs->uc) /* other instances still use this socket */
+ return;
+
+ rfree(bs->sk); /* last user: release the socket and its list entry */
+ rem_node(&bs->n);
+ mb_free(bs); /* NOTE(review): p->sock is not cleared and now points at freed memory - confirm callers never reuse it */
+
+ if (!EMPTY_LIST(bgp_sockets)) /* other listening sockets remain, keep the linpools */
+ return;
+
+ rfree(bgp_linpool);
+ bgp_linpool = NULL;
+
+ rfree(bgp_linpool2);
+ bgp_linpool2 = NULL;
+}
+
+static inline int
+bgp_setup_auth(struct bgp_proto *p, int enable) /* returns 0 when no password is configured, otherwise the sk_set_md5_auth() result */
+{
+ if (p->cf->password)
+ {
+ int rv = sk_set_md5_auth(p->sock->sk, /* applied to the shared listening socket of p */
+ p->cf->local_ip, p->cf->remote_ip, p->cf->iface,
+ enable ? p->cf->password : NULL, p->cf->setkey); /* a NULL password is passed when !enable - presumably removes the key; confirm in sk_set_md5_auth() */
+
+ if (rv < 0) /* failure is only logged here; the caller decides what to do with rv */
+ sk_log_error(p->sock->sk, p->p.name);
+
+ return rv;
+ }
+ else
+ return 0;
+}
+
+static inline struct bgp_channel *
+bgp_find_channel(struct bgp_proto *p, u32 afi) /* returns the first channel of p whose afi equals the argument, or NULL */
+{
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels) /* linear scan of the protocol's channel list */
+ if (c->afi == afi)
+ return c;
+
+ return NULL; /* no channel with this afi */
+}
+
static void
bgp_startup(struct bgp_proto *p)
{
BGP_TRACE(D_EVENTS, "Started");
- p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
+ p->start_state = BSS_CONNECT;
if (!p->cf->passive)
bgp_active(p);
@@ -159,70 +279,57 @@ bgp_startup_timeout(timer *t)
static void
bgp_initiate(struct bgp_proto *p)
{
- int rv = bgp_open(p);
- if (rv < 0)
- return;
+ int err_val;
+
+ if (bgp_open(p) < 0)
+ { err_val = BEM_NO_SOCKET; goto err1; }
+
+ if (bgp_setup_auth(p, 1) < 0)
+ { err_val = BEM_INVALID_MD5; goto err2; }
if (p->cf->bfd)
bgp_update_bfd(p, p->cf->bfd);
if (p->startup_delay)
- {
- p->start_state = BSS_DELAY;
- BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
- bgp_start_timer(p->startup_timer, p->startup_delay);
- }
+ {
+ p->start_state = BSS_DELAY;
+ BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
+ bgp_start_timer(p->startup_timer, p->startup_delay);
+ }
else
bgp_startup(p);
-}
-/**
- * bgp_close - close a BGP instance
- * @p: BGP instance
- * @apply_md5: 0 to disable unsetting MD5 auth
- *
- * This function frees and deconfigures shared BGP resources.
- * @apply_md5 is set to 0 when bgp_close is called as a cleanup
- * from failed bgp_open().
- */
-static void
-bgp_close(struct bgp_proto *p, int apply_md5)
-{
- ASSERT(bgp_counter);
- bgp_counter--;
+ return;
- if (p->cf->password && apply_md5)
- if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
- p->cf->iface, NULL, p->cf->setkey) < 0)
- sk_log_error(bgp_listen_sk, p->p.name);
+err2:
+ bgp_close(p);
+err1:
+ p->p.disabled = 1;
+ bgp_store_error(p, NULL, BE_MISC, err_val);
+ proto_notify_state(&p->p, PS_DOWN);
- if (!bgp_counter)
- {
- rfree(bgp_listen_sk);
- bgp_listen_sk = NULL;
- rfree(bgp_linpool);
- bgp_linpool = NULL;
- }
+ return;
}
/**
* bgp_start_timer - start a BGP timer
* @t: timer
- * @value: time to fire (0 to disable the timer)
+ * @value: time (in seconds) to fire (0 to disable the timer)
*
- * This functions calls tm_start() on @t with time @value and the
- * amount of randomization suggested by the BGP standard. Please use
- * it for all BGP timers.
+ * This functions calls tm_start() on @t with time @value and the amount of
+ * randomization suggested by the BGP standard. Please use it for all BGP
+ * timers.
*/
void
-bgp_start_timer(timer *t, int value)
+bgp_start_timer(timer *t, uint value)
{
if (value)
- {
- /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
- t->randomize = value / 4;
- tm_start(t, value - t->randomize);
- }
+ {
+ /* The randomization procedure is specified in RFC 4271 section 10 */
+ btime time = value S;
+ btime randomize = random() % ((time / 4) + 1);
+ tm_start(t, time - randomize);
+ }
else
tm_stop(t);
}
@@ -231,8 +338,8 @@ bgp_start_timer(timer *t, int value)
* bgp_close_conn - close a BGP connection
* @conn: connection to close
*
- * This function takes a connection described by the &bgp_conn structure,
- * closes its socket and frees all resources associated with it.
+ * This function takes a connection described by the &bgp_conn structure, closes
+ * its socket and frees all resources associated with it.
*/
void
bgp_close_conn(struct bgp_conn *conn)
@@ -241,16 +348,22 @@ bgp_close_conn(struct bgp_conn *conn)
DBG("BGP: Closing connection\n");
conn->packets_to_send = 0;
- rfree(conn->connect_retry_timer);
- conn->connect_retry_timer = NULL;
+ conn->channels_to_send = 0;
+ rfree(conn->connect_timer);
+ conn->connect_timer = NULL;
rfree(conn->keepalive_timer);
conn->keepalive_timer = NULL;
rfree(conn->hold_timer);
conn->hold_timer = NULL;
- rfree(conn->sk);
- conn->sk = NULL;
rfree(conn->tx_ev);
conn->tx_ev = NULL;
+ rfree(conn->sk);
+ conn->sk = NULL;
+
+ mb_free(conn->local_caps);
+ conn->local_caps = NULL;
+ mb_free(conn->remote_caps);
+ conn->remote_caps = NULL;
}
@@ -258,9 +371,9 @@ bgp_close_conn(struct bgp_conn *conn)
* bgp_update_startup_delay - update a startup delay
* @p: BGP instance
*
- * This function updates a startup delay that is used to postpone next BGP connect.
- * It also handles disable_after_error and might stop BGP instance when error
- * happened and disable_after_error is on.
+ * This function updates a startup delay that is used to postpone next BGP
+ * connect. It also handles disable_after_error and might stop BGP instance
+ * when error happened and disable_after_error is on.
*
* It should be called when BGP protocol error happened.
*/
@@ -271,17 +384,17 @@ bgp_update_startup_delay(struct bgp_proto *p)
DBG("BGP: Updating startup delay\n");
- if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
+ if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
p->startup_delay = 0;
- p->last_proto_error = now;
+ p->last_proto_error = current_time();
if (cf->disable_after_error)
- {
- p->startup_delay = 0;
- p->p.disabled = 1;
- return;
- }
+ {
+ p->startup_delay = 0;
+ p->p.disabled = 1;
+ return;
+ }
if (!p->startup_delay)
p->startup_delay = cf->error_delay_time_min;
@@ -290,32 +403,38 @@ bgp_update_startup_delay(struct bgp_proto *p)
}
static void
-bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
+bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode)
{
switch (conn->state)
- {
- case BS_IDLE:
- case BS_CLOSE:
- return;
- case BS_CONNECT:
- case BS_ACTIVE:
- bgp_conn_enter_idle_state(conn);
- return;
- case BS_OPENSENT:
- case BS_OPENCONFIRM:
- case BS_ESTABLISHED:
- bgp_error(conn, 6, subcode, NULL, 0);
- return;
- default:
- bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
- }
+ {
+ case BS_IDLE:
+ case BS_CLOSE:
+ return;
+
+ case BS_CONNECT:
+ case BS_ACTIVE:
+ bgp_conn_enter_idle_state(conn);
+ return;
+
+ case BS_OPENSENT:
+ case BS_OPENCONFIRM:
+ case BS_ESTABLISHED:
+ bgp_error(conn, 6, subcode, NULL, 0);
+ return;
+
+ default:
+ bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
+ }
}
static void
bgp_down(struct bgp_proto *p)
{
if (p->start_state > BSS_PREPARE)
- bgp_close(p, 1);
+ {
+ bgp_setup_auth(p, 0);
+ bgp_close(p);
+ }
BGP_TRACE(D_EVENTS, "Down");
proto_notify_state(&p->p, PS_DOWN);
@@ -327,20 +446,20 @@ bgp_decision(void *vp)
struct bgp_proto *p = vp;
DBG("BGP: Decision start\n");
- if ((p->p.proto_state == PS_START)
- && (p->outgoing_conn.state == BS_IDLE)
- && (p->incoming_conn.state != BS_OPENCONFIRM)
- && (!p->cf->passive))
+ if ((p->p.proto_state == PS_START) &&
+ (p->outgoing_conn.state == BS_IDLE) &&
+ (p->incoming_conn.state != BS_OPENCONFIRM) &&
+ !p->cf->passive)
bgp_active(p);
- if ((p->p.proto_state == PS_STOP)
- && (p->outgoing_conn.state == BS_IDLE)
- && (p->incoming_conn.state == BS_IDLE))
+ if ((p->p.proto_state == PS_STOP) &&
+ (p->outgoing_conn.state == BS_IDLE) &&
+ (p->incoming_conn.state == BS_IDLE))
bgp_down(p);
}
void
-bgp_stop(struct bgp_proto *p, unsigned subcode)
+bgp_stop(struct bgp_proto *p, uint subcode)
{
proto_notify_state(&p->p, PS_STOP);
bgp_graceful_close_conn(&p->outgoing_conn, subcode);
@@ -349,7 +468,7 @@ bgp_stop(struct bgp_proto *p, unsigned subcode)
}
static inline void
-bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
+bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
{
if (conn->bgp->p.mrtdump & MD_STATES)
mrt_dump_bgp_state_change(conn, conn->state, new_state);
@@ -364,13 +483,17 @@ bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
bgp_conn_set_state(conn, BS_OPENCONFIRM);
}
+static const struct bgp_af_caps dummy_af_caps = { };
+
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
{
struct bgp_proto *p = conn->bgp;
+ struct bgp_caps *local = conn->local_caps;
+ struct bgp_caps *peer = conn->remote_caps;
+ struct bgp_channel *c;
BGP_TRACE(D_EVENTS, "BGP session established");
- DBG("BGP: UP!!!\n");
/* For multi-hop BGP sessions */
if (ipa_zero(p->source_addr))
@@ -381,30 +504,92 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->conn = conn;
p->last_error_class = 0;
p->last_error_code = 0;
- p->feed_state = BFS_NONE;
- p->load_state = BFS_NONE;
- bgp_init_bucket_table(p);
- bgp_init_prefix_table(p, 8);
- int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
+ p->as4_session = conn->as4_session;
- if (p->p.gr_recovery && !peer_gr_ready)
- proto_graceful_restart_unlock(&p->p);
+ p->route_refresh = peer->route_refresh;
+ p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
- if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
- p->p.gr_wait = 1;
+ /* Whether we may handle possible GR of peer (it has some AF GR-able) */
+ p->gr_ready = 0; /* Updated later */
- if (p->gr_active)
+ /* Whether peer is ready to handle our GR recovery */
+ int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
+
+ if (p->gr_active_num)
tm_stop(p->gr_timer);
- if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
- bgp_graceful_restart_done(p);
+ /* Number of active channels */
+ int num = 0;
+
+ WALK_LIST(c, p->p.channels)
+ {
+ const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
+ const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
+
+ /* Ignore AFIs that were not announced in multiprotocol capability */
+ if (!loc || !loc->ready)
+ loc = &dummy_af_caps;
+
+ if (!rem || !rem->ready)
+ rem = &dummy_af_caps;
+
+ int active = loc->ready && rem->ready;
+ c->c.disabled = !active;
+ c->c.reloadable = p->route_refresh;
+
+ c->index = active ? num++ : 0;
- /* GR capability implies that neighbor will send End-of-RIB */
- if (conn->peer_gr_aware)
- p->load_state = BFS_LOADING;
+ c->feed_state = BFS_NONE;
+ c->load_state = BFS_NONE;
- /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
+ /* Channels where peer may do GR */
+ c->gr_ready = active && local->gr_aware && rem->gr_able;
+ p->gr_ready = p->gr_ready || c->gr_ready;
+
+ /* Channels not able to recover gracefully */
+ if (p->p.gr_recovery && (!active || !peer_gr_ready))
+ channel_graceful_restart_unlock(&c->c);
+
+ /* Channels waiting for local convergence */
+ if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
+ c->c.gr_wait = 1;
+
+ /* Channels where peer is not able to recover gracefully */
+ if (c->gr_active && ! (c->gr_ready && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
+ bgp_graceful_restart_done(c);
+
+ /* GR capability implies that neighbor will send End-of-RIB */
+ if (peer->gr_aware)
+ c->load_state = BFS_LOADING;
+
+ c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
+ c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
+ c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
+
+ /* Update RA mode */
+ if (c->add_path_tx)
+ c->c.ra_mode = RA_ANY;
+ else if (c->cf->secondary)
+ c->c.ra_mode = RA_ACCEPTED;
+ else
+ c->c.ra_mode = RA_OPTIMAL;
+ }
+
+ p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
+ p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
+ p->channel_count = num;
+
+ WALK_LIST(c, p->p.channels)
+ {
+ if (c->c.disabled)
+ continue;
+
+ p->afi_map[c->index] = c->afi;
+ p->channel_map[c->index] = c;
+ }
+
+ /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
bgp_conn_set_state(conn, BS_ESTABLISHED);
proto_notify_state(&p->p, PS_UP);
@@ -416,8 +601,9 @@ bgp_conn_leave_established_state(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "BGP session closed");
p->conn = NULL;
- bgp_free_prefix_table(p);
- bgp_free_bucket_table(p);
+ // XXXX free these tables to avoid memory leak during graceful restart
+ // bgp_free_prefix_table(p);
+ // bgp_free_bucket_table(p);
if (p->p.proto_state == PS_UP)
bgp_stop(p, 0);
@@ -471,34 +657,57 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
- p->gr_active ? " - already pending" : "");
- proto_notify_state(&p->p, PS_START);
+ p->gr_active_num ? " - already pending" : "");
- if (p->gr_active)
- rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
+ p->gr_active_num = 0;
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ {
+ if (c->gr_ready)
+ {
+ if (c->gr_active)
+ rt_refresh_end(c->c.table, &c->c);
+
+ c->gr_active = 1;
+ p->gr_active_num++;
+ rt_refresh_begin(c->c.table, &c->c);
+ }
+ else
+ {
+ /* Just flush the routes */
+ rt_refresh_begin(c->c.table, &c->c);
+ rt_refresh_end(c->c.table, &c->c);
+ }
+ }
- p->gr_active = 1;
- bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
- rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
+ proto_notify_state(&p->p, PS_START);
+ bgp_start_timer(p->gr_timer, p->conn->local_caps->gr_time);
}
/**
* bgp_graceful_restart_done - finish active BGP graceful restart
- * @p: BGP instance
+ * @c: BGP channel
*
* This function is called when the active BGP graceful restart of the neighbor
- * should be finished - either successfully (the neighbor sends all paths and
- * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
- * not support BGP graceful restart on the new session). The function ends
- * routing table refresh cycle and stops BGP restart timer.
+ * should be finished for channel @c - either successfully (the neighbor sends
+ * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
+ * unsuccessfully (the neighbor does not support BGP graceful restart on the new
+ * session). The function ends the routing table refresh cycle.
*/
void
-bgp_graceful_restart_done(struct bgp_proto *p)
+bgp_graceful_restart_done(struct bgp_channel *c)
{
- BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
- p->gr_active = 0;
- tm_stop(p->gr_timer);
- rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ ASSERT(c->gr_active);
+ c->gr_active = 0;
+ p->gr_active_num--;
+
+ if (!p->gr_active_num)
+ BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
+
+ rt_refresh_end(c->c.table, &c->c);
}
/**
@@ -522,7 +731,7 @@ bgp_graceful_restart_timeout(timer *t)
/**
* bgp_refresh_begin - start incoming enhanced route refresh sequence
- * @p: BGP instance
+ * @c: BGP channel
*
* This function is called when an incoming enhanced route refresh sequence is
* started by the neighbor, demarcated by the BoRR packet. The function updates
@@ -531,18 +740,20 @@ bgp_graceful_restart_timeout(timer *t)
* ensure that these two sequences do not overlap.
*/
void
-bgp_refresh_begin(struct bgp_proto *p)
+bgp_refresh_begin(struct bgp_channel *c)
{
- if (p->load_state == BFS_LOADING)
- { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ if (c->load_state == BFS_LOADING)
+ { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
- p->load_state = BFS_REFRESHING;
- rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
+ c->load_state = BFS_REFRESHING;
+ rt_refresh_begin(c->c.table, &c->c);
}
/**
* bgp_refresh_end - finish incoming enhanced route refresh sequence
- * @p: BGP instance
+ * @c: BGP channel
*
* This function is called when an incoming enhanced route refresh sequence is
* finished by the neighbor, demarcated by the EoRR packet. The function updates
@@ -550,39 +761,26 @@ bgp_refresh_begin(struct bgp_proto *p)
* during the sequence are removed by the nest.
*/
void
-bgp_refresh_end(struct bgp_proto *p)
+bgp_refresh_end(struct bgp_channel *c)
{
- if (p->load_state != BFS_REFRESHING)
- { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ if (c->load_state != BFS_REFRESHING)
+ { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
- p->load_state = BFS_NONE;
- rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
+ c->load_state = BFS_NONE;
+ rt_refresh_end(c->c.table, &c->c);
}
static void
bgp_send_open(struct bgp_conn *conn)
{
- conn->start_state = conn->bgp->start_state;
-
- // Default values, possibly changed by receiving capabilities.
- conn->advertised_as = 0;
- conn->peer_refresh_support = 0;
- conn->peer_as4_support = 0;
- conn->peer_add_path = 0;
- conn->peer_enhanced_refresh_support = 0;
- conn->peer_gr_aware = 0;
- conn->peer_gr_able = 0;
- conn->peer_gr_time = 0;
- conn->peer_gr_flags = 0;
- conn->peer_gr_aflags = 0;
- conn->peer_ext_messages_support = 0;
-
DBG("BGP: Sending open\n");
conn->sk->rx_hook = bgp_rx;
conn->sk->tx_hook = bgp_tx;
- tm_stop(conn->connect_retry_timer);
- bgp_schedule_packet(conn, PKT_OPEN);
+ tm_stop(conn->connect_timer);
+ bgp_schedule_packet(conn, NULL, PKT_OPEN);
bgp_conn_set_state(conn, BS_OPENSENT);
bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
}
@@ -605,10 +803,10 @@ bgp_connect_timeout(timer *t)
DBG("BGP: connect_timeout\n");
if (p->p.proto_state == PS_START)
- {
- bgp_close_conn(conn);
- bgp_connect(p);
- }
+ {
+ bgp_close_conn(conn);
+ bgp_connect(p);
+ }
else
bgp_conn_enter_idle_state(conn);
}
@@ -672,7 +870,7 @@ bgp_keepalive_timeout(timer *t)
struct bgp_conn *conn = t->data;
DBG("BGP: Keepalive timer\n");
- bgp_schedule_packet(conn, PKT_KEEPALIVE);
+ bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
/* Kick TX a bit faster */
if (ev_active(conn->tx_ev))
@@ -682,21 +880,18 @@ bgp_keepalive_timeout(timer *t)
static void
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
{
- timer *t;
-
conn->sk = NULL;
conn->bgp = p;
+
conn->packets_to_send = 0;
+ conn->channels_to_send = 0;
+ conn->last_channel = 0;
+ conn->last_channel_count = 0;
+
+ conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0);
+ conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
+ conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
- t = conn->connect_retry_timer = tm_new(p->p.pool);
- t->hook = bgp_connect_timeout;
- t->data = conn;
- t = conn->hold_timer = tm_new(p->p.pool);
- t->hook = bgp_hold_timeout;
- t->data = conn;
- t = conn->keepalive_timer = tm_new(p->p.pool);
- t->hook = bgp_keepalive_timeout;
- t->data = conn;
conn->tx_ev = ev_new(p->p.pool);
conn->tx_ev->hook = bgp_kick_tx;
conn->tx_ev->data = conn;
@@ -720,7 +915,7 @@ bgp_active(struct bgp_proto *p)
BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
bgp_setup_conn(p, conn);
bgp_conn_set_state(conn, BS_ACTIVE);
- bgp_start_timer(conn->connect_retry_timer, delay);
+ bgp_start_timer(conn->connect_timer, delay);
}
/**
@@ -734,12 +929,11 @@ bgp_active(struct bgp_proto *p)
static void
bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
{
- sock *s;
struct bgp_conn *conn = &p->outgoing_conn;
int hops = p->cf->multihop ? : 1;
DBG("BGP: Connecting\n");
- s = sk_new(p->p.pool);
+ sock *s = sk_new(p->p.pool);
s->type = SK_TCP_ACTIVE;
s->saddr = p->source_addr;
s->daddr = p->cf->remote_ip;
@@ -767,10 +961,10 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
goto err;
DBG("BGP: Waiting for connect success\n");
- bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
+ bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
return;
- err:
+err:
sk_log_error(s, p->p.name);
bgp_sock_err(s, 0);
return;
@@ -784,16 +978,15 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
static struct bgp_proto *
bgp_find_proto(sock *sk)
{
- struct proto_config *pc;
+ struct bgp_proto *p;
- WALK_LIST(pc, config->protos)
- if ((pc->protocol == &proto_bgp) && pc->proto)
- {
- struct bgp_proto *p = (struct bgp_proto *) pc->proto;
- if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
- (!p->cf->iface || (p->cf->iface == sk->iface)))
- return p;
- }
+ WALK_LIST(p, proto_list)
+ if ((p->p.proto == &proto_bgp) &&
+ ipa_equal(p->cf->remote_ip, sk->daddr) &&
+ (!p->cf->iface || (p->cf->iface == sk->iface)) &&
+ (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)) &&
+ (p->cf->local_port == sk->sport))
+ return p;
return NULL;
}
@@ -819,12 +1012,12 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
p = bgp_find_proto(sk);
if (!p)
- {
- log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
- sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
- rfree(sk);
- return 0;
- }
+ {
+ log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
+ sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
+ rfree(sk);
+ return 0;
+ }
/*
* BIRD should keep multiple incoming connections in OpenSent state (for
@@ -837,26 +1030,26 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
- {
- bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
- bgp_handle_graceful_restart(p);
- bgp_conn_enter_idle_state(p->conn);
- acc = 1;
-
- /* There might be separate incoming connection in OpenSent state */
- if (p->incoming_conn.state > BS_ACTIVE)
- bgp_close_conn(&p->incoming_conn);
- }
+ {
+ bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
+ bgp_handle_graceful_restart(p);
+ bgp_conn_enter_idle_state(p->conn);
+ acc = 1;
+
+ /* There might be separate incoming connection in OpenSent state */
+ if (p->incoming_conn.state > BS_ACTIVE)
+ bgp_close_conn(&p->incoming_conn);
+ }
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
sk->dport, acc ? "accepted" : "rejected");
if (!acc)
- {
- rfree(sk);
- return 0;
- }
+ {
+ rfree(sk);
+ return 0;
+ }
hops = p->cf->multihop ? : 1;
@@ -868,11 +1061,11 @@ bgp_incoming_connection(sock *sk, uint dummy UNUSED)
goto err;
if (p->cf->enable_extended_messages)
- {
- sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
- sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
- sk_reallocate(sk);
- }
+ {
+ sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
+ sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
+ sk_reallocate(sk);
+ }
bgp_setup_conn(p, &p->incoming_conn);
bgp_setup_sk(&p->incoming_conn, sk);
@@ -895,34 +1088,6 @@ bgp_listen_sock_err(sock *sk UNUSED, int err)
log(L_ERR "BGP: Error on listening socket: %M", err);
}
-static sock *
-bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
-{
- sock *s = sk_new(&root_pool);
- DBG("BGP: Creating listening socket\n");
- s->type = SK_TCP_PASSIVE;
- s->ttl = 255;
- s->saddr = addr;
- s->sport = port ? port : BGP_PORT;
- s->flags = flags ? 0 : SKF_V6ONLY;
- s->tos = IP_PREC_INTERNET_CONTROL;
- s->rbsize = BGP_RX_BUFFER_SIZE;
- s->tbsize = BGP_TX_BUFFER_SIZE;
- s->rx_hook = bgp_incoming_connection;
- s->err_hook = bgp_listen_sock_err;
-
- if (sk_open(s) < 0)
- goto err;
-
- return s;
-
- err:
- sk_log_error(s, "BGP");
- log(L_ERR "BGP: Cannot open listening socket");
- rfree(s);
- return NULL;
-}
-
static void
bgp_start_neighbor(struct bgp_proto *p)
{
@@ -931,23 +1096,10 @@ bgp_start_neighbor(struct bgp_proto *p)
if (ipa_zero(p->source_addr))
p->source_addr = p->neigh->ifa->ip;
-#ifdef IPV6
- {
- struct ifa *a;
- p->local_link = IPA_NONE;
- WALK_LIST(a, p->neigh->iface->addrs)
- if (a->scope == SCOPE_LINK)
- {
- p->local_link = a->ip;
- break;
- }
-
- if (! ipa_nonzero(p->local_link))
- log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name);
-
- DBG("BGP: Selected link-level address %I\n", p->local_link);
- }
-#endif
+ if (ipa_is_link_local(p->source_addr))
+ p->link_addr = p->source_addr;
+ else if (p->neigh->iface->llv6)
+ p->link_addr = p->neigh->iface->llv6->ip;
bgp_initiate(p);
}
@@ -967,34 +1119,34 @@ bgp_neigh_notify(neighbor *n)
int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
if (n->scope <= 0)
+ {
+ if (!prepare)
{
- if (!prepare)
- {
- BGP_TRACE(D_EVENTS, "Neighbor lost");
- bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
- /* Perhaps also run bgp_update_startup_delay(p)? */
- bgp_stop(p, 0);
- }
+ BGP_TRACE(D_EVENTS, "Neighbor lost");
+ bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
+ /* Perhaps also run bgp_update_startup_delay(p)? */
+ bgp_stop(p, 0);
}
+ }
else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
+ {
+ if (!prepare)
{
- if (!prepare)
- {
- BGP_TRACE(D_EVENTS, "Link down");
- bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
- if (ps == PS_UP)
- bgp_update_startup_delay(p);
- bgp_stop(p, 0);
- }
+ BGP_TRACE(D_EVENTS, "Link down");
+ bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
+ if (ps == PS_UP)
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0);
}
+ }
else
+ {
+ if (prepare)
{
- if (prepare)
- {
- BGP_TRACE(D_EVENTS, "Neighbor ready");
- bgp_start_neighbor(p);
- }
+ BGP_TRACE(D_EVENTS, "Neighbor ready");
+ bgp_start_neighbor(p);
}
+ }
}
static void
@@ -1004,13 +1156,13 @@ bgp_bfd_notify(struct bfd_request *req)
int ps = p->p.proto_state;
if (req->down && ((ps == PS_START) || (ps == PS_UP)))
- {
- BGP_TRACE(D_EVENTS, "BFD session down");
- bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
- if (ps == PS_UP)
- bgp_update_startup_delay(p);
- bgp_stop(p, 0);
- }
+ {
+ BGP_TRACE(D_EVENTS, "BFD session down");
+ bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
+ if (ps == PS_UP)
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0);
+ }
}
static void
@@ -1022,71 +1174,72 @@ bgp_update_bfd(struct bgp_proto *p, int use_bfd)
bgp_bfd_notify, p);
if (!use_bfd && p->bfd_req)
- {
- rfree(p->bfd_req);
- p->bfd_req = NULL;
- }
+ {
+ rfree(p->bfd_req);
+ p->bfd_req = NULL;
+ }
}
-static int
-bgp_reload_routes(struct proto *P)
+static void
+bgp_reload_routes(struct channel *C)
{
- struct bgp_proto *p = (struct bgp_proto *) P;
- if (!p->conn || !p->conn->peer_refresh_support)
- return 0;
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
- bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
- return 1;
+ ASSERT(p->conn && p->route_refresh);
+
+ bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
}
static void
-bgp_feed_begin(struct proto *P, int initial)
+bgp_feed_begin(struct channel *C, int initial)
{
- struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
/* This should not happen */
if (!p->conn)
return;
if (initial && p->cf->gr_mode)
- p->feed_state = BFS_LOADING;
+ c->feed_state = BFS_LOADING;
/* It is refeed and both sides support enhanced route refresh */
- if (!initial && p->cf->enable_refresh &&
- p->conn->peer_enhanced_refresh_support)
- {
- /* BoRR must not be sent before End-of-RIB */
- if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
- return;
+ if (!initial && p->enhanced_refresh)
+ {
+ /* BoRR must not be sent before End-of-RIB */
+ if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
+ return;
- p->feed_state = BFS_REFRESHING;
- bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
- }
+ c->feed_state = BFS_REFRESHING;
+ bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
+ }
}
static void
-bgp_feed_end(struct proto *P)
+bgp_feed_end(struct channel *C)
{
- struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
/* This should not happen */
if (!p->conn)
return;
/* Non-demarcated feed ended, nothing to do */
- if (p->feed_state == BFS_NONE)
+ if (c->feed_state == BFS_NONE)
return;
/* Schedule End-of-RIB packet */
- if (p->feed_state == BFS_LOADING)
- p->feed_state = BFS_LOADED;
+ if (c->feed_state == BFS_LOADING)
+ c->feed_state = BFS_LOADED;
/* Schedule EoRR packet */
- if (p->feed_state == BFS_REFRESHING)
- p->feed_state = BFS_REFRESHED;
+ if (c->feed_state == BFS_REFRESHING)
+ c->feed_state = BFS_REFRESHED;
/* Kick TX hook */
- bgp_schedule_packet(p->conn, PKT_UPDATE);
+ bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
@@ -1097,30 +1250,30 @@ bgp_start_locked(struct object_lock *lock)
struct bgp_config *cf = p->cf;
if (p->p.proto_state != PS_START)
- {
- DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
- return;
- }
+ {
+ DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
+ return;
+ }
DBG("BGP: Got lock\n");
if (cf->multihop)
- {
- /* Multi-hop sessions do not use neighbor entries */
- bgp_initiate(p);
- return;
- }
+ {
+ /* Multi-hop sessions do not use neighbor entries */
+ bgp_initiate(p);
+ return;
+ }
neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
if (!n)
- {
- log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
- /* As we do not start yet, we can just disable protocol */
- p->p.disabled = 1;
- bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
- proto_notify_state(&p->p, PS_DOWN);
- return;
- }
+ {
+ log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
+ /* As we do not start yet, we can just disable protocol */
+ p->p.disabled = 1;
+ bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
+ proto_notify_state(&p->p, PS_DOWN);
+ return;
+ }
p->neigh = n;
@@ -1145,36 +1298,34 @@ bgp_start(struct proto *P)
p->neigh = NULL;
p->bfd_req = NULL;
p->gr_ready = 0;
- p->gr_active = 0;
-
- rt_lock_table(p->igp_table);
+ p->gr_active_num = 0;
p->event = ev_new(p->p.pool);
p->event->hook = bgp_decision;
p->event->data = p;
- p->startup_timer = tm_new(p->p.pool);
- p->startup_timer->hook = bgp_startup_timeout;
- p->startup_timer->data = p;
-
- p->gr_timer = tm_new(p->p.pool);
- p->gr_timer->hook = bgp_graceful_restart_timeout;
- p->gr_timer->data = p;
+ p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
+ p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
p->local_id = proto_get_router_id(P->cf);
if (p->rr_client)
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
p->remote_id = 0;
- p->source_addr = p->cf->source_addr;
+ p->source_addr = p->cf->local_ip;
+ p->link_addr = IPA_NONE;
+ /* XXXX */
if (p->p.gr_recovery && p->cf->gr_mode)
- proto_graceful_restart_lock(P);
+ {
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ channel_graceful_restart_lock(&c->c);
+ }
/*
- * Before attempting to create the connection, we need to lock the
- * port, so that are sure we're the only instance attempting to talk
- * with that neighbor.
+ * Before attempting to create the connection, we need to lock the port,
+ * so that we are the only instance attempting to talk with that neighbor.
*/
lock = p->lock = olock_new(P->pool);
@@ -1196,78 +1347,64 @@ static int
bgp_shutdown(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- unsigned subcode = 0;
+ uint subcode = 0;
BGP_TRACE(D_EVENTS, "Shutdown requested");
switch (P->down_code)
- {
- case PDC_CF_REMOVE:
- case PDC_CF_DISABLE:
- subcode = 3; // Errcode 6, 3 - peer de-configured
- break;
-
- case PDC_CF_RESTART:
- subcode = 6; // Errcode 6, 6 - other configuration change
- break;
-
- case PDC_CMD_DISABLE:
- case PDC_CMD_SHUTDOWN:
- subcode = 2; // Errcode 6, 2 - administrative shutdown
- break;
-
- case PDC_CMD_RESTART:
- subcode = 4; // Errcode 6, 4 - administrative reset
- break;
-
- case PDC_RX_LIMIT_HIT:
- case PDC_IN_LIMIT_HIT:
- subcode = 1; // Errcode 6, 1 - max number of prefixes reached
- /* log message for compatibility */
- log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
- goto limit;
-
- case PDC_OUT_LIMIT_HIT:
- subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
-
- limit:
- bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
- if (proto_restart)
- bgp_update_startup_delay(p);
- else
- p->startup_delay = 0;
- goto done;
- }
+ {
+ case PDC_CF_REMOVE:
+ case PDC_CF_DISABLE:
+ subcode = 3; // Errcode 6, 3 - peer de-configured
+ break;
+
+ case PDC_CF_RESTART:
+ subcode = 6; // Errcode 6, 6 - other configuration change
+ break;
+
+ case PDC_CMD_DISABLE:
+ case PDC_CMD_SHUTDOWN:
+ subcode = 2; // Errcode 6, 2 - administrative shutdown
+ break;
+
+ case PDC_CMD_RESTART:
+ subcode = 4; // Errcode 6, 4 - administrative reset
+ break;
+
+ case PDC_RX_LIMIT_HIT:
+ case PDC_IN_LIMIT_HIT:
+ subcode = 1; // Errcode 6, 1 - max number of prefixes reached
+ /* log message for compatibility */
+ log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
+ goto limit;
+
+ case PDC_OUT_LIMIT_HIT:
+ subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
+
+ limit:
+ bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
+ if (proto_restart)
+ bgp_update_startup_delay(p);
+ else
+ p->startup_delay = 0;
+ goto done;
+ }
bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
p->startup_delay = 0;
- done:
+done:
bgp_stop(p, subcode);
return p->p.proto_state;
}
-static void
-bgp_cleanup(struct proto *P)
-{
- struct bgp_proto *p = (struct bgp_proto *) P;
- rt_unlock_table(p->igp_table);
-}
-
-static rtable *
-get_igp_table(struct bgp_config *cf)
-{
- return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
-}
-
static struct proto *
-bgp_init(struct proto_config *C)
+bgp_init(struct proto_config *CF)
{
- struct proto *P = proto_new(C, sizeof(struct bgp_proto));
- struct bgp_config *c = (struct bgp_config *) C;
+ struct proto *P = proto_new(CF);
struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_config *cf = (struct bgp_config *) CF;
- P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
P->rt_notify = bgp_rt_notify;
P->import_control = bgp_import_control;
P->neigh_notify = bgp_neigh_notify;
@@ -1276,102 +1413,276 @@ bgp_init(struct proto_config *C)
P->feed_end = bgp_feed_end;
P->rte_better = bgp_rte_better;
P->rte_mergable = bgp_rte_mergable;
- P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
-
- p->cf = c;
- p->local_as = c->local_as;
- p->remote_as = c->remote_as;
- p->is_internal = (c->local_as == c->remote_as);
- p->rs_client = c->rs_client;
- p->rr_client = c->rr_client;
- p->igp_table = get_igp_table(c);
+ P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
+
+ p->cf = cf;
+ p->local_as = cf->local_as;
+ p->remote_as = cf->remote_as;
+ p->public_as = cf->local_as;
+ p->is_internal = (cf->local_as == cf->remote_as);
+ p->is_interior = p->is_internal || cf->confederation_member;
+ p->rs_client = cf->rs_client;
+ p->rr_client = cf->rr_client;
+
+ /* Confederation ID is used for truly external peers */
+ if (cf->confederation && !p->is_interior)
+ p->public_as = cf->confederation;
+
+ /* Add all channels */
+ struct bgp_channel_config *cc;
+ WALK_LIST(cc, CF->channels)
+ proto_add_channel(P, &cc->c);
return P;
}
+static void
+bgp_channel_init(struct channel *C, struct channel_config *CF)
+{
+ struct bgp_channel *c = (void *) C;
+ struct bgp_channel_config *cf = (void *) CF;
+
+ c->cf = cf;
+ c->afi = cf->afi;
+ c->desc = cf->desc;
+
+ if (cf->igp_table_ip4)
+ c->igp_table_ip4 = cf->igp_table_ip4->table;
+
+ if (cf->igp_table_ip6)
+ c->igp_table_ip6 = cf->igp_table_ip6->table;
+}
+
+static int
+bgp_channel_start(struct channel *C)
+{
+ struct bgp_proto *p = (void *) C->proto;
+ struct bgp_channel *c = (void *) C;
+ ip_addr src = p->source_addr;
+
+ if (c->igp_table_ip4)
+ rt_lock_table(c->igp_table_ip4);
+
+ if (c->igp_table_ip6)
+ rt_lock_table(c->igp_table_ip6);
+
+ c->pool = p->p.pool; // XXXX
+ bgp_init_bucket_table(c);
+ bgp_init_prefix_table(c);
+
+ c->next_hop_addr = c->cf->next_hop_addr;
+ c->link_addr = IPA_NONE;
+ c->packets_to_send = 0;
+
+ /* Try to use source address as next hop address */
+ if (ipa_zero(c->next_hop_addr))
+ {
+ if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
+ c->next_hop_addr = src;
+
+ if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
+ c->next_hop_addr = src;
+ }
+
+ /* Exit if no feasible next hop address is found */
+ if (ipa_zero(c->next_hop_addr))
+ {
+ log(L_WARN "%s: Missing next hop address", p->p.name);
+ return 0;
+ }
+
+ /* Set link-local address for IPv6 single-hop BGP */
+ if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
+ {
+ c->link_addr = p->link_addr;
+
+ if (ipa_zero(c->link_addr))
+ log(L_WARN "%s: Missing link-local address", p->p.name);
+ }
+
+ /* Link local address is already in c->link_addr */
+ if (ipa_is_link_local(c->next_hop_addr))
+ c->next_hop_addr = IPA_NONE;
+
+ return 0; /* XXXX: Currently undefined */
+}
+
+static void
+bgp_channel_shutdown(struct channel *C)
+{
+ struct bgp_channel *c = (void *) C;
+
+ /* XXXX: cleanup bucket and prefix tables */
+
+ c->next_hop_addr = IPA_NONE;
+ c->link_addr = IPA_NONE;
+}
+
+static void
+bgp_channel_cleanup(struct channel *C)
+{
+ struct bgp_channel *c = (void *) C;
+
+ if (c->igp_table_ip4)
+ rt_unlock_table(c->igp_table_ip4);
+
+ if (c->igp_table_ip6)
+ rt_unlock_table(c->igp_table_ip6);
+}
+
+static inline struct bgp_channel_config *
+bgp_find_channel_config(struct bgp_config *cf, u32 afi)
+{
+ struct bgp_channel_config *cc;
+
+ WALK_LIST(cc, cf->c.channels)
+ if (cc->afi == afi)
+ return cc;
+
+ return NULL;
+}
+
+struct rtable_config *
+bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
+{
+ struct bgp_channel_config *cc2;
+ struct rtable_config *tab;
+
+ /* First, try table connected by the channel */
+ if (cc->c.table->addr_type == type)
+ return cc->c.table;
+
+ /* Find paired channel with the same SAFI but the other AFI */
+ u32 afi2 = cc->afi ^ 0x30000;
+ cc2 = bgp_find_channel_config(cf, afi2);
+
+ /* Second, try IGP table configured in the paired channel */
+ if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
+ return tab;
+
+ /* Third, try table connected by the paired channel */
+ if (cc2 && (cc2->c.table->addr_type == type))
+ return cc2->c.table;
+
+ /* Last, try default table of given type */
+ if (tab = cf->c.global->def_tables[type])
+ return tab;
+
+ cf_error("Undefined IGP table");
+}
+
void
-bgp_check_config(struct bgp_config *c)
+bgp_postconfig(struct proto_config *CF)
{
- int internal = (c->local_as == c->remote_as);
+ struct bgp_config *cf = (void *) CF;
+ int internal = (cf->local_as == cf->remote_as);
/* Do not check templates at all */
- if (c->c.class == SYM_TEMPLATE)
+ if (cf->c.class == SYM_TEMPLATE)
return;
/* EBGP direct by default, IBGP multihop by default */
- if (c->multihop < 0)
- c->multihop = internal ? 64 : 0;
-
- /* Different default for gw_mode */
- if (!c->gw_mode)
- c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
+ if (cf->multihop < 0)
+ cf->multihop = internal ? 64 : 0;
- /* Different default based on rs_client */
- if (!c->missing_lladdr)
- c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
- /* Disable after error incompatible with restart limit action */
- if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
- c->c.in_limit->action = PLA_DISABLE;
-
-
- if (!c->local_as)
+ if (!cf->local_as)
cf_error("Local AS number must be set");
- if (ipa_zero(c->remote_ip))
+ if (ipa_zero(cf->remote_ip))
cf_error("Neighbor must be configured");
- if (!c->remote_as)
+ if (!cf->remote_as)
cf_error("Remote AS number must be set");
- if (ipa_is_link_local(c->remote_ip) && !c->iface)
+ if (ipa_is_link_local(cf->remote_ip) && !cf->iface)
cf_error("Link-local neighbor address requires specified interface");
- if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
+ if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
cf_error("Neighbor AS number out of range (AS4 not available)");
- if (!internal && c->rr_client)
+ if (!internal && cf->rr_client)
cf_error("Only internal neighbor can be RR client");
- if (internal && c->rs_client)
+ if (internal && cf->rs_client)
cf_error("Only external neighbor can be RS client");
- if (c->multihop && (c->gw_mode == GW_DIRECT))
- cf_error("Multihop BGP cannot use direct gateway mode");
+ if (!cf->confederation && cf->confederation_member)
+ cf_error("Confederation ID must be set for member sessions");
- if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
- ipa_is_link_local(c->source_addr)))
+ if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
+ ipa_is_link_local(cf->remote_ip)))
cf_error("Multihop BGP cannot be used with link-local addresses");
- if (c->multihop && c->iface)
+ if (cf->multihop && cf->iface)
cf_error("Multihop BGP cannot be bound to interface");
- if (c->multihop && c->check_link)
+ if (cf->multihop && cf->check_link)
cf_error("Multihop BGP cannot depend on link state");
- if (c->multihop && c->bfd && ipa_zero(c->source_addr))
- cf_error("Multihop BGP with BFD requires specified source address");
+ if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
+ cf_error("Multihop BGP with BFD requires specified local address");
- if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
- cf_error("BGP in recursive mode prohibits sorted table");
- if (c->deterministic_med && c->c.table->sorted)
- cf_error("BGP with deterministic MED prohibits sorted table");
+ struct bgp_channel_config *cc;
+ WALK_LIST(cc, CF->channels)
+ {
+ /* Disable after error incompatible with restart limit action */
+ if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
+ cc->c.in_limit.action = PLA_DISABLE;
- if (c->secondary && !c->c.table->sorted)
- cf_error("BGP with secondary option requires sorted table");
+ /* Different default based on rs_client */
+ if (!cc->missing_lladdr)
+ cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
+
+ /* Different default for gw_mode */
+ if (!cc->gw_mode)
+ cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
+
+ /* Default based on proto config */
+ if (cc->gr_able == 0xff)
+ cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
+
+ /* Default values of IGP tables */
+ if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
+ {
+ if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
+ cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
+
+ if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
+ cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
+
+ if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
+ cf_error("Mismatched IGP table type");
+
+ if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
+ cf_error("Mismatched IGP table type");
+ }
+
+ if (cf->multihop && (cc->gw_mode == GW_DIRECT))
+ cf_error("Multihop BGP cannot use direct gateway mode");
+
+ if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
+ cf_error("BGP in recursive mode prohibits sorted table");
+
+ if (cf->deterministic_med && cc->c.table->sorted)
+ cf_error("BGP with deterministic MED prohibits sorted table");
+
+ if (cc->secondary && !cc->c.table->sorted)
+ cf_error("BGP with secondary option requires sorted table");
+ }
}
static int
-bgp_reconfigure(struct proto *P, struct proto_config *C)
+bgp_reconfigure(struct proto *P, struct proto_config *CF)
{
- struct bgp_config *new = (struct bgp_config *) C;
- struct bgp_proto *p = (struct bgp_proto *) P;
+ struct bgp_proto *p = (void *) P;
+ struct bgp_config *new = (void *) CF;
struct bgp_config *old = p->cf;
- if (proto_get_router_id(C) != p->local_id)
+ if (proto_get_router_id(CF) != p->local_id)
return 0;
int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
@@ -1379,8 +1690,26 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
// password item is last and must be checked separately
OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
&& ((!old->password && !new->password)
- || (old->password && new->password && !strcmp(old->password, new->password)))
- && (get_igp_table(old) == get_igp_table(new));
+ || (old->password && new->password && !strcmp(old->password, new->password)));
+
+ /* FIXME: Move channel reconfiguration to generic protocol code ? */
+ struct channel *C, *C2;
+ struct bgp_channel_config *cc;
+
+ WALK_LIST(C, p->p.channels)
+ C->stale = 1;
+
+ WALK_LIST(cc, new->c.channels)
+ {
+ C = (struct channel *) bgp_find_channel(p, cc->afi);
+ same = proto_configure_channel(P, &C, &cc->c) && same;
+ C->stale = 0;
+ }
+
+ WALK_LIST_DELSAFE(C, C2, p->p.channels)
+ if (C->stale)
+ same = proto_configure_channel(P, &C, NULL) && same;
+
if (same && (p->start_state > BSS_PREPARE))
bgp_update_bfd(p, new->bfd);
@@ -1392,11 +1721,34 @@ bgp_reconfigure(struct proto *P, struct proto_config *C)
return same;
}
+#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
+
+static int
+bgp_channel_reconfigure(struct channel *C, struct channel_config *CC)
+{
+ struct bgp_channel *c = (void *) C;
+ struct bgp_channel_config *new = (void *) CC;
+ struct bgp_channel_config *old = c->cf;
+
+ if (memcmp(((byte *) old) + sizeof(struct channel_config),
+ ((byte *) new) + sizeof(struct channel_config),
+ /* Remaining items must be checked separately */
+ OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config)))
+ return 0;
+
+ /* Check change in IGP tables */
+ if ((IGP_TABLE(old, ip4) != IGP_TABLE(new, ip4)) ||
+ (IGP_TABLE(old, ip6) != IGP_TABLE(new, ip6)))
+ return 0;
+
+ c->cf = new;
+ return 1;
+}
+
static void
-bgp_copy_config(struct proto_config *dest, struct proto_config *src)
+bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
{
/* Just a shallow copy */
- proto_copy_rest(dest, src, sizeof(struct bgp_config));
}
@@ -1413,14 +1765,14 @@ bgp_copy_config(struct proto_config *dest, struct proto_config *src)
* closes the connection.
*/
void
-bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
+bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
{
struct bgp_proto *p = c->bgp;
if (c->state == BS_CLOSE)
return;
- bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
+ bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
bgp_conn_enter_close_state(c);
@@ -1428,13 +1780,13 @@ bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int l
c->notify_subcode = subcode;
c->notify_data = data;
c->notify_size = (len > 0) ? len : 0;
- bgp_schedule_packet(c, PKT_NOTIFICATION);
+ bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
if (code != 6)
- {
- bgp_update_startup_delay(p);
- bgp_stop(p, 0);
- }
+ {
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0);
+ }
}
/**
@@ -1473,19 +1825,19 @@ static const char *
bgp_last_errmsg(struct bgp_proto *p)
{
switch (p->last_error_class)
- {
- case BE_MISC:
- return bgp_misc_errors[p->last_error_code];
- case BE_SOCKET:
- return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
- case BE_BGP_RX:
- case BE_BGP_TX:
- return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
- case BE_AUTO_DOWN:
- return bgp_auto_errors[p->last_error_code];
- default:
- return "";
- }
+ {
+ case BE_MISC:
+ return bgp_misc_errors[p->last_error_code];
+ case BE_SOCKET:
+ return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
+ case BE_BGP_RX:
+ case BE_BGP_TX:
+ return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
+ case BE_AUTO_DOWN:
+ return bgp_auto_errors[p->last_error_code];
+ default:
+ return "";
+ }
}
static const char *
@@ -1516,48 +1868,165 @@ bgp_get_status(struct proto *P, byte *buf)
}
static void
+bgp_show_afis(int code, char *s, u32 *afis, uint count)
+{
+ buffer b;
+ LOG_BUFFER_INIT(b);
+
+ buffer_puts(&b, s);
+
+ for (u32 *af = afis; af < (afis + count); af++)
+ {
+ const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
+ if (desc)
+ buffer_print(&b, " %s", desc->name);
+ else
+ buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
+ }
+
+ if (b.pos == b.end)
+ strcpy(b.end - 32, " ... <too long>");
+
+ cli_msg(code, b.start);
+}
+
+static void
+bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
+{
+ struct bgp_af_caps *ac;
+ uint any_mp_bgp = 0;
+ uint any_gr_able = 0;
+ uint any_add_path = 0;
+ uint any_ext_next_hop = 0;
+ u32 *afl1 = alloca(caps->af_count * sizeof(u32));
+ u32 *afl2 = alloca(caps->af_count * sizeof(u32));
+ uint afn1, afn2;
+
+ WALK_AF_CAPS(caps, ac)
+ {
+ any_mp_bgp |= ac->ready;
+ any_gr_able |= ac->gr_able;
+ any_add_path |= ac->add_path;
+ any_ext_next_hop |= ac->ext_next_hop;
+ }
+
+ if (any_mp_bgp)
+ {
+ cli_msg(-1006, " Multiprotocol");
+
+ afn1 = 0;
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ready)
+ afl1[afn1++] = ac->afi;
+
+ bgp_show_afis(-1006, " AF announced:", afl1, afn1);
+ }
+
+ if (caps->route_refresh)
+ cli_msg(-1006, " Route refresh");
+
+ if (any_ext_next_hop)
+ {
+ cli_msg(-1006, " Extended next hop");
+
+ afn1 = 0;
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ext_next_hop)
+ afl1[afn1++] = ac->afi;
+
+ bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
+ }
+
+ if (caps->ext_messages)
+ cli_msg(-1006, " Extended message");
+
+ if (caps->gr_aware)
+ cli_msg(-1006, " Graceful restart");
+
+ if (any_gr_able)
+ {
+ /* Continues from gr_aware */
+ cli_msg(-1006, " Restart time: %u", caps->gr_time);
+ if (caps->gr_flags & BGP_GRF_RESTART)
+ cli_msg(-1006, " Restart recovery");
+
+ afn1 = afn2 = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ if (ac->gr_able)
+ afl1[afn1++] = ac->afi;
+
+ if (ac->gr_af_flags & BGP_GRF_FORWARDING)
+ afl2[afn2++] = ac->afi;
+ }
+
+ bgp_show_afis(-1006, " AF supported:", afl1, afn1);
+ bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
+ }
+
+ if (caps->as4_support)
+ cli_msg(-1006, " 4-octet AS numbers");
+
+ if (any_add_path)
+ {
+ cli_msg(-1006, " ADD-PATH");
+
+ afn1 = afn2 = 0;
+ WALK_AF_CAPS(caps, ac)
+ {
+ if (ac->add_path & BGP_ADD_PATH_RX)
+ afl1[afn1++] = ac->afi;
+
+ if (ac->add_path & BGP_ADD_PATH_TX)
+ afl2[afn2++] = ac->afi;
+ }
+
+ bgp_show_afis(-1006, " RX:", afl1, afn1);
+ bgp_show_afis(-1006, " TX:", afl2, afn2);
+ }
+
+ if (caps->enhanced_refresh)
+ cli_msg(-1006, " Enhanced refresh");
+}
+
+static void
bgp_show_proto_info(struct proto *P)
{
struct bgp_proto *p = (struct bgp_proto *) P;
- struct bgp_conn *c = p->conn;
-
- proto_show_basic_info(P);
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
- if (p->gr_active)
+ if (p->gr_active_num)
cli_msg(-1006, " Neighbor graceful restart active");
if (P->proto_state == PS_START)
- {
- struct bgp_conn *oc = &p->outgoing_conn;
+ {
+ struct bgp_conn *oc = &p->outgoing_conn;
- if ((p->start_state < BSS_CONNECT) &&
- (p->startup_timer->expires))
- cli_msg(-1006, " Error wait: %d/%d",
- p->startup_timer->expires - now, p->startup_delay);
+ if ((p->start_state < BSS_CONNECT) &&
+ (tm_active(p->startup_timer)))
+ cli_msg(-1006, " Error wait: %t/%u",
+ tm_remains(p->startup_timer), p->startup_delay);
- if ((oc->state == BS_ACTIVE) &&
- (oc->connect_retry_timer->expires))
- cli_msg(-1006, " Connect delay: %d/%d",
- oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
+ if ((oc->state == BS_ACTIVE) &&
+ (tm_active(oc->connect_timer)))
+ cli_msg(-1006, " Connect delay: %t/%u",
+ tm_remains(oc->connect_timer), p->cf->connect_delay_time);
- if (p->gr_active && p->gr_timer->expires)
- cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now);
- }
+ if (p->gr_active_num && tm_active(p->gr_timer))
+ cli_msg(-1006, " Restart timer: %t/-",
+ tm_remains(p->gr_timer));
+ }
else if (P->proto_state == PS_UP)
- {
- cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
- cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s",
- c->peer_refresh_support ? " refresh" : "",
- c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
- c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
- c->peer_as4_support ? " AS4" : "",
- (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
- (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
- c->peer_ext_messages_support ? " ext-messages" : "");
+ {
+ cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
+ cli_msg(-1006, " Local capabilities");
+ bgp_show_capabilities(p, p->conn->local_caps);
+ cli_msg(-1006, " Neighbor capabilities");
+ bgp_show_capabilities(p, p->conn->remote_caps);
+/* XXXX
cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
p->is_internal ? "internal" : "external",
p->cf->multihop ? " multihop" : "",
@@ -1567,35 +2036,60 @@ bgp_show_proto_info(struct proto *P)
p->add_path_rx ? " add-path-rx" : "",
p->add_path_tx ? " add-path-tx" : "",
p->ext_messages ? " ext-messages" : "");
- cli_msg(-1006, " Source address: %I", p->source_addr);
- if (P->cf->in_limit)
- cli_msg(-1006, " Route limit: %d/%d",
- p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
- cli_msg(-1006, " Hold timer: %d/%d",
- tm_remains(c->hold_timer), c->hold_time);
- cli_msg(-1006, " Keepalive timer: %d/%d",
- tm_remains(c->keepalive_timer), c->keepalive_time);
- }
+*/
+ cli_msg(-1006, " Source address: %I", p->source_addr);
+ cli_msg(-1006, " Hold timer: %t/%u",
+ tm_remains(p->conn->hold_timer), p->conn->hold_time);
+ cli_msg(-1006, " Keepalive timer: %t/%u",
+ tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
+ }
if ((p->last_error_class != BE_NONE) &&
(p->last_error_class != BE_MAN_DOWN))
+ {
+ const char *err1 = bgp_err_classes[p->last_error_class];
+ const char *err2 = bgp_last_errmsg(p);
+ cli_msg(-1006, " Last error: %s%s", err1, err2);
+ }
+
+ {
+ /* XXXX ?? */
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
{
- const char *err1 = bgp_err_classes[p->last_error_class];
- const char *err2 = bgp_last_errmsg(p);
- cli_msg(-1006, " Last error: %s%s", err1, err2);
+ channel_show_info(&c->c);
+
+ if (c->igp_table_ip4)
+ cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
+
+ if (c->igp_table_ip6)
+ cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
}
+ }
}
+struct channel_class channel_bgp = {
+ .channel_size = sizeof(struct bgp_channel),
+ .config_size = sizeof(struct bgp_channel_config),
+ .init = bgp_channel_init,
+ .start = bgp_channel_start,
+ .shutdown = bgp_channel_shutdown,
+ .cleanup = bgp_channel_cleanup,
+ .reconfigure = bgp_channel_reconfigure,
+};
+
struct protocol proto_bgp = {
.name = "BGP",
.template = "bgp%d",
.attr_class = EAP_BGP,
.preference = DEF_PREF_BGP,
+ .channel_mask = NB_IP | NB_VPN | NB_FLOW,
+ .proto_size = sizeof(struct bgp_proto),
.config_size = sizeof(struct bgp_config),
+ .postconfig = bgp_postconfig,
.init = bgp_init,
.start = bgp_start,
.shutdown = bgp_shutdown,
- .cleanup = bgp_cleanup,
.reconfigure = bgp_reconfigure,
.copy_config = bgp_copy_config,
.get_status = bgp_get_status,
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index e47a0eb1..3d940c22 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -2,6 +2,8 @@
* BIRD -- The Border Gateway Protocol
*
* (c) 2000 Martin Mares <mj@ucw.cz>
+ * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2008--2016 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
@@ -10,26 +12,80 @@
#define _BIRD_BGP_H_
#include <stdint.h>
+#include <setjmp.h>
+#include "nest/bird.h"
#include "nest/route.h"
#include "nest/bfd.h"
+//#include "lib/lists.h"
#include "lib/hash.h"
+#include "lib/socket.h"
struct linpool;
struct eattr;
+
+/* Address families */
+
+#define BGP_AFI_IPV4 1
+#define BGP_AFI_IPV6 2
+
+#define BGP_SAFI_UNICAST 1
+#define BGP_SAFI_MULTICAST 2
+#define BGP_SAFI_MPLS 4
+#define BGP_SAFI_MPLS_VPN 128
+#define BGP_SAFI_VPN_MULTICAST 129
+#define BGP_SAFI_FLOW 133
+
+/* Internal AF codes */
+
+#define BGP_AF(A, B) (((u32)(A) << 16) | (u32)(B))
+#define BGP_AFI(A) ((u32)(A) >> 16)
+#define BGP_SAFI(A) ((u32)(A) & 0xFFFF)
+
+#define BGP_AF_IPV4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_UNICAST )
+#define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST )
+#define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST )
+#define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST )
+#define BGP_AF_IPV4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS )
+#define BGP_AF_IPV6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS )
+#define BGP_AF_VPN4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS_VPN )
+#define BGP_AF_VPN6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS_VPN )
+#define BGP_AF_VPN4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_VPN_MULTICAST )
+#define BGP_AF_VPN6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_VPN_MULTICAST )
+#define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW )
+#define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW )
+
+
+struct bgp_write_state;
+struct bgp_parse_state;
+struct bgp_export_state;
+struct bgp_bucket;
+
+struct bgp_af_desc {
+ u32 afi;
+ u32 net;
+ u8 mpls;
+ u8 no_igp;
+ const char *name;
+ uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size);
+ void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+ void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to);
+ uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size);
+ void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
+};
+
+
struct bgp_config {
struct proto_config c;
u32 local_as, remote_as;
+ ip_addr local_ip; /* Source address to use */
ip_addr remote_ip;
- ip_addr source_addr; /* Source address to use */
struct iface *iface; /* Interface for link-local addresses */
+ u16 local_port; /* Local listening port */
u16 remote_port; /* Neighbor destination port */
int multihop; /* Number of hops if multihop */
- int ttl_security; /* Enable TTL security [RFC5082] */
- int next_hop_self; /* Always set next hop to local IP address */
- int next_hop_keep; /* Do not touch next hop attribute */
- int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */
- int gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
+ int strict_bind; /* Bind listening socket to local address */
+ int ttl_security; /* Enable TTL security [RFC 5082] */
int compare_path_lengths; /* Use path lengths when selecting best route */
int med_metric; /* Compare MULTI_EXIT_DISC even between routes from different ASes */
int igp_metric; /* Use IGP metrics when selecting best route */
@@ -37,22 +93,22 @@ struct bgp_config {
int deterministic_med; /* Use more complicated algo to have strict RFC 4271 MED comparison */
u32 default_local_pref; /* Default value for LOCAL_PREF attribute */
u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */
- int capabilities; /* Enable capability handshake [RFC3392] */
- int enable_refresh; /* Enable local support for route refresh [RFC2918] */
- int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */
+ int capabilities; /* Enable capability handshake [RFC 5492] */
+ int enable_refresh; /* Enable local support for route refresh [RFC 2918] */
+ int enable_as4; /* Enable local support for 4B AS numbers [RFC 6793] */
int enable_extended_messages; /* Enable local support for extended messages [draft] */
u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */
int rr_client; /* Whether neighbor is RR client of me */
int rs_client; /* Whether neighbor is RS client of me */
- int advertise_ipv4; /* Whether we should add IPv4 capability advertisement to OPEN message */
+ u32 confederation; /* Confederation ID, or zero if confeds not active */
+ int confederation_member; /* Whether neighbor AS is member of our confederation */
int passive; /* Do not initiate outgoing connection */
int interpret_communities; /* Hardwired handling of well-known communities */
- int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
- int add_path; /* Use ADD-PATH extension [RFC7911] */
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
int allow_local_pref; /* Allow LOCAL_PREF in EBGP sessions */
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
int setkey; /* Set MD5 password to system SA/SP database */
+ /* Times below are in seconds */
unsigned gr_time; /* Graceful restart timeout */
unsigned connect_delay_time; /* Minimum delay between connect attempts */
unsigned connect_retry_time; /* Timeout for connect attempts */
@@ -64,11 +120,31 @@ struct bgp_config {
unsigned disable_after_error; /* Disable the protocol when error is detected */
char *password; /* Password used for MD5 authentication */
- struct rtable_config *igp_table; /* Table used for recursive next hop lookups */
int check_link; /* Use iface link state for liveness detection */
int bfd; /* Use BFD for liveness detection */
};
+struct bgp_channel_config {
+ struct channel_config c;
+
+ u32 afi;
+ const struct bgp_af_desc *desc;
+
+ ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
+ u8 next_hop_self; /* Always set next hop to local IP address */
+ u8 next_hop_keep; /* Do not touch next hop attribute */
+ u8 missing_lladdr; /* What we will do when we don't know link-local addr, see MLL_* */
+ u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */
+ u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
+ u8 gr_able; /* Allow full graceful restart for the channel */
+ u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */
+ u8 add_path; /* Use ADD-PATH extension [RFC 7911] */
+
+ uint rest[0]; /* Remaining items are reconfigured separately */
+ struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
+ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+};
+
#define MLL_SELF 1
#define MLL_DROP 2
#define MLL_IGNORE 3
@@ -76,112 +152,241 @@ struct bgp_config {
#define GW_DIRECT 1
#define GW_RECURSIVE 2
-#define ADD_PATH_RX 1
-#define ADD_PATH_TX 2
-#define ADD_PATH_FULL 3
+#define BGP_ADD_PATH_RX 1
+#define BGP_ADD_PATH_TX 2
+#define BGP_ADD_PATH_FULL 3
-#define BGP_GR_ABLE 1
-#define BGP_GR_AWARE 2
+#define BGP_GR_ABLE 1
+#define BGP_GR_AWARE 2
-/* For peer_gr_flags */
+/* For GR capability common flags */
#define BGP_GRF_RESTART 0x80
-/* For peer_gr_aflags */
+/* For GR capability per-AF flags */
#define BGP_GRF_FORWARDING 0x80
+struct bgp_af_caps {
+ u32 afi;
+ u8 ready; /* Multiprotocol capability, RFC 4760 */
+ u8 gr_able; /* Graceful restart support, RFC 4724 */
+ u8 gr_af_flags; /* Graceful restart per-AF flags */
+ u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */
+ u8 add_path; /* Multiple paths support, RFC 7911 */
+};
+
+struct bgp_caps {
+ u32 as4_number; /* Announced ASN */
+
+ u8 as4_support; /* Four-octet AS capability, RFC 6793 */
+ u8 ext_messages; /* Extended message length, RFC draft */
+ u8 route_refresh; /* Route refresh capability, RFC 2918 */
+ u8 enhanced_refresh; /* Enhanced route refresh, RFC 7313 */
+
+ u8 gr_aware; /* Graceful restart capability, RFC 4724 */
+ u8 gr_flags; /* Graceful restart flags */
+ u16 gr_time; /* Graceful restart time in seconds */
+
+ u16 af_count; /* Number of af_data items */
+
+ struct bgp_af_caps af_data[0]; /* Per-AF capability data */
+};
+
+#define WALK_AF_CAPS(caps,ac) \
+ for (ac = caps->af_data; ac < &caps->af_data[caps->af_count]; ac++)
+
+
+struct bgp_socket {
+ node n; /* Node in global bgp_sockets */
+ sock *sk; /* Real listening socket */
+ u32 uc; /* Use count */
+};
+
struct bgp_conn {
struct bgp_proto *bgp;
struct birdsock *sk;
- uint state; /* State of connection state machine */
- struct timer *connect_retry_timer;
- struct timer *hold_timer;
- struct timer *keepalive_timer;
- struct event *tx_ev;
- int packets_to_send; /* Bitmap of packet types to be sent */
+ u8 state; /* State of connection state machine */
+ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
+ u8 ext_messages; /* Session uses extended message length */
+
+ struct bgp_caps *local_caps;
+ struct bgp_caps *remote_caps;
+ timer *connect_timer;
+ timer *hold_timer;
+ timer *keepalive_timer;
+ event *tx_ev;
+ u32 packets_to_send; /* Bitmap of packet types to be sent */
+ u32 channels_to_send; /* Bitmap of channels with packets to be sent */
+ u8 last_channel; /* Channel used last time for TX */
+ u8 last_channel_count; /* Number of times the last channel was used in succession */
int notify_code, notify_subcode, notify_size;
byte *notify_data;
- u32 advertised_as; /* Temporary value for AS number received */
- int start_state; /* protocol start_state snapshot when connection established */
- u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */
- u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
- u8 peer_add_path; /* Peer supports ADD-PATH [RFC7911] */
- u8 peer_enhanced_refresh_support; /* Peer supports enhanced refresh [RFC7313] */
- u8 peer_gr_aware;
- u8 peer_gr_able;
- u16 peer_gr_time;
- u8 peer_gr_flags;
- u8 peer_gr_aflags;
- u8 peer_ext_messages_support; /* Peer supports extended message length [draft] */
- unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
+
+ uint hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
struct bgp_proto {
struct proto p;
struct bgp_config *cf; /* Shortcut to BGP configuration */
u32 local_as, remote_as;
- int start_state; /* Substates that partitions BS_START */
- u8 is_internal; /* Internal BGP connection (local_as == remote_as) */
- u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
- u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */
- u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */
- u8 ext_messages; /* Session allows to use extended messages (both sides support it) */
+ u32 public_as; /* Externally visible ASN (local_as or confederation id) */
u32 local_id; /* BGP identifier of this router */
u32 remote_id; /* BGP identifier of the neighbor */
u32 rr_cluster_id; /* Route reflector cluster ID */
- int rr_client; /* Whether neighbor is RR client of me */
- int rs_client; /* Whether neighbor is RS client of me */
+ int start_state; /* Substates that partitions BS_START */
+ u8 is_internal; /* Internal BGP session (local_as == remote_as) */
+ u8 is_interior; /* Internal or intra-confederation BGP session */
+ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
+ u8 rr_client; /* Whether neighbor is RR client of me */
+ u8 rs_client; /* Whether neighbor is RS client of me */
+ u8 route_refresh; /* Route refresh allowed to send [RFC 2918] */
+ u8 enhanced_refresh; /* Enhanced refresh is negotiated [RFC 7313] */
u8 gr_ready; /* Neighbor could do graceful restart */
- u8 gr_active; /* Neighbor is doing graceful restart */
- u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */
- u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */
+ u8 gr_active_num; /* Neighbor is doing GR, number of active channels */
+ u8 channel_count; /* Number of active channels */
+ u32 *afi_map; /* Map channel index -> AFI */
+ struct bgp_channel **channel_map; /* Map channel index -> channel */
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
struct object_lock *lock; /* Lock for neighbor connection */
struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */
+ struct bgp_socket *sock; /* Shared listening socket */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
ip_addr source_addr; /* Local address used as an advertised next hop */
- rtable *igp_table; /* Table used for recursive next hop lookups */
- struct event *event; /* Event for respawning and shutting process */
- struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
- struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
- struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */
- uint hash_size, hash_count, hash_limit;
- HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
- slab *prefix_slab; /* Slab holding prefix nodes */
- list bucket_queue; /* Queue of buckets to send */
- struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
- unsigned startup_delay; /* Time to delay protocol startup by due to errors */
- bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */
+ ip_addr link_addr; /* Link-local version of source_addr */
+ event *event; /* Event for respawning and shutting process */
+ timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
+ timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
+ uint startup_delay; /* Delay (in seconds) of protocol startup due to previous errors */
+ btime last_proto_error; /* Time of last error that leads to protocol stop */
u8 last_error_class; /* Error class of last error */
u32 last_error_code; /* Error code of last error. BGP protocol errors
are encoded as (bgp_err_code << 16 | bgp_err_subcode) */
-#ifdef IPV6
- byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */
- unsigned mp_reach_len, mp_unreach_len;
- ip_addr local_link; /* Link-level version of source_addr */
-#endif
+};
+
+struct bgp_channel {
+ struct channel c;
+
+ /* Rest are BGP specific data */
+ struct bgp_channel_config *cf;
+ pool *pool; /* XXXX */
+
+ u32 afi;
+ u32 index;
+ const struct bgp_af_desc *desc;
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
+
+ HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+
+ rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
+ rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+ ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
+ ip_addr link_addr; /* Link-local version of next_hop_addr */
+
+ u32 packets_to_send; /* Bitmap of packet types to be sent */
+
+ u8 gr_ready; /* Neighbor could do GR on this AF */
+ u8 gr_active; /* Neighbor is doing GR and keeping fwd state */
+
+ u8 ext_next_hop; /* Session allows both IPv4 and IPv6 next hops */
+
+ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */
+ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */
+
+ u8 feed_state; /* Feed state (TX) for EoR, RR packets, see BFS_* */
+ u8 load_state; /* Load state (RX) for EoR, RR packets, see BFS_* */
};
struct bgp_prefix {
- struct {
- ip_addr prefix;
- int pxlen;
- } n;
+ node buck_node; /* Node in per-bucket list */
+ struct bgp_prefix *next; /* Node in prefix hash table */
+ u32 hash;
u32 path_id;
- struct bgp_prefix *next;
- node bucket_node; /* Node in per-bucket list */
+ net_addr net[0];
};
struct bgp_bucket {
node send_node; /* Node in send queue */
- struct bgp_bucket *hash_next, *hash_prev; /* Node in bucket hash table */
- unsigned hash; /* Hash over extended attributes */
- list prefixes; /* Prefixes in this buckets */
+ struct bgp_bucket *next; /* Node in bucket hash table */
+ list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */
+ u32 hash; /* Hash over extended attributes */
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
+struct bgp_export_state {
+ struct bgp_proto *proto;
+ struct bgp_channel *channel;
+ struct linpool *pool;
+
+ struct bgp_proto *src;
+ rte *route;
+ int mpls;
+
+ u32 attrs_seen[1];
+ uint err_withdraw;
+};
+
+struct bgp_write_state {
+ struct bgp_proto *proto;
+ struct bgp_channel *channel;
+ struct linpool *pool;
+
+ int as4_session;
+ int add_path;
+ int mpls;
+
+ eattr *mp_next_hop;
+ adata *mpls_labels;
+};
+
+struct bgp_parse_state {
+ struct bgp_proto *proto;
+ struct bgp_channel *channel;
+ struct linpool *pool;
+
+ int as4_session;
+ int add_path;
+ int mpls;
+
+ u32 attrs_seen[256/32];
+
+ u32 mp_reach_af;
+ u32 mp_unreach_af;
+
+ uint attr_len;
+ uint ip_reach_len;
+ uint ip_unreach_len;
+ uint ip_next_hop_len;
+ uint mp_reach_len;
+ uint mp_unreach_len;
+ uint mp_next_hop_len;
+
+ byte *attrs;
+ byte *ip_reach_nlri;
+ byte *ip_unreach_nlri;
+ byte *ip_next_hop_data;
+ byte *mp_reach_nlri;
+ byte *mp_unreach_nlri;
+ byte *mp_next_hop_data;
+
+ uint err_withdraw;
+ uint err_subcode;
+ jmp_buf err_jmpbuf;
+
+ struct hostentry *hostentry;
+ adata *mpls_labels;
+
+ /* Cached state for bgp_rte_update() */
+ u32 last_id;
+ struct rte_src *last_src;
+ rta *cached_rta;
+};
+
#define BGP_PORT 179
#define BGP_VERSION 4
#define BGP_HEADER_LENGTH 19
@@ -192,13 +397,33 @@ struct bgp_bucket {
#define BGP_RX_BUFFER_EXT_SIZE 65535
#define BGP_TX_BUFFER_EXT_SIZE 65535
-static inline uint bgp_max_packet_length(struct bgp_proto *p)
-{ return p->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
+static inline int bgp_channel_is_ipv4(struct bgp_channel *c)
+{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; }
+
+static inline int bgp_channel_is_ipv6(struct bgp_channel *c)
+{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
+
+static inline int bgp_cc_is_ipv4(struct bgp_channel_config *c)
+{ return BGP_AFI(c->afi) == BGP_AFI_IPV4; }
+
+static inline int bgp_cc_is_ipv6(struct bgp_channel_config *c)
+{ return BGP_AFI(c->afi) == BGP_AFI_IPV6; }
+
+static inline uint bgp_max_packet_length(struct bgp_conn *conn)
+{ return conn->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
+
+static inline void
+bgp_parse_error(struct bgp_parse_state *s, uint subcode)
+{
+ s->err_subcode = subcode;
+ longjmp(s->err_jmpbuf, 1);
+}
extern struct linpool *bgp_linpool;
+extern struct linpool *bgp_linpool2;
-void bgp_start_timer(struct timer *t, int value);
+void bgp_start_timer(timer *t, uint value);
void bgp_check_config(struct bgp_config *c);
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
void bgp_close_conn(struct bgp_conn *c);
@@ -208,9 +433,9 @@ void bgp_conn_enter_established_state(struct bgp_conn *conn);
void bgp_conn_enter_close_state(struct bgp_conn *conn);
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
void bgp_handle_graceful_restart(struct bgp_proto *p);
-void bgp_graceful_restart_done(struct bgp_proto *p);
-void bgp_refresh_begin(struct bgp_proto *p);
-void bgp_refresh_end(struct bgp_proto *p);
+void bgp_graceful_restart_done(struct bgp_channel *c);
+void bgp_refresh_begin(struct bgp_channel *c);
+void bgp_refresh_end(struct bgp_channel *c);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, unsigned subcode);
@@ -233,48 +458,71 @@ struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
/* attrs.c */
-/* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6
- * we store two addesses in it - a global address and a link local address.
- */
-#ifdef IPV6
-#define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
-static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; }
-#else
-#define NEXT_HOP_LENGTH sizeof(ip_addr)
-static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; }
-#endif
+static inline eattr *
+bgp_find_attr(ea_list *attrs, uint code)
+{
+ return ea_find(attrs, EA_CODE(EAP_BGP, code));
+}
+
+eattr *
+bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val);
+
+static inline void
+bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val)
+{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
+
+static inline void
+bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, struct adata *val)
+{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); }
+
+static inline void
+bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len)
+{
+ struct adata *a = lp_alloc_adata(pool, len);
+ memcpy(a->data, data, len);
+ bgp_set_attr(to, pool, code, flags, (uintptr_t) a);
+}
+
+static inline void
+bgp_unset_attr(ea_list **to, struct linpool *pool, uint code)
+{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; }
+
+
+int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end);
+ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
+
+void bgp_init_bucket_table(struct bgp_channel *c);
+void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+
+void bgp_init_prefix_table(struct bgp_channel *c);
+void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp);
-void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
-byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
-struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
-int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
int bgp_rte_better(struct rte *, struct rte *);
int bgp_rte_mergable(rte *pri, rte *sec);
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
-void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
+void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea_list *attrs);
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
-void bgp_init_bucket_table(struct bgp_proto *);
-void bgp_free_bucket_table(struct bgp_proto *p);
-void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
-void bgp_init_prefix_table(struct bgp_proto *p, u32 order);
-void bgp_free_prefix_table(struct bgp_proto *p);
-void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp);
-uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
+int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
-inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
-{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; }
/* packets.c */
void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new);
-void bgp_schedule_packet(struct bgp_conn *conn, int type);
+const struct bgp_af_desc *bgp_get_af_desc(u32 afi);
+const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi);
+void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type);
void bgp_kick_tx(void *vconn);
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, uint size);
const char * bgp_error_dsc(unsigned code, unsigned subcode);
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
+void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
+
+
/* Packet types */
#define PKT_OPEN 0x01
@@ -292,26 +540,25 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BAF_PARTIAL 0x20
#define BAF_EXT_LEN 0x10
-#define BA_ORIGIN 0x01 /* [RFC1771] */ /* WM */
+#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */
#define BA_AS_PATH 0x02 /* WM */
#define BA_NEXT_HOP 0x03 /* WM */
#define BA_MULTI_EXIT_DISC 0x04 /* ON */
#define BA_LOCAL_PREF 0x05 /* WD */
#define BA_ATOMIC_AGGR 0x06 /* WD */
#define BA_AGGREGATOR 0x07 /* OT */
-#define BA_COMMUNITY 0x08 /* [RFC1997] */ /* OT */
-#define BA_ORIGINATOR_ID 0x09 /* [RFC1966] */ /* ON */
-#define BA_CLUSTER_LIST 0x0a /* ON */
-/* We don't support these: */
-#define BA_DPA 0x0b /* ??? */
-#define BA_ADVERTISER 0x0c /* [RFC1863] */
-#define BA_RCID_PATH 0x0d
-#define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */
-#define BA_MP_UNREACH_NLRI 0x0f
-#define BA_EXT_COMMUNITY 0x10 /* [RFC4360] */
-#define BA_AS4_PATH 0x11 /* [RFC4893] */
-#define BA_AS4_AGGREGATOR 0x12
-#define BA_LARGE_COMMUNITY 0x20 /* [RFC8092] */
+#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */
+#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */
+#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */
+#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */
+#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */
+#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */
+#define BA_AS4_PATH 0x11 /* RFC 6793 */
+#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
+#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
+
+/* Bird's private internal BGP attributes */
+#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */
/* BGP connection states */
@@ -331,14 +578,12 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
*
* When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP
* protocol done what is neccessary to start itself (like acquiring the lock),
- * it goes to BSS_CONNECT. When some connection attempt failed because of
- * option or capability error, it goes to BSS_CONNECT_NOCAP.
+ * it goes to BSS_CONNECT.
*/
#define BSS_PREPARE 0 /* Used before ordinary BGP started, i. e. waiting for lock */
#define BSS_DELAY 1 /* Startup delay due to previous errors */
#define BSS_CONNECT 2 /* Ordinary BGP connecting */
-#define BSS_CONNECT_NOCAP 3 /* Legacy BGP connecting (without capabilities) */
/* BGP feed states (TX)
@@ -347,7 +592,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
*
* RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets.
*
- * These states (stored in p->feed_state) are used to keep track of these
+ * These states (stored in c->feed_state) are used to keep track of these
* requirements. When such feed is started, BFS_LOADING / BFS_REFRESHING is
* set. When it ended, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB
* or EoRR packet. When the packet is sent, the state returned to BFS_NONE.
@@ -403,15 +648,5 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define ORIGIN_EGP 1
#define ORIGIN_INCOMPLETE 2
-/* Address families */
-
-#define BGP_AF_IPV4 1
-#define BGP_AF_IPV6 2
-
-#ifdef IPV6
-#define BGP_AF BGP_AF_IPV6
-#else
-#define BGP_AF BGP_AF_IPV4
-#endif
#endif
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 55c602f1..941ae5b6 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -13,28 +13,32 @@ CF_HDR
CF_DEFINES
#define BGP_CFG ((struct bgp_config *) this_proto)
+#define BGP_CC ((struct bgp_channel_config *) this_channel)
CF_DECLS
-CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
- KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT,
- PATH, METRIC, ERROR, START, DELAY, FORGET, WAIT, ENABLE,
- DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN,
- BGP_NEXT_HOP, BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY,
- BGP_EXT_COMMUNITY, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT,
- CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE,
- PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
- INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
- TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
- SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE,
- CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, BGP_LARGE_COMMUNITY)
+CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
+ MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR,
+ START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH,
+ BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR,
+ BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY,
+ SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE,
+ IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR,
+ DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID,
+ BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL,
+ SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX,
+ GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY,
+ STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6)
+
+%type <i32> bgp_afi
CF_GRAMMAR
-CF_ADDTO(proto, bgp_proto '}' { bgp_check_config(BGP_CFG); } )
+CF_ADDTO(proto, bgp_proto '}' )
bgp_proto_start: proto_start BGP {
this_proto = proto_config_new(&proto_bgp, $1);
+ BGP_CFG->local_port = BGP_PORT;
BGP_CFG->remote_port = BGP_PORT;
BGP_CFG->multihop = -1; /* undefined */
BGP_CFG->hold_time = 240;
@@ -49,26 +53,35 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->enable_refresh = 1;
BGP_CFG->enable_as4 = 1;
BGP_CFG->capabilities = 2;
- BGP_CFG->advertise_ipv4 = 1;
BGP_CFG->interpret_communities = 1;
BGP_CFG->default_local_pref = 100;
BGP_CFG->gr_mode = BGP_GR_AWARE;
BGP_CFG->gr_time = 120;
BGP_CFG->setkey = 1;
- }
+ }
+ ;
+
+bgp_loc_opts:
+ /* empty */
+ | bgp_loc_opts PORT expr { BGP_CFG->local_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); }
+ | bgp_loc_opts AS expr { BGP_CFG->local_as = $3; }
;
bgp_nbr_opts:
/* empty */
- | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); }
+ | bgp_nbr_opts PORT expr { BGP_CFG->remote_port = $3; if (($3<1) || ($3>65535)) cf_error("Invalid port number"); }
| bgp_nbr_opts AS expr { BGP_CFG->remote_as = $3; }
;
bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
- | bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; }
- | bgp_proto LOCAL ipa AS expr ';' { BGP_CFG->source_addr = $3; BGP_CFG->local_as = $5; }
+ | bgp_proto bgp_proto_channel ';'
+ | bgp_proto LOCAL bgp_loc_opts ';'
+ | bgp_proto LOCAL ipa ipa_scope bgp_loc_opts ';' {
+ BGP_CFG->local_ip = $3;
+ if ($4) BGP_CFG->iface = $4;
+ }
| bgp_proto NEIGHBOR bgp_nbr_opts ';'
| bgp_proto NEIGHBOR ipa ipa_scope bgp_nbr_opts ';' {
if (ipa_nonzero(BGP_CFG->remote_ip))
@@ -78,20 +91,16 @@ bgp_proto:
}
| bgp_proto INTERFACE TEXT ';' { BGP_CFG->iface = if_get_by_name($3); }
| bgp_proto RR CLUSTER ID idval ';' { BGP_CFG->rr_cluster_id = $5; }
- | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; }
- | bgp_proto RS CLIENT ';' { BGP_CFG->rs_client = 1; }
+ | bgp_proto RR CLIENT bool ';' { BGP_CFG->rr_client = $4; }
+ | bgp_proto RS CLIENT bool ';' { BGP_CFG->rs_client = $4; }
+ | bgp_proto CONFEDERATION expr ';' { BGP_CFG->confederation = $3; }
+ | bgp_proto CONFEDERATION MEMBER bool ';' { BGP_CFG->confederation_member = $4; }
| bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; }
| bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; }
| bgp_proto DIRECT ';' { BGP_CFG->multihop = 0; }
| bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; }
| bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); }
- | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; BGP_CFG->next_hop_keep = 0; }
- | bgp_proto NEXT HOP KEEP ';' { BGP_CFG->next_hop_keep = 1; BGP_CFG->next_hop_self = 0; }
- | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; }
- | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; }
- | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; }
- | bgp_proto GATEWAY DIRECT ';' { BGP_CFG->gw_mode = GW_DIRECT; }
- | bgp_proto GATEWAY RECURSIVE ';' { BGP_CFG->gw_mode = GW_RECURSIVE; }
+ | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; }
| bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; }
| bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; }
| bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; }
@@ -99,7 +108,7 @@ bgp_proto:
| bgp_proto DETERMINISTIC MED bool ';' { BGP_CFG->deterministic_med = $4; }
| bgp_proto DEFAULT BGP_MED expr ';' { BGP_CFG->default_med = $4; }
| bgp_proto DEFAULT BGP_LOCAL_PREF expr ';' { BGP_CFG->default_local_pref = $4; }
- | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->source_addr = $4; }
+ | bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->local_ip = $4; }
| bgp_proto START DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; log(L_WARN "%s: Start delay time option is deprecated, use connect delay time", this_proto->name); }
| bgp_proto CONNECT DELAY TIME expr ';' { BGP_CFG->connect_delay_time = $5; }
| bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; }
@@ -111,33 +120,101 @@ bgp_proto:
| bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; }
| bgp_proto ENABLE EXTENDED MESSAGES bool ';' { BGP_CFG->enable_extended_messages = $5; }
| bgp_proto CAPABILITIES bool ';' { BGP_CFG->capabilities = $3; }
- | bgp_proto ADVERTISE IPV4 bool ';' { BGP_CFG->advertise_ipv4 = $4; }
| bgp_proto PASSWORD text ';' { BGP_CFG->password = $3; }
| bgp_proto SETKEY bool ';' { BGP_CFG->setkey = $3; }
- | bgp_proto ROUTE LIMIT expr ';' {
- this_proto->in_limit = cfg_allocz(sizeof(struct proto_limit));
- this_proto->in_limit->limit = $4;
- this_proto->in_limit->action = PLA_RESTART;
- log(L_WARN "%s: Route limit option is deprecated, use import limit", this_proto->name);
- }
| bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; }
| bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; }
- | bgp_proto SECONDARY bool ';' { BGP_CFG->secondary = $3; }
- | bgp_proto ADD PATHS RX ';' { BGP_CFG->add_path = ADD_PATH_RX; }
- | bgp_proto ADD PATHS TX ';' { BGP_CFG->add_path = ADD_PATH_TX; }
- | bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; }
- | bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; }
| bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
+ | bgp_proto ALLOW BGP_LOCAL_PREF bool ';' { BGP_CFG->allow_local_pref = $4; }
| bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; }
| bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; }
| bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; }
- | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
| bgp_proto CHECK LINK bool ';' { BGP_CFG->check_link = $4; }
| bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
;
+bgp_afi:
+ IPV4 { $$ = BGP_AF_IPV4; }
+ | IPV6 { $$ = BGP_AF_IPV6; }
+ | IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; }
+ | IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; }
+ | IPV4 MPLS { $$ = BGP_AF_IPV4_MPLS; }
+ | IPV6 MPLS { $$ = BGP_AF_IPV6_MPLS; }
+ | VPN4 MPLS { $$ = BGP_AF_VPN4_MPLS; }
+ | VPN6 MPLS { $$ = BGP_AF_VPN6_MPLS; }
+ | VPN4 MULTICAST { $$ = BGP_AF_VPN4_MC; }
+ | VPN6 MULTICAST { $$ = BGP_AF_VPN6_MC; }
+ | FLOW4 { $$ = BGP_AF_FLOW4; }
+ | FLOW6 { $$ = BGP_AF_FLOW6; }
+ ;
+
+bgp_channel_start: bgp_afi
+{
+ const struct bgp_af_desc *desc = bgp_get_af_desc($1);
+
+ if (!desc)
+ cf_error("Unknown AFI/SAFI");
+
+ this_channel = channel_config_new(&channel_bgp, desc->net, this_proto);
+ BGP_CC->c.name = desc->name;
+ BGP_CC->c.ra_mode = RA_UNDEF;
+ BGP_CC->afi = $1;
+ BGP_CC->desc = desc;
+ BGP_CC->gr_able = 0xff; /* undefined */
+};
+
+bgp_channel_item:
+ channel_item
+ | NEXT HOP ADDRESS ipa { BGP_CC->next_hop_addr = $4; }
+ | NEXT HOP SELF { BGP_CC->next_hop_self = 1; BGP_CC->next_hop_keep = 0; }
+ | NEXT HOP KEEP { BGP_CC->next_hop_keep = 1; BGP_CC->next_hop_self = 0; }
+ | MISSING LLADDR SELF { BGP_CC->missing_lladdr = MLL_SELF; }
+ | MISSING LLADDR DROP { BGP_CC->missing_lladdr = MLL_DROP; }
+ | MISSING LLADDR IGNORE { BGP_CC->missing_lladdr = MLL_IGNORE; }
+ | GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; }
+ | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; }
+ | SECONDARY bool { BGP_CC->secondary = $2; }
+ | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; }
+ | EXTENDED NEXT HOP bool { BGP_CC->ext_next_hop = $4; }
+ | ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; }
+ | ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; }
+ | ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; }
+ | IGP TABLE rtable {
+ if (BGP_CC->desc->no_igp)
+ cf_error("IGP table not allowed here");
+
+ if ($3->addr_type == NET_IP4)
+ BGP_CC->igp_table_ip4 = $3;
+ else if ($3->addr_type == NET_IP6)
+ BGP_CC->igp_table_ip6 = $3;
+ else
+ cf_error("Mismatched IGP table type");
+ }
+ ;
+
+bgp_channel_opts:
+ /* empty */
+ | bgp_channel_opts bgp_channel_item ';'
+ ;
+
+bgp_channel_opt_list:
+ /* empty */
+ | '{' bgp_channel_opts '}'
+ ;
+
+bgp_channel_end:
+{
+ if (!this_channel->table)
+ cf_error("Routing table not specified");
+
+ this_channel = NULL;
+};
+
+bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end;
+
+
CF_ADDTO(dynamic_attr, BGP_ORIGIN
{ $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(EAP_BGP, BA_ORIGIN)); })
CF_ADDTO(dynamic_attr, BGP_PATH
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index ab87bdcc..0e974746 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -2,12 +2,16 @@
* BIRD -- BGP Packet Processing
*
* (c) 2000 Martin Mares <mj@ucw.cz>
+ * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2008--2016 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#undef LOCAL_DEBUG
+#include <stdlib.h>
+
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
@@ -16,6 +20,7 @@
#include "nest/mrtdump.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
+#include "lib/flowspec.h"
#include "lib/socket.h"
#include "nest/cli.h"
@@ -27,6 +32,13 @@
#define BGP_RR_BEGIN 1
#define BGP_RR_END 2
+#define BGP_NLRI_MAX (4 + 1 + 32)
+
+#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
+#define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */
+#define BGP_MPLS_NULL 3 /* Implicit NULL label */
+#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */
+
static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
@@ -38,6 +50,46 @@ static byte fsm_err_subcode[BS_MAX] = {
[BS_ESTABLISHED] = 3
};
+
+static struct bgp_channel *
+bgp_get_channel(struct bgp_proto *p, u32 afi)
+{
+ uint i;
+
+ for (i = 0; i < p->channel_count; i++)
+ if (p->afi_map[i] == afi)
+ return p->channel_map[i];
+
+ return NULL;
+}
+
+static inline void
+put_af3(byte *buf, u32 id)
+{
+ put_u16(buf, id >> 16);
+ buf[2] = id & 0xff;
+}
+
+static inline void
+put_af4(byte *buf, u32 id)
+{
+ put_u16(buf, id >> 16);
+ buf[2] = 0;
+ buf[3] = id & 0xff;
+}
+
+static inline u32
+get_af3(byte *buf)
+{
+ return (get_u16(buf) << 16) | buf[2];
+}
+
+static inline u32
+get_af4(byte *buf)
+{
+ return (get_u16(buf) << 16) | buf[3];
+}
+
/*
* MRT Dump format is not semantically specified.
* We will use these values in appropriate fields:
@@ -58,31 +110,41 @@ static byte *
mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
{
struct bgp_proto *p = conn->bgp;
+ uint v4 = ipa_is_ip4(p->cf->remote_ip);
if (as4)
- {
- put_u32(buf+0, p->remote_as);
- put_u32(buf+4, p->local_as);
- buf+=8;
- }
+ {
+ put_u32(buf+0, p->remote_as);
+ put_u32(buf+4, p->public_as);
+ buf+=8;
+ }
else
- {
- put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
- put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS);
- buf+=4;
- }
+ {
+ put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
+ put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
+ buf+=4;
+ }
put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
- put_u16(buf+2, BGP_AF);
+ put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6);
buf+=4;
- buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
- buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE);
+
+ if (v4)
+ {
+ buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE);
+ buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE);
+ }
+ else
+ {
+ buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE);
+ buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE);
+ }
return buf;
}
static void
-mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
+mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
{
byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
byte *bp = buf + MRTDUMP_HDR_LENGTH;
@@ -96,14 +158,14 @@ mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
}
static inline u16
-convert_state(unsigned state)
+convert_state(uint state)
{
/* Convert state from our BS_* values to values used in MRTDump */
return (state == BS_CLOSE) ? 1 : state + 1;
}
void
-mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
+mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
{
byte buf[128];
byte *bp = buf + MRTDUMP_HDR_LENGTH;
@@ -127,1303 +189,2426 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
return buf + 2 + conn->notify_size;
}
-#ifdef IPV6
-static byte *
-bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
-{
- *buf++ = 1; /* Capability 1: Multiprotocol extensions */
- *buf++ = 4; /* Capability data length */
- *buf++ = 0; /* We support AF IPv6 */
- *buf++ = BGP_AF_IPV6;
- *buf++ = 0; /* RFU */
- *buf++ = 1; /* and SAFI 1 */
- return buf;
-}
-#else
+/* Capability negotiation as per RFC 5492 */
-static byte *
-bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
-{
- *buf++ = 1; /* Capability 1: Multiprotocol extensions */
- *buf++ = 4; /* Capability data length */
- *buf++ = 0; /* We support AF IPv4 */
- *buf++ = BGP_AF_IPV4;
- *buf++ = 0; /* RFU */
- *buf++ = 1; /* and SAFI 1 */
- return buf;
+const struct bgp_af_caps *
+bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
+{
+ struct bgp_af_caps *ac;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->afi == afi)
+ return ac;
+
+ return NULL;
}
-#endif
-static byte *
-bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
+static struct bgp_af_caps *
+bgp_get_af_caps(struct bgp_caps *caps, u32 afi)
{
- *buf++ = 2; /* Capability 2: Support for route refresh */
- *buf++ = 0; /* Capability data length */
- return buf;
+ struct bgp_af_caps *ac;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->afi == afi)
+ return ac;
+
+ ac = &caps->af_data[caps->af_count++];
+ memset(ac, 0, sizeof(struct bgp_af_caps));
+ ac->afi = afi;
+
+ return ac;
}
-static byte *
-bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
+static int
+bgp_af_caps_cmp(const void *X, const void *Y)
{
- *buf++ = 6; /* Capability 6: Support for extended messages */
- *buf++ = 0; /* Capability data length */
- return buf;
+ const struct bgp_af_caps *x = X, *y = Y;
+ return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
}
+
static byte *
-bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
+bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
{
- *buf++ = 64; /* Capability 64: Support for graceful restart */
- *buf++ = 6; /* Capability data length */
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_channel *c;
+ struct bgp_caps *caps;
+ struct bgp_af_caps *ac;
+ uint any_ext_next_hop = 0;
+ uint any_add_path = 0;
+ byte *data;
- put_u16(buf, p->cf->gr_time);
- if (p->p.gr_recovery)
- buf[0] |= BGP_GRF_RESTART;
- buf += 2;
+ /* Prepare bgp_caps structure */
+
+ int n = list_length(&p->p.channels);
+ caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
+ conn->local_caps = caps;
+
+ caps->as4_support = p->cf->enable_as4;
+ caps->ext_messages = p->cf->enable_extended_messages;
+ caps->route_refresh = p->cf->enable_refresh;
+ caps->enhanced_refresh = p->cf->enable_refresh;
+
+ if (caps->as4_support)
+ caps->as4_number = p->public_as;
+
+ if (p->cf->gr_mode)
+ {
+ caps->gr_aware = 1;
+ caps->gr_time = p->cf->gr_time;
+ caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
+ }
+
+ /* Allocate and fill per-AF fields */
+ WALK_LIST(c, p->p.channels)
+ {
+ ac = &caps->af_data[caps->af_count++];
+ ac->afi = c->afi;
+ ac->ready = 1;
+
+ ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
+ any_ext_next_hop |= ac->ext_next_hop;
+
+ ac->add_path = c->cf->add_path;
+ any_add_path |= ac->add_path;
+
+ if (c->cf->gr_able)
+ {
+ ac->gr_able = 1;
+
+ if (p->p.gr_recovery)
+ ac->gr_af_flags |= BGP_GRF_FORWARDING;
+ }
+ }
+
+ /* Sort capability fields by AFI/SAFI */
+ qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
- *buf++ = 0; /* Appropriate AF */
- *buf++ = BGP_AF;
- *buf++ = 1; /* and SAFI 1 */
- *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
+
+ /* Create capability list in buffer */
+
+ /*
+ * Note that max length is ~ 20+14*af_count. With max 12 channels that is
+ * 188. Option limit is 253 and buffer size is 4096, so we cannot overflow
+ * unless we add new capabilities or more AFs.
+ */
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ready)
+ {
+ *buf++ = 1; /* Capability 1: Multiprotocol extensions */
+ *buf++ = 4; /* Capability data length */
+ put_af4(buf, ac->afi);
+ buf += 4;
+ }
+
+ if (caps->route_refresh)
+ {
+ *buf++ = 2; /* Capability 2: Support for route refresh */
+ *buf++ = 0; /* Capability data length */
+ }
+
+ if (any_ext_next_hop)
+ {
+ *buf++ = 5; /* Capability 5: Support for extended next hop */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ext_next_hop)
+ {
+ put_af4(buf, ac->afi);
+ put_u16(buf+4, BGP_AFI_IPV6);
+ buf += 6;
+ }
+
+ data[-1] = buf - data;
+ }
+
+ if (caps->ext_messages)
+ {
+ *buf++ = 6; /* Capability 6: Support for extended messages */
+ *buf++ = 0; /* Capability data length */
+ }
+
+ if (caps->gr_aware)
+ {
+ *buf++ = 64; /* Capability 64: Support for graceful restart */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ put_u16(buf, caps->gr_time);
+ buf[0] |= caps->gr_flags;
+ buf += 2;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->gr_able)
+ {
+ put_af3(buf, ac->afi);
+ buf[3] = ac->gr_af_flags;
+ buf += 4;
+ }
+
+ data[-1] = buf - data;
+ }
+
+ if (caps->as4_support)
+ {
+ *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
+ *buf++ = 4; /* Capability data length */
+ put_u32(buf, p->public_as);
+ buf += 4;
+ }
+
+ if (any_add_path)
+ {
+ *buf++ = 69; /* Capability 69: Support for ADD-PATH */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->add_path)
+ {
+ put_af3(buf, ac->afi);
+ buf[3] = ac->add_path;
+ buf += 4;
+ }
+
+ data[-1] = buf - data;
+ }
+
+ if (caps->enhanced_refresh)
+ {
+ *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
+ *buf++ = 0; /* Capability data length */
+ }
return buf;
}
-static byte *
-bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf)
+static void
+bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len)
{
- *buf++ = 64; /* Capability 64: Support for graceful restart */
- *buf++ = 2; /* Capability data length */
- put_u16(buf, 0);
- return buf + 2;
-}
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_af_caps *ac;
+ int i, cl;
+ u32 af;
-static byte *
-bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
-{
- *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
- *buf++ = 4; /* Capability data length */
- put_u32(buf, p->local_as);
- return buf + 4;
-}
+ while (len > 0)
+ {
+ if (len < 2 || len < (2 + pos[1]))
+ goto err;
-static byte *
-bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
-{
- *buf++ = 69; /* Capability 69: Support for ADD-PATH */
- *buf++ = 4; /* Capability data length */
+ /* Capability length */
+ cl = pos[1];
- *buf++ = 0; /* Appropriate AF */
- *buf++ = BGP_AF;
- *buf++ = 1; /* SAFI 1 */
+ /* Capability type */
+ switch (pos[0])
+ {
+ case 1: /* Multiprotocol capability, RFC 4760 */
+ if (cl != 4)
+ goto err;
- *buf++ = p->cf->add_path;
+ af = get_af4(pos+2);
+ ac = bgp_get_af_caps(caps, af);
+ ac->ready = 1;
+ break;
- return buf;
+ case 2: /* Route refresh capability, RFC 2918 */
+ if (cl != 0)
+ goto err;
+
+ caps->route_refresh = 1;
+ break;
+
+ case 5: /* Extended next hop encoding capability, RFC 5549 */
+ if (cl % 6)
+ goto err;
+
+ for (i = 0; i < cl; i += 6)
+ {
+ /* Specified only for IPv4 prefixes with IPv6 next hops */
+ if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
+ (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
+ continue;
+
+ af = get_af4(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->ext_next_hop = 1;
+ }
+ break;
+
+ case 6: /* Extended message length capability, RFC draft */
+ if (cl != 0)
+ goto err;
+
+ caps->ext_messages = 1;
+ break;
+
+ case 64: /* Graceful restart capability, RFC 4724 */
+ if (cl % 4 != 2)
+ goto err;
+
+ /* Only the last instance is valid */
+ WALK_AF_CAPS(caps, ac)
+ {
+ ac->gr_able = 0;
+ ac->gr_af_flags = 0;
+ }
+
+ caps->gr_aware = 1;
+ caps->gr_flags = pos[2] & 0xf0;
+ caps->gr_time = get_u16(pos + 2) & 0x0fff;
+
+ for (i = 2; i < cl; i += 4)
+ {
+ af = get_af3(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->gr_able = 1;
+ ac->gr_af_flags = pos[2+i+3];
+ }
+ break;
+
+ case 65: /* AS4 capability, RFC 6793 */
+ if (cl != 4)
+ goto err;
+
+ caps->as4_support = 1;
+ caps->as4_number = get_u32(pos + 2);
+ break;
+
+ case 69: /* ADD-PATH capability, RFC 7911 */
+ if (cl % 4)
+ goto err;
+
+ for (i = 0; i < cl; i += 4)
+ {
+ byte val = pos[2+i+3];
+ if (!val || (val > BGP_ADD_PATH_FULL))
+ {
+ log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
+ p->p.name, val);
+ break;
+ }
+ }
+
+ for (i = 0; i < cl; i += 4)
+ {
+ af = get_af3(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->add_path = pos[2+i+3];
+ }
+ break;
+
+ case 70: /* Enhanced route refresh capability, RFC 7313 */
+ if (cl != 0)
+ goto err;
+
+ caps->enhanced_refresh = 1;
+ break;
+
+ /* We can safely ignore all other capabilities */
+ }
+
+ ADVANCE(pos, len, 2 + cl);
+ }
+ return;
+
+err:
+ bgp_error(conn, 2, 0, NULL, 0);
+ return;
}
-static byte *
-bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
+static int
+bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
{
- *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
- *buf++ = 0; /* Capability data length */
- return buf;
-}
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_caps *caps;
+ int ol;
+
+ /* Max number of announced AFIs is limited by max option length (255) */
+ caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps));
+ memset(caps, 0, sizeof(struct bgp_caps));
+
+ while (len > 0)
+ {
+ if ((len < 2) || (len < (2 + pos[1])))
+ { bgp_error(conn, 2, 0, NULL, 0); return -1; }
+ ol = pos[1];
+ if (pos[0] == 2)
+ {
+ /* BGP capabilities, RFC 5492 */
+ if (p->cf->capabilities)
+ bgp_read_capabilities(conn, caps, pos + 2, ol);
+ }
+ else
+ {
+ /* Unknown option */
+ bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */
+ return -1;
+ }
+
+ ADVANCE(pos, len, 2 + ol);
+ }
+
+ uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps);
+ conn->remote_caps = mb_allocz(p->p.pool, n);
+ memcpy(conn->remote_caps, caps, n);
+
+ return 0;
+}
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
- byte *cap;
- int cap_len;
BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
- BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
+ BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
+
buf[0] = BGP_VERSION;
- put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
+ put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
put_u16(buf+3, p->cf->hold_time);
put_u32(buf+5, p->local_id);
- if (conn->start_state == BSS_CONNECT_NOCAP)
- {
- BGP_TRACE(D_PACKETS, "Skipping capabilities");
- buf[9] = 0;
- return buf + 10;
- }
+ if (p->cf->capabilities)
+ {
+ /* Prepare local_caps and write capabilities to buffer */
+ byte *end = bgp_write_capabilities(conn, buf+12);
+ uint len = end - (buf+12);
+
+ buf[9] = len + 2; /* Optional parameters length */
+ buf[10] = 2; /* Option 2: Capability list */
+ buf[11] = len; /* Option data length */
+
+ return end;
+ }
+ else
+ {
+ /* Prepare empty local_caps */
+ conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
+
+ buf[9] = 0; /* No optional parameters */
+ return buf + 10;
+ }
+
+ return buf;
+}
+
+static void
+bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
+{
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_conn *other;
+ u32 asn, hold, id;
- /* Skipped 3 B for length field and Capabilities parameter header */
- cap = buf + 12;
+ /* Check state */
+ if (conn->state != BS_OPENSENT)
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
+
+ /* Check message contents */
+ if (len < 29 || len != 29 + (uint) pkt[28])
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
-#ifndef IPV6
- if (p->cf->advertise_ipv4)
- cap = bgp_put_cap_ipv4(p, cap);
-#endif
+ if (pkt[19] != BGP_VERSION)
+ { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
-#ifdef IPV6
- cap = bgp_put_cap_ipv6(p, cap);
-#endif
+ asn = get_u16(pkt+20);
+ hold = get_u16(pkt+22);
+ id = get_u32(pkt+24);
+ BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
- if (p->cf->enable_refresh)
- cap = bgp_put_cap_rr(p, cap);
+ if (bgp_read_options(conn, pkt+29, pkt[28]) < 0)
+ return;
- if (p->cf->gr_mode == BGP_GR_ABLE)
- cap = bgp_put_cap_gr1(p, cap);
- else if (p->cf->gr_mode == BGP_GR_AWARE)
- cap = bgp_put_cap_gr2(p, cap);
+ if (hold > 0 && hold < 3)
+ { bgp_error(conn, 2, 6, pkt+22, 2); return; }
- if (p->cf->enable_as4)
- cap = bgp_put_cap_as4(p, cap);
+ /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
+ if (!id || (p->is_internal && id == p->local_id))
+ { bgp_error(conn, 2, 3, pkt+24, -4); return; }
- if (p->cf->add_path)
- cap = bgp_put_cap_add_path(p, cap);
+ struct bgp_caps *caps = conn->remote_caps;
- if (p->cf->enable_refresh)
- cap = bgp_put_cap_err(p, cap);
+ if (caps->as4_support)
+ {
+ u32 as4 = caps->as4_number;
- if (p->cf->enable_extended_messages)
- cap = bgp_put_cap_ext_msg(p, cap);
+ if ((as4 != asn) && (asn != AS_TRANS))
+ log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
- cap_len = cap - buf - 12;
- if (cap_len > 0)
- {
- buf[9] = cap_len + 2; /* Optional params len */
- buf[10] = 2; /* Option: Capability list */
- buf[11] = cap_len; /* Option length */
- return cap;
- }
+ if (as4 != p->remote_as)
+ { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
+ }
else
+ {
+ if (asn != p->remote_as)
+ { bgp_error(conn, 2, 2, pkt+20, 2); return; }
+ }
+
+ /* Check the other connection */
+ other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
+ switch (other->state)
+ {
+ case BS_CONNECT:
+ case BS_ACTIVE:
+ /* Stop outgoing connection attempts */
+ bgp_conn_enter_idle_state(other);
+ break;
+
+ case BS_IDLE:
+ case BS_OPENSENT:
+ case BS_CLOSE:
+ break;
+
+ case BS_OPENCONFIRM:
+ /*
+ * Description of collision detection rules in RFC 4271 is confusing and
+ * contradictory, but it is essentially:
+ *
+ * 1. Router with higher ID is dominant
+ * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
+ * 3. When both connections are in OpenConfirm state, one initiated by
+ * the dominant router is kept.
+ *
+ * The first line in the expression below evaluates whether the neighbor
+ * is dominant, the second line whether the new connection was initiated
+ * by the neighbor. If both are true (or both are false), we keep the new
+ * connection, otherwise we keep the old one.
+ */
+ if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
+ == (conn == &p->incoming_conn))
{
- buf[9] = 0; /* No optional parameters */
- return buf + 10;
+ /* Should close the other connection */
+ BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
+ bgp_error(other, 6, 7, NULL, 0);
+ break;
}
+ /* Fall thru */
+ case BS_ESTABLISHED:
+ /* Should close this connection */
+ BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
+ bgp_error(conn, 6, 7, NULL, 0);
+ return;
+
+ default:
+ bug("bgp_rx_open: Unknown state");
+ }
+
+ /* Update our local variables */
+ conn->hold_time = MIN(hold, p->cf->hold_time);
+ conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
+ conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
+ conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
+ p->remote_id = id;
+
+ DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
+ conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
+
+ bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
+ bgp_start_timer(conn->hold_timer, conn->hold_time);
+ bgp_conn_enter_openconfirm_state(conn);
}
-static uint
-bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains)
+
+/*
+ * Next hop handling
+ */
+
+#define REPORT(msg, args...) \
+ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
+
+#define DISCARD(msg, args...) \
+ ({ REPORT(msg, ## args); return; })
+
+#define WITHDRAW(msg, args...) \
+ ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
+
+#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
+#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
+#define NO_NEXT_HOP "Missing NEXT_HOP attribute"
+#define NO_LABEL_STACK "Missing MPLS stack"
+
+
+static void
+bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
{
- byte *start = w;
- ip_addr a;
- int bytes;
+ struct bgp_proto *p = s->proto;
+ struct bgp_channel *c = s->channel;
- while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
- {
- struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
- DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
+ if (c->cf->gw_mode == GW_DIRECT)
+ {
+ neighbor *nbr = NULL;
- if (p->add_path_tx)
- {
- put_u32(w, px->path_id);
- w += 4;
- remains -= 4;
- }
+ /* GW_DIRECT -> single_hop -> p->neigh != NULL */
+ if (ipa_nonzero(gw))
+ nbr = neigh_find2(&p->p, &gw, NULL, 0);
+ else if (ipa_nonzero(ll))
+ nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0);
- *w++ = px->n.pxlen;
- bytes = (px->n.pxlen + 7) / 8;
- a = px->n.prefix;
- ipa_hton(a);
- memcpy(w, &a, bytes);
- w += bytes;
- remains -= bytes + 1;
- rem_node(&px->bucket_node);
- bgp_free_prefix(p, px);
- // fib_delete(&p->prefix_fib, px);
- }
- return w - start;
+ if (!nbr || (nbr->scope == SCOPE_HOST))
+ WITHDRAW(BAD_NEXT_HOP);
+
+ a->dest = RTD_UNICAST;
+ a->nh.gw = nbr->addr;
+ a->nh.iface = nbr->iface;
+ }
+ else /* GW_RECURSIVE */
+ {
+ if (ipa_zero(gw))
+ WITHDRAW(BAD_NEXT_HOP);
+
+ rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
+ s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
+
+ if (!s->mpls)
+ rta_apply_hostentry(a, s->hostentry, NULL);
+
+ /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
+ }
}
static void
-bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
+bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
{
- while (!EMPTY_LIST(buck->prefixes))
- {
- struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
- log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
- rem_node(&px->bucket_node);
- bgp_free_prefix(p, px);
- // fib_delete(&p->prefix_fib, px);
- }
+ if (lnum > MPLS_MAX_LABEL_STACK)
+ {
+ REPORT("Too many MPLS labels ($u)", lnum);
+
+ a->dest = RTD_UNREACHABLE;
+ a->hostentry = NULL;
+ a->nh = (struct nexthop) { };
+ return;
+ }
+
+ /* Handle implicit NULL as empty MPLS stack */
+ if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
+ lnum = 0;
+
+ if (s->channel->cf->gw_mode == GW_DIRECT)
+ {
+ a->nh.labels = lnum;
+ memcpy(a->nh.label, labels, 4*lnum);
+ }
+ else /* GW_RECURSIVE */
+ {
+ mpls_label_stack ms;
+
+ ms.len = lnum;
+ memcpy(ms.stack, labels, 4*lnum);
+ rta_apply_hostentry(a, s->hostentry, &ms);
+ }
}
-#ifndef IPV6 /* IPv4 version */
-static byte *
-bgp_create_update(struct bgp_conn *conn, byte *buf)
+static inline int
+bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
{
- struct bgp_proto *p = conn->bgp;
- struct bgp_bucket *buck;
- int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
- byte *w;
- int wd_size = 0;
- int r_size = 0;
- int a_size = 0;
-
- w = buf+2;
- if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
- {
- DBG("Withdrawn routes:\n");
- wd_size = bgp_encode_prefixes(p, w, buck, remains);
- w += wd_size;
- remains -= wd_size;
- }
- put_u16(buf, wd_size);
+ struct bgp_proto *p = s->proto;
+ ip_addr *nh = (void *) a->u.ptr->data;
- if (!wd_size)
- {
- while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
- {
- if (EMPTY_LIST(buck->prefixes))
- {
- DBG("Deleting empty bucket %p\n", buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
-
- DBG("Processing bucket %p\n", buck);
- a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024);
-
- if (a_size < 0)
- {
- log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
- bgp_flush_prefixes(p, buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
-
- put_u16(w, a_size);
- w += a_size + 2;
- r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
- w += r_size;
- break;
- }
- }
- if (!a_size) /* Attributes not already encoded */
+ if (s->channel->cf->next_hop_self)
+ return 0;
+
+ if (s->channel->cf->next_hop_keep)
+ return 1;
+
+ /* Keep it when explicitly set in export filter */
+ if (a->type & EAF_FRESH)
+ return 1;
+
+ /* Keep it when exported to internal peers */
+ if (p->is_interior && ipa_nonzero(*nh))
+ return 1;
+
+ /* Keep it when forwarded between single-hop BGPs on the same iface */
+ struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
+ return p->neigh && (p->neigh->iface == ifa);
+}
+
+static inline int
+bgp_use_gateway(struct bgp_export_state *s)
+{
+ struct bgp_proto *p = s->proto;
+ rta *ra = s->route->attrs;
+
+ if (s->channel->cf->next_hop_self)
+ return 0;
+
+ /* We need one valid global gateway */
+ if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
+ return 0;
+
+ /* Use it when exported to internal peers */
+ if (p->is_interior)
+ return 1;
+
+ /* Use it when forwarded to single-hop BGP peer on the same iface */
+ return p->neigh && (p->neigh->iface == ra->nh.iface);
+}
+
+static void
+bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
+{
+ if (!a || !bgp_use_next_hop(s, a))
+ {
+ if (bgp_use_gateway(s))
{
- put_u16(w, 0);
- w += 2;
+ rta *ra = s->route->attrs;
+ ip_addr nh[1] = { ra->nh.gw };
+ bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
+
+ if (s->mpls)
+ {
+ u32 implicit_null = BGP_MPLS_NULL;
+ u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
+ uint lnum = ra->nh.labels ? ra->nh.labels : 1;
+ bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
+ }
}
- if (wd_size || r_size)
+ else
{
- BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
- return w;
+ ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
+ bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
+
+ /* TODO: Use local MPLS assigned label */
+ if (s->mpls)
+ bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK);
}
+ }
+
+ /* Check if next hop is valid */
+ a = bgp_find_attr(*to, BA_NEXT_HOP);
+ if (!a)
+ WITHDRAW(NO_NEXT_HOP);
+
+ ip_addr *nh = (void *) a->u.ptr->data;
+ ip_addr peer = s->proto->cf->remote_ip;
+ uint len = a->u.ptr->length;
+
+ /* Forbid zero next hop */
+ if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
+ WITHDRAW(BAD_NEXT_HOP);
+
+ /* Forbid next hop equal to neighbor IP */
+ if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
+ WITHDRAW(BAD_NEXT_HOP);
+
+ /* Forbid next hop with non-matching AF */
+ if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
+ !s->channel->ext_next_hop)
+ WITHDRAW(BAD_NEXT_HOP);
+
+ /* Just check that the MPLS label stack is present */
+ if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
+ WITHDRAW(NO_LABEL_STACK);
+}
+
+static uint
+bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
+{
+ /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
+ ip_addr *nh = (void *) a->u.ptr->data;
+ uint len = a->u.ptr->length;
+
+ ASSERT((len == 16) || (len == 32));
+
+ /*
+ * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
+ * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
+ * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
+ * IPv6 address with IPv6 NLRI.
+ */
+
+ if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
+ {
+ put_ip4(buf, ipa_to_ip4(nh[0]));
+ return 4;
+ }
+
+ put_ip6(buf, ipa_to_ip6(nh[0]));
+
+ if (len == 32)
+ put_ip6(buf+16, ipa_to_ip6(nh[1]));
+
+ return len;
+}
+
+static void
+bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+{
+ struct bgp_channel *c = s->channel;
+ struct adata *ad = lp_alloc_adata(s->pool, 32);
+ ip_addr *nh = (void *) ad->data;
+
+ if (len == 4)
+ {
+ nh[0] = ipa_from_ip4(get_ip4(data));
+ nh[1] = IPA_NONE;
+ }
+ else if (len == 16)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data));
+ nh[1] = IPA_NONE;
+
+ if (ipa_is_link_local(nh[0]))
+ { nh[1] = nh[0]; nh[0] = IPA_NONE; }
+ }
+ else if (len == 32)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data));
+ nh[1] = ipa_from_ip6(get_ip6(data+16));
+
+ if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
+ nh[1] = IPA_NONE;
+ }
else
- return NULL;
+ bgp_parse_error(s, 9);
+
+ if (ipa_zero(nh[1]))
+ ad->length = 16;
+
+ if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
+ WITHDRAW(BAD_NEXT_HOP);
+
+ // XXXX validate next hop
+
+ bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, a, nh[0], nh[1]);
}
-static byte *
-bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
+static uint
+bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
+ ip_addr *nh = (void *) a->u.ptr->data;
+ uint len = a->u.ptr->length;
- put_u32(buf, 0);
- return buf+4;
+ ASSERT((len == 16) || (len == 32));
+
+ /*
+ * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
+ * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
+ * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
+ * IPv6 address with VPNv6 NLRI.
+ */
+
+ if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
+ {
+ put_u64(buf, 0); /* VPN RD is 0 */
+ put_ip4(buf+8, ipa_to_ip4(nh[0]));
+ return 12;
+ }
+
+ put_u64(buf, 0); /* VPN RD is 0 */
+ put_ip6(buf+8, ipa_to_ip6(nh[0]));
+
+ if (len == 16)
+ return 24;
+
+ put_u64(buf+24, 0); /* VPN RD is 0 */
+ put_ip6(buf+32, ipa_to_ip6(nh[1]));
+
+ return 48;
}
-#else /* IPv6 version */
+static void
+bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
+{
+ struct bgp_channel *c = s->channel;
+ struct adata *ad = lp_alloc_adata(s->pool, 32);
+ ip_addr *nh = (void *) ad->data;
-static inline int
-same_iface(struct bgp_proto *p, ip_addr *ip)
+ if (len == 12)
+ {
+ nh[0] = ipa_from_ip4(get_ip4(data+8));
+ nh[1] = IPA_NONE;
+ }
+ else if (len == 24)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data+8));
+ nh[1] = IPA_NONE;
+
+ if (ipa_is_link_local(nh[0]))
+ { nh[1] = nh[0]; nh[0] = IPA_NONE; }
+ }
+ else if (len == 48)
+ {
+ nh[0] = ipa_from_ip6(get_ip6(data+8));
+ nh[1] = ipa_from_ip6(get_ip6(data+32));
+
+ if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
+ nh[1] = IPA_NONE;
+ }
+ else
+ bgp_parse_error(s, 9);
+
+ if (ipa_zero(nh[1]))
+ ad->length = 16;
+
+ /* XXXX which error */
+ if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
+ bgp_parse_error(s, 9);
+
+ if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
+ WITHDRAW(BAD_NEXT_HOP);
+
+ // XXXX validate next hop
+
+ bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, a, nh[0], nh[1]);
+}
+
+
+
+static uint
+bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
{
- neighbor *n = neigh_find(&p->p, ip, 0);
- return n && p->neigh && n->iface == p->neigh->iface;
+ return 0;
}
-static byte *
-bgp_create_update(struct bgp_conn *conn, byte *buf)
+static void
+bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
{
- struct bgp_proto *p = conn->bgp;
- struct bgp_bucket *buck;
- int size, second, rem_stored;
- int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
- byte *w, *w_stored, *tmp, *tstart;
- ip_addr *ipp, ip, ip_ll;
- ea_list *ea;
- eattr *nh;
+ /*
+ * Although we expect no next hop and RFC 7606 7.11 states that attribute
+ * MP_REACH_NLRI with unexpected next hop length is considered malformed,
+ * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
+ */
+
+ return;
+}
- put_u16(buf, 0);
- w = buf+4;
+static void
+bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
+{
+ /* NEXT_HOP shall not pass */
+ if (a)
+ bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
+}
+
+
+/*
+ * UPDATE
+ */
+
+static void
+bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
+{
+ if (path_id != s->last_id)
+ {
+ s->last_src = rt_get_source(&s->proto->p, path_id);
+ s->last_id = path_id;
- if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ rta_free(s->cached_rta);
+ s->cached_rta = NULL;
+ }
+
+ if (!a0)
+ {
+ /* Route withdraw */
+ rte_update2(&s->channel->c, n, NULL, s->last_src);
+ return;
+ }
+
+ /* Prepare cached route attributes */
+ if (s->cached_rta == NULL)
+ {
+ a0->src = s->last_src;
+
+ /* Workaround for rta_lookup() breaking eattrs */
+ ea_list *ea = a0->eattrs;
+ s->cached_rta = rta_lookup(a0);
+ a0->eattrs = ea;
+ }
+
+ rta *a = rta_clone(s->cached_rta);
+ rte *e = rte_get_temp(a);
+
+ e->pflags = 0;
+ e->u.bgp.suppressed = 0;
+ rte_update2(&s->channel->c, n, e, s->last_src);
+}
+
+static void
+bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen)
+{
+ u32 dummy = 0;
+ u32 *labels = mpls ? (u32 *) mpls->data : &dummy;
+ uint lnum = mpls ? (mpls->length / 4) : 1;
+
+ for (uint i = 0; i < lnum; i++)
+ {
+ put_u24(*pos, labels[i] << 4);
+ ADVANCE(*pos, *size, 3);
+ }
+
+ /* Add bottom-of-stack flag */
+ (*pos)[-1] |= BGP_MPLS_BOS;
+
+ *pxlen += 24 * lnum;
+}
+
+static void
+bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
+{
+ u32 labels[BGP_MPLS_MAX], label;
+ uint lnum = 0;
+
+ do {
+ if (*pxlen < 24)
+ bgp_parse_error(s, 1);
+
+ label = get_u24(*pos);
+ labels[lnum++] = label >> 4;
+ ADVANCE(*pos, *len, 3);
+ *pxlen -= 24;
+
+ /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */
+ if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC))
+ break;
+ }
+ while (!(label & BGP_MPLS_BOS));
+
+ if (!a)
+ return;
+
+ /* Attach MPLS attribute unless we already have one */
+ if (!s->mpls_labels)
+ {
+ s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
+ bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
+ }
+
+ /* Overwrite data in the attribute */
+ s->mpls_labels->length = 4*lnum;
+ memcpy(s->mpls_labels->data, labels, 4*lnum);
+
+ /* Update next hop entry in rta */
+ bgp_apply_mpls_labels(s, a, labels, lnum);
+
+ /* Attributes were changed, invalidate cached entry */
+ rta_free(s->cached_rta);
+ s->cached_rta = NULL;
+
+ return;
+}
+
+static uint
+bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+ byte *pos = buf;
+
+ while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_ip4 *net = (void *) px->net;
+
+ /* Encode path ID */
+ if (s->add_path)
{
- DBG("Withdrawn routes:\n");
- tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
- *tmp++ = 0;
- *tmp++ = BGP_AF_IPV6;
- *tmp++ = 1;
- ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
- size = bgp_encode_attrs(p, w, ea, remains);
- ASSERT(size >= 0);
- w += size;
- remains -= size;
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
}
- else
+
+ /* Encode prefix length */
+ *pos = net->pxlen;
+ ADVANCE(pos, size, 1);
+
+ /* Encode MPLS labels */
+ if (s->mpls)
+ bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
+
+ /* Encode prefix body */
+ ip4_addr a = ip4_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
+ memcpy(pos, &a, b);
+ ADVANCE(pos, size, b);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
+}
+
+static void
+bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+ while (len)
+ {
+ net_addr_ip4 net;
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
{
- while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
- {
- if (EMPTY_LIST(buck->prefixes))
- {
- DBG("Deleting empty bucket %p\n", buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
-
- DBG("Processing bucket %p\n", buck);
- rem_stored = remains;
- w_stored = w;
-
- size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024);
- if (size < 0)
- {
- log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
- bgp_flush_prefixes(p, buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
- w += size;
- remains -= size;
-
- /* We have two addresses here in NEXT_HOP eattr. Really.
- Unless NEXT_HOP was modified by filter */
- nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- ASSERT(nh);
- second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
- ipp = (ip_addr *) nh->u.ptr->data;
- ip = ipp[0];
- ip_ll = IPA_NONE;
-
- if (ipa_equal(ip, p->source_addr))
- ip_ll = p->local_link;
- else
- {
- /* If we send a route with 'third party' next hop destinated
- * in the same interface, we should also send a link local
- * next hop address. We use the received one (stored in the
- * other part of BA_NEXT_HOP eattr). If we didn't received
- * it (for example it is a static route), we can't use
- * 'third party' next hop and we have to use local IP address
- * as next hop. Sending original next hop address without
- * link local address seems to be a natural way to solve that
- * problem, but it is contrary to RFC 2545 and Quagga does not
- * accept such routes.
- *
- * There are two cases, either we have global IP, or
- * IPA_NONE if the neighbor is link-local. For IPA_NONE,
- * we suppose it is on the same iface, see bgp_update_attrs().
- */
-
- if (ipa_zero(ip) || same_iface(p, &ip))
- {
- if (second && ipa_nonzero(ipp[1]))
- ip_ll = ipp[1];
- else
- {
- switch (p->cf->missing_lladdr)
- {
- case MLL_SELF:
- ip = p->source_addr;
- ip_ll = p->local_link;
- break;
- case MLL_DROP:
- log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
- w = w_stored;
- remains = rem_stored;
- bgp_flush_prefixes(p, buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- case MLL_IGNORE:
- break;
- }
- }
- }
- }
-
- tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
- *tmp++ = 0;
- *tmp++ = BGP_AF_IPV6;
- *tmp++ = 1;
-
- if (ipa_is_link_local(ip))
- ip = IPA_NONE;
-
- if (ipa_nonzero(ip_ll))
- {
- *tmp++ = 32;
- ipa_hton(ip);
- memcpy(tmp, &ip, 16);
- ipa_hton(ip_ll);
- memcpy(tmp+16, &ip_ll, 16);
- tmp += 32;
- }
- else
- {
- *tmp++ = 16;
- ipa_hton(ip);
- memcpy(tmp, &ip, 16);
- tmp += 16;
- }
-
- *tmp++ = 0; /* No SNPA information */
- tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
- ea->attrs[0].u.ptr->length = tmp - tstart;
- size = bgp_encode_attrs(p, w, ea, remains);
- ASSERT(size >= 0);
- w += size;
- break;
- }
+ if (len < 5)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
}
- size = w - (buf+4);
- put_u16(buf+2, size);
- lp_flush(bgp_linpool);
- if (size)
+ /* Decode prefix length */
+ uint l = *pos;
+ ADVANCE(pos, len, 1);
+
+ if (len < ((l + 7) / 8))
+ bgp_parse_error(s, 1);
+
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+
+ if (l > IP4_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ /* Decode prefix body */
+ ip4_addr addr = IP4_NONE;
+ uint b = (l + 7) / 8;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_IP4(ip4_ntoh(addr), l);
+ net_normalize_ip4(&net);
+
+ // XXXX validate prefix
+
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
+}
+
+
+static uint
+bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+ byte *pos = buf;
+
+ while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_ip6 *net = (void *) px->net;
+
+ /* Encode path ID */
+ if (s->add_path)
{
- BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
- return w;
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
}
- else
- return NULL;
+
+ /* Encode prefix length */
+ *pos = net->pxlen;
+ ADVANCE(pos, size, 1);
+
+ /* Encode MPLS labels */
+ if (s->mpls)
+ bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
+
+ /* Encode prefix body */
+ ip6_addr a = ip6_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
+ memcpy(pos, &a, b);
+ ADVANCE(pos, size, b);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
}
-static byte *
-bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
+static void
+bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
+ while (len)
+ {
+ net_addr_ip6 net;
+ u32 path_id = 0;
- put_u16(buf+0, 0);
- put_u16(buf+2, 6); /* length 4-9 */
- buf += 4;
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ if (len < 5)
+ bgp_parse_error(s, 1);
- /* Empty MP_UNREACH_NLRI atribute */
- *buf++ = BAF_OPTIONAL;
- *buf++ = BA_MP_UNREACH_NLRI;
- *buf++ = 3; /* Length 7-9 */
- *buf++ = 0; /* AFI */
- *buf++ = BGP_AF_IPV6;
- *buf++ = 1; /* SAFI */
- return buf;
-}
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
-#endif
+ /* Decode prefix length */
+ uint l = *pos;
+ ADVANCE(pos, len, 1);
-static inline byte *
-bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
-{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
+ if (len < ((l + 7) / 8))
+ bgp_parse_error(s, 1);
- /* Original original route refresh request, RFC 2918 */
- *buf++ = 0;
- *buf++ = BGP_AF;
- *buf++ = BGP_RR_REQUEST;
- *buf++ = 1; /* SAFI */
- return buf;
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+
+ if (l > IP6_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ /* Decode prefix body */
+ ip6_addr addr = IP6_NONE;
+ uint b = (l + 7) / 8;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_IP6(ip6_ntoh(addr), l);
+ net_normalize_ip6(&net);
+
+ // XXXX validate prefix
+
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
}
-static inline byte *
-bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
+static uint
+bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
+ byte *pos = buf;
- /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
- *buf++ = 0;
- *buf++ = BGP_AF;
- *buf++ = BGP_RR_BEGIN;
- *buf++ = 1; /* SAFI */
- return buf;
+ while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_vpn4 *net = (void *) px->net;
+
+ /* Encode path ID */
+ if (s->add_path)
+ {
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
+ }
+
+ /* Encode prefix length */
+ *pos = 64 + net->pxlen;
+ ADVANCE(pos, size, 1);
+
+ /* Encode MPLS labels */
+ if (s->mpls)
+ bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
+
+ /* Encode route distinguisher */
+ put_u64(pos, net->rd);
+ ADVANCE(pos, size, 8);
+
+ /* Encode prefix body */
+ ip4_addr a = ip4_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
+ memcpy(pos, &a, b);
+ ADVANCE(pos, size, b);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
}
-static inline byte *
-bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
+static void
+bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
+ while (len)
+ {
+ net_addr_vpn4 net;
+ u32 path_id = 0;
- /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
- *buf++ = 0;
- *buf++ = BGP_AF;
- *buf++ = BGP_RR_END;
- *buf++ = 1; /* SAFI */
- return buf;
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ if (len < 5)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
+
+ /* Decode prefix length */
+ uint l = *pos;
+ ADVANCE(pos, len, 1);
+
+ if (len < ((l + 7) / 8))
+ bgp_parse_error(s, 1);
+
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+
+ /* Decode route distinguisher */
+ if (l < 64)
+ bgp_parse_error(s, 1);
+
+ u64 rd = get_u64(pos);
+ ADVANCE(pos, len, 8);
+ l -= 64;
+
+ if (l > IP4_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ /* Decode prefix body */
+ ip4_addr addr = IP4_NONE;
+ uint b = (l + 7) / 8;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd);
+ net_normalize_vpn4(&net);
+
+ // XXXX validate prefix
+
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
}
+/*
+ * Encode VPNv6 prefixes from @buck into @buf as BGP NLRI entries.
+ *
+ * Prefixes are taken from the head of buck->prefixes and released with
+ * bgp_free_prefix() once written. The loop stops when the bucket is empty or
+ * when @size has dropped below BGP_NLRI_MAX. Returns the number of bytes
+ * written to @buf.
+ */
+static uint
+bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+  byte *pos = buf;
+
+  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
+  {
+    struct bgp_prefix *px = HEAD(buck->prefixes);
+    struct net_addr_vpn6 *net = (void *) px->net;
+
+    /* Encode path ID */
+    if (s->add_path)
+    {
+      put_u32(pos, px->path_id);
+      ADVANCE(pos, size, 4);
+    }
+
+    /* Encode prefix length; the 64-bit route distinguisher is counted in it */
+    *pos = 64 + net->pxlen;
+    ADVANCE(pos, size, 1);
+
+    /*
+     * Encode MPLS labels, but only when the write state asks for them, the
+     * same way the ip4, ip6 and vpn4 encoders do. pos - 1 is the prefix
+     * length octet written just above.
+     */
+    if (s->mpls)
+      bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
+
+    /* Encode route distinguisher */
+    put_u64(pos, net->rd);
+    ADVANCE(pos, size, 8);
+
+    /* Encode prefix body: only the bytes covered by the prefix length */
+    ip6_addr a = ip6_hton(net->prefix);
+    uint b = (net->pxlen + 7) / 8;
+    memcpy(pos, &a, b);
+    ADVANCE(pos, size, b);
+
+    bgp_free_prefix(s->channel, px);
+  }
+
+  return pos - buf;
+}
+
static void
-bgp_create_header(byte *buf, uint len, uint type)
+bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
- memset(buf, 0xff, 16); /* Marker */
- put_u16(buf+16, len);
- buf[18] = type;
+ while (len)
+ {
+ net_addr_vpn6 net;
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ if (len < 5)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
+
+ /* Decode prefix length */
+ uint l = *pos;
+ ADVANCE(pos, len, 1);
+
+ if (len < ((l + 7) / 8))
+ bgp_parse_error(s, 1);
+
+ /* Decode MPLS labels */
+ if (s->mpls)
+ bgp_decode_mpls_labels(s, &pos, &len, &l, a);
+
+ /* Decode route distinguisher */
+ if (l < 64)
+ bgp_parse_error(s, 1);
+
+ u64 rd = get_u64(pos);
+ ADVANCE(pos, len, 8);
+ l -= 64;
+
+ if (l > IP6_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ /* Decode prefix body */
+ ip6_addr addr = IP6_NONE;
+ uint b = (l + 7) / 8;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd);
+ net_normalize_vpn6(&net);
+
+ // XXXX validate prefix
+
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
}
-/**
- * bgp_fire_tx - transmit packets
- * @conn: connection
- *
- * Whenever the transmit buffers of the underlying TCP connection
- * are free and we have any packets queued for sending, the socket functions
- * call bgp_fire_tx() which takes care of selecting the highest priority packet
- * queued (Notification > Keepalive > Open > Update), assembling its header
- * and body and sending it to the connection.
- */
-static int
-bgp_fire_tx(struct bgp_conn *conn)
+
+/*
+ * Encode IPv4 flowspec routes from @buck into @buf as BGP NLRI entries.
+ *
+ * Each entry is an optional 4-byte path ID followed by the stored flow data
+ * (net->data), which already includes its length field. An entry is only
+ * started when all of it fits into the remaining @size, so the output never
+ * ends with a path ID that has no flow data after it. Written prefixes are
+ * released with bgp_free_prefix(). Returns the number of bytes written.
+ */
+static uint
+bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
 {
- struct bgp_proto *p = conn->bgp;
- uint s = conn->packets_to_send;
- sock *sk = conn->sk;
- byte *buf, *pkt, *end;
- int type;
+  byte *pos = buf;
+
+  while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
+  {
+    struct bgp_prefix *px = HEAD(buck->prefixes);
+    struct net_addr_flow4 *net = (void *) px->net;
+    uint flen = net->length - sizeof(net_addr_flow4);
+
+    /* Check room for the complete entry before anything is written */
+    if (flen + (s->add_path ? 4 : 0) > size)
+      break;
+
- if (!sk)
+    /* Encode path ID */
+    if (s->add_path)
 {
- conn->packets_to_send = 0;
- return 0;
+      put_u32(pos, px->path_id);
+      ADVANCE(pos, size, 4);
 }
- buf = sk->tbuf;
- pkt = buf + BGP_HEADER_LENGTH;
- if (s & (1 << PKT_SCHEDULE_CLOSE))
+
+    /* Copy whole flow data including length */
+    memcpy(pos, net->data, flen);
+    ADVANCE(pos, size, flen);
+
+    bgp_free_prefix(s->channel, px);
+  }
+
+  return pos - buf;
+}
+
+/*
+ * Decode IPv4 flowspec NLRI from @pos (@len bytes) and hand every flow to
+ * bgp_rte_update() together with the attributes @a.
+ *
+ * Each entry is an optional 4-byte path ID followed by a flow with its length
+ * header. Malformed input is reported through bgp_parse_error(); the code
+ * after each check assumes that call does not return.
+ */
+static void
+bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+  while (len)
+  {
+    u32 path_id = 0;
+
+    /* Decode path ID */
+    if (s->add_path)
 {
- /* We can finally close connection and enter idle state */
- bgp_conn_enter_idle_state(conn);
- return 0;
+      if (len < 4)
+        bgp_parse_error(s, 1);
+
+      path_id = get_u32(pos);
+      ADVANCE(pos, len, 4);
 }
- if (s & (1 << PKT_NOTIFICATION))
+
+    if (len < 2)
+      bgp_parse_error(s, 1);
+
+    /* Decode flow length: header bytes plus the flow data they announce */
+    uint hlen = flow_hdr_length(pos);
+    uint dlen = flow_read_length(pos);
+    uint flen = hlen + dlen;
+    byte *data = pos + hlen;
+
+    if (len < flen)
+      bgp_parse_error(s, 1);
+
+    /* Validate flow data */
+    enum flow_validated_state r = flow4_validate(data, dlen);
+    if (r != FLOW_ST_VALID)
 {
- s = 1 << PKT_SCHEDULE_CLOSE;
- type = PKT_NOTIFICATION;
- end = bgp_create_notification(conn, pkt);
+      log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
+      bgp_parse_error(s, 1);
 }
- else if (s & (1 << PKT_KEEPALIVE))
+
+    if (data[0] != FLOW_TYPE_DST_PREFIX)
 {
- s &= ~(1 << PKT_KEEPALIVE);
- type = PKT_KEEPALIVE;
- end = pkt; /* Keepalives carry no data */
- BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
- bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
+      log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
+      bgp_parse_error(s, 1);
 }
- else if (s & (1 << PKT_OPEN))
+
+    /*
+     * Decode dst prefix. The component starts with the type octet (data[0])
+     * and the prefix length (data[1]); the address bytes follow at data + 2.
+     */
+    ip4_addr px = IP4_NONE;
+    uint pxlen = data[1];
+
+    /* Keep the copy below inside both px and the flow data */
+    if ((pxlen > IP4_MAX_PREFIX_LENGTH) || (dlen < 2 + BYTES(pxlen)))
+      bgp_parse_error(s, 1);
+
+    // FIXME: Use some generic function
+    memcpy(&px, data + 2, BYTES(pxlen));
+
+    /* Convert from wire order before masking, as the other decoders do */
+    px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen));
+
+    /* Prepare the flow */
+    net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
+    net_fill_flow4(n, px, pxlen, pos, flen);
+    ADVANCE(pos, len, flen);
+
+    bgp_rte_update(s, n, path_id, a);
+  }
+}
+
+
+/*
+ * Encode IPv6 flowspec routes from @buck into @buf as BGP NLRI entries.
+ *
+ * Each entry is an optional 4-byte path ID followed by the stored flow data
+ * (net->data), which already includes its length field. An entry is only
+ * started when all of it fits into the remaining @size, so the output never
+ * ends with a path ID that has no flow data after it. Written prefixes are
+ * released with bgp_free_prefix(). Returns the number of bytes written.
+ */
+static uint
+bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+  byte *pos = buf;
+
+  while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
+  {
+    struct bgp_prefix *px = HEAD(buck->prefixes);
+    struct net_addr_flow6 *net = (void *) px->net;
+    uint flen = net->length - sizeof(net_addr_flow6);
+
+    /* Check room for the complete entry before anything is written */
+    if (flen + (s->add_path ? 4 : 0) > size)
+      break;
+
+    /* Encode path ID */
+    if (s->add_path)
 {
- s &= ~(1 << PKT_OPEN);
- type = PKT_OPEN;
- end = bgp_create_open(conn, pkt);
+      put_u32(pos, px->path_id);
+      ADVANCE(pos, size, 4);
 }
- else if (s & (1 << PKT_ROUTE_REFRESH))
+
+    /* Copy whole flow data including length */
+    memcpy(pos, net->data, flen);
+    ADVANCE(pos, size, flen);
+
+    bgp_free_prefix(s->channel, px);
+  }
+
+  return pos - buf;
+}
+
+/*
+ * Decode IPv6 flowspec NLRI from @pos (@len bytes) and hand every flow to
+ * bgp_rte_update() together with the attributes @a.
+ *
+ * Each entry is an optional 4-byte path ID followed by a flow with its length
+ * header. Malformed input is reported through bgp_parse_error(); the code
+ * after each check assumes that call does not return.
+ */
+static void
+bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+  while (len)
+  {
+    u32 path_id = 0;
+
+    /* Decode path ID */
+    if (s->add_path)
 {
- s &= ~(1 << PKT_ROUTE_REFRESH);
- type = PKT_ROUTE_REFRESH;
- end = bgp_create_route_refresh(conn, pkt);
+      if (len < 4)
+        bgp_parse_error(s, 1);
+
+      path_id = get_u32(pos);
+      ADVANCE(pos, len, 4);
 }
- else if (s & (1 << PKT_BEGIN_REFRESH))
+
+    if (len < 2)
+      bgp_parse_error(s, 1);
+
+    /* Decode flow length: header bytes plus the flow data they announce */
+    uint hlen = flow_hdr_length(pos);
+    uint dlen = flow_read_length(pos);
+    uint flen = hlen + dlen;
+    byte *data = pos + hlen;
+
+    if (len < flen)
+      bgp_parse_error(s, 1);
+
+    /* Validate flow data */
+    enum flow_validated_state r = flow6_validate(data, dlen);
+    if (r != FLOW_ST_VALID)
 {
- s &= ~(1 << PKT_BEGIN_REFRESH);
- type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
- end = bgp_create_begin_refresh(conn, pkt);
+      log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
+      bgp_parse_error(s, 1);
 }
- else if (s & (1 << PKT_UPDATE))
+
+    if (data[0] != FLOW_TYPE_DST_PREFIX)
 {
- type = PKT_UPDATE;
- end = bgp_create_update(conn, pkt);
+      log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
+      bgp_parse_error(s, 1);
+    }
- if (!end)
- {
- /* No update to send, perhaps we need to send End-of-RIB or EoRR */
+    /*
+     * Decode dst prefix. data[0] is the component type and data[1] the prefix
+     * length, so the address cannot start at data itself.
+     * NOTE(review): per the IPv6 flowspec encoding (RFC 8956) a prefix offset
+     * octet sits at data[2] and the pattern bytes start at data + 3 - confirm
+     * that the flow6 data built elsewhere in the project uses this layout.
+     * The pattern is copied to the start of px without shifting by the offset.
+     */
+    if (dlen < 3)
+      bgp_parse_error(s, 1);
+
+    ip6_addr px = IP6_NONE;
+    uint pxlen = data[1];
+    uint pxoff = data[2];
- conn->packets_to_send = 0;
+
+    /* Keep the copy below inside both px and the flow data */
+    if ((pxlen > IP6_MAX_PREFIX_LENGTH) || (pxoff > pxlen) || (dlen < 3 + BYTES(pxlen - pxoff)))
+      bgp_parse_error(s, 1);
+
+    // FIXME: Use some generic function
+    memcpy(&px, data + 3, BYTES(pxlen - pxoff));
+
+    /* Convert from wire order before masking, as the other decoders do */
+    px = ip6_and(ip6_ntoh(px), ip6_mkmask(pxlen));
- if (p->feed_state == BFS_LOADED)
- {
- type = PKT_UPDATE;
- end = bgp_create_end_mark(conn, pkt);
- }
+
+    /* Prepare the flow */
+    net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
+    net_fill_flow6(n, px, pxlen, pos, flen);
+    ADVANCE(pos, len, flen);
- else if (p->feed_state == BFS_REFRESHED)
- {
- type = PKT_ROUTE_REFRESH;
- end = bgp_create_end_refresh(conn, pkt);
- }
+
+    bgp_rte_update(s, n, path_id, a);
+  }
+}
- else /* Really nothing to send */
- return 0;
- p->feed_state = BFS_NONE;
- }
- }
- else
- return 0;
+/*
+ * Table of supported address families. Each entry binds one AFI/SAFI value
+ * (.afi) to its network type, its display name and the hooks used to encode,
+ * decode and update NLRI and next hops for that family. Fields that are not
+ * named in an entry (.mpls, .no_igp) are left zero. bgp_get_af_desc() scans
+ * the entries in the order given here.
+ */
+static const struct bgp_af_desc bgp_af_table[] = {
+  /* Plain IPv4: unicast, multicast and labelled */
+  {
+    .name = "ipv4",
+    .afi = BGP_AF_IPV4,
+    .net = NET_IP4,
+    .encode_nlri = bgp_encode_nlri_ip4,
+    .decode_nlri = bgp_decode_nlri_ip4,
+    .encode_next_hop = bgp_encode_next_hop_ip,
+    .decode_next_hop = bgp_decode_next_hop_ip,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+  {
+    .name = "ipv4-mc",
+    .afi = BGP_AF_IPV4_MC,
+    .net = NET_IP4,
+    .encode_nlri = bgp_encode_nlri_ip4,
+    .decode_nlri = bgp_decode_nlri_ip4,
+    .encode_next_hop = bgp_encode_next_hop_ip,
+    .decode_next_hop = bgp_decode_next_hop_ip,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+  {
+    .name = "ipv4-mpls",
+    .afi = BGP_AF_IPV4_MPLS,
+    .net = NET_IP4,
+    .mpls = 1,
+    .encode_nlri = bgp_encode_nlri_ip4,
+    .decode_nlri = bgp_decode_nlri_ip4,
+    .encode_next_hop = bgp_encode_next_hop_ip,
+    .decode_next_hop = bgp_decode_next_hop_ip,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+
+  /* Plain IPv6: unicast, multicast and labelled */
+  {
+    .name = "ipv6",
+    .afi = BGP_AF_IPV6,
+    .net = NET_IP6,
+    .encode_nlri = bgp_encode_nlri_ip6,
+    .decode_nlri = bgp_decode_nlri_ip6,
+    .encode_next_hop = bgp_encode_next_hop_ip,
+    .decode_next_hop = bgp_decode_next_hop_ip,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+  {
+    .name = "ipv6-mc",
+    .afi = BGP_AF_IPV6_MC,
+    .net = NET_IP6,
+    .encode_nlri = bgp_encode_nlri_ip6,
+    .decode_nlri = bgp_decode_nlri_ip6,
+    .encode_next_hop = bgp_encode_next_hop_ip,
+    .decode_next_hop = bgp_decode_next_hop_ip,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+  {
+    .name = "ipv6-mpls",
+    .afi = BGP_AF_IPV6_MPLS,
+    .net = NET_IP6,
+    .mpls = 1,
+    .encode_nlri = bgp_encode_nlri_ip6,
+    .decode_nlri = bgp_decode_nlri_ip6,
+    .encode_next_hop = bgp_encode_next_hop_ip,
+    .decode_next_hop = bgp_decode_next_hop_ip,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+
+  /* VPN families: labelled first, then multicast (no .mpls) */
+  {
+    .name = "vpn4-mpls",
+    .afi = BGP_AF_VPN4_MPLS,
+    .net = NET_VPN4,
+    .mpls = 1,
+    .encode_nlri = bgp_encode_nlri_vpn4,
+    .decode_nlri = bgp_decode_nlri_vpn4,
+    .encode_next_hop = bgp_encode_next_hop_vpn,
+    .decode_next_hop = bgp_decode_next_hop_vpn,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+  {
+    .name = "vpn6-mpls",
+    .afi = BGP_AF_VPN6_MPLS,
+    .net = NET_VPN6,
+    .mpls = 1,
+    .encode_nlri = bgp_encode_nlri_vpn6,
+    .decode_nlri = bgp_decode_nlri_vpn6,
+    .encode_next_hop = bgp_encode_next_hop_vpn,
+    .decode_next_hop = bgp_decode_next_hop_vpn,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+  {
+    .name = "vpn4-mc",
+    .afi = BGP_AF_VPN4_MC,
+    .net = NET_VPN4,
+    .encode_nlri = bgp_encode_nlri_vpn4,
+    .decode_nlri = bgp_decode_nlri_vpn4,
+    .encode_next_hop = bgp_encode_next_hop_vpn,
+    .decode_next_hop = bgp_decode_next_hop_vpn,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+  {
+    .name = "vpn6-mc",
+    .afi = BGP_AF_VPN6_MC,
+    .net = NET_VPN6,
+    .encode_nlri = bgp_encode_nlri_vpn6,
+    .decode_nlri = bgp_decode_nlri_vpn6,
+    .encode_next_hop = bgp_encode_next_hop_vpn,
+    .decode_next_hop = bgp_decode_next_hop_vpn,
+    .update_next_hop = bgp_update_next_hop_ip,
+  },
+
+  /* Flowspec families: .no_igp set, next hop hooks are the "none" variants */
+  {
+    .name = "flow4",
+    .afi = BGP_AF_FLOW4,
+    .net = NET_FLOW4,
+    .no_igp = 1,
+    .encode_nlri = bgp_encode_nlri_flow4,
+    .decode_nlri = bgp_decode_nlri_flow4,
+    .encode_next_hop = bgp_encode_next_hop_none,
+    .decode_next_hop = bgp_decode_next_hop_none,
+    .update_next_hop = bgp_update_next_hop_none,
+  },
+  {
+    .name = "flow6",
+    .afi = BGP_AF_FLOW6,
+    .net = NET_FLOW6,
+    .no_igp = 1,
+    .encode_nlri = bgp_encode_nlri_flow6,
+    .decode_nlri = bgp_decode_nlri_flow6,
+    .encode_next_hop = bgp_encode_next_hop_none,
+    .decode_next_hop = bgp_decode_next_hop_none,
+    .update_next_hop = bgp_update_next_hop_none,
+  },
+};
- conn->packets_to_send = s;
- bgp_create_header(buf, end - buf, type);
- return sk_send(sk, end - buf);
+/*
+ * Look up the address family descriptor whose .afi equals @afi.
+ * Returns a pointer into bgp_af_table, or NULL when no entry matches.
+ */
+const struct bgp_af_desc *
+bgp_get_af_desc(u32 afi)
+{
+  const struct bgp_af_desc *desc = bgp_af_table;
+  const struct bgp_af_desc *stop = bgp_af_table + ARRAY_SIZE(bgp_af_table);
+
+  /* Linear scan; the first matching entry wins */
+  for (; desc < stop; desc++)
+    if (desc->afi == afi)
+      return desc;
+
+  return NULL;
 }
-/**
- * bgp_schedule_packet - schedule a packet for transmission
- * @conn: connection
- * @type: packet type
- *
- * Schedule a packet of type @type to be sent as soon as possible.
- */
-void
-bgp_schedule_packet(struct bgp_conn *conn, int type)
+static inline uint
+bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- DBG("BGP: Scheduling packet type %d\n", type);
- conn->packets_to_send |= 1 << type;
- if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev))
- ev_schedule(conn->tx_ev);
+ return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
}
-void
-bgp_kick_tx(void *vconn)
+static inline uint
+bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
{
- struct bgp_conn *conn = vconn;
-
- DBG("BGP: kicking TX\n");
- while (bgp_fire_tx(conn) > 0)
- ;
+ return s->channel->desc->encode_next_hop(s, nh, buf, 255);
}
void
-bgp_tx(sock *sk)
+bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
{
- struct bgp_conn *conn = sk->data;
-
- DBG("BGP: TX hook\n");
- while (bgp_fire_tx(conn) > 0)
- ;
+ s->channel->desc->update_next_hop(s, a, to);
}
-/* Capatibility negotiation as per RFC 2842 */
+#define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
-void
-bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
+static byte *
+bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- // struct bgp_proto *p = conn->bgp;
- int i, cl;
+ /*
+ * 2 B Withdrawn Routes Length (zero)
+ * --- IPv4 Withdrawn Routes NLRI (unused)
+ * 2 B Total Path Attribute Length
+ * var Path Attributes
+ * var IPv4 Network Layer Reachability Information
+ */
+
+ int lr, la;
+
+ la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
+ if (la < 0)
+ {
+ /* Attribute list too long */
+ bgp_withdraw_bucket(s->channel, buck);
+ return NULL;
+ }
- while (len > 0)
- {
- if (len < 2 || len < 2 + opt[1])
- goto err;
+ put_u16(buf+0, 0);
+ put_u16(buf+2, la);
- cl = opt[1];
+ lr = bgp_encode_nlri(s, buck, buf+4+la, end);
- switch (opt[0])
- {
- case 2: /* Route refresh capability, RFC 2918 */
- if (cl != 0)
- goto err;
- conn->peer_refresh_support = 1;
- break;
+ return buf+4+la+lr;
+}
- case 6: /* Extended message length capability, draft */
- if (cl != 0)
- goto err;
- conn->peer_ext_messages_support = 1;
- break;
+static byte *
+bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
+{
+ /*
+ * 2 B IPv4 Withdrawn Routes Length (zero)
+ * --- IPv4 Withdrawn Routes NLRI (unused)
+ * 2 B Total Path Attribute Length
+ * 1 B MP_REACH_NLRI hdr - Attribute Flags
+ * 1 B MP_REACH_NLRI hdr - Attribute Type Code
+ * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
+ * 2 B MP_REACH_NLRI data - Address Family Identifier
+ * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
+ * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
+ * var MP_REACH_NLRI data - Network Address of Next Hop
+ * 1 B MP_REACH_NLRI data - Reserved (zero)
+ * var MP_REACH_NLRI data - Network Layer Reachability Information
+ * var Rest of Path Attributes
+ * --- IPv4 Network Layer Reachability Information (unused)
+ */
+
+ int lh, lr, la; /* Lengths of next hop, NLRI and attributes */
+
+ /* Begin of MP_REACH_NLRI atribute */
+ buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
+ buf[5] = BA_MP_REACH_NLRI;
+ put_u16(buf+6, 0); /* Will be fixed later */
+ put_af3(buf+8, s->channel->afi);
+ byte *pos = buf+11;
+
+ /* Encode attributes to temporary buffer */
+ byte *abuf = alloca(MAX_ATTRS_LENGTH);
+ la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
+ if (la < 0)
+ {
+ /* Attribute list too long */
+ bgp_withdraw_bucket(s->channel, buck);
+ return NULL;
+ }
- case 64: /* Graceful restart capability, RFC 4724 */
- if (cl % 4 != 2)
- goto err;
- conn->peer_gr_aware = 1;
- conn->peer_gr_able = 0;
- conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
- conn->peer_gr_flags = opt[2] & 0xf0;
- conn->peer_gr_aflags = 0;
- for (i = 2; i < cl; i += 4)
- if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
- {
- conn->peer_gr_able = 1;
- conn->peer_gr_aflags = opt[2+i+3];
- }
- break;
+ /* Encode the next hop */
+ lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
+ *pos = lh;
+ pos += 1+lh;
- case 65: /* AS4 capability, RFC 4893 */
- if (cl != 4)
- goto err;
- conn->peer_as4_support = 1;
- if (conn->bgp->cf->enable_as4)
- conn->advertised_as = get_u32(opt + 2);
- break;
+ /* Reserved field */
+ *pos++ = 0;
- case 69: /* ADD-PATH capability, RFC 7911 */
- if (cl % 4)
- goto err;
- for (i = 0; i < cl; i += 4)
- if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
- conn->peer_add_path = opt[2+i+3];
- if (conn->peer_add_path > ADD_PATH_FULL)
- goto err;
- break;
+ /* Encode the NLRI */
+ lr = bgp_encode_nlri(s, buck, pos, end - la);
+ pos += lr;
- case 70: /* Enhanced route refresh capability, RFC 7313 */
- if (cl != 0)
- goto err;
- conn->peer_enhanced_refresh_support = 1;
- break;
+ /* End of MP_REACH_NLRI atribute, update data length */
+ put_u16(buf+6, pos-buf-8);
- /* We can safely ignore all other capabilities */
- }
- len -= 2 + cl;
- opt += 2 + cl;
- }
- return;
+ /* Copy remaining attributes */
+ memcpy(pos, abuf, la);
+ pos += la;
- err:
- bgp_error(conn, 2, 0, NULL, 0);
- return;
+ /* Initial UPDATE fields */
+ put_u16(buf+0, 0);
+ put_u16(buf+2, pos-buf-4);
+
+ return pos;
}
-static int
-bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
+#undef MAX_ATTRS_LENGTH
+
+static byte *
+bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- struct bgp_proto *p = conn->bgp;
- int ol;
+ /*
+ * 2 B Withdrawn Routes Length
+ * var IPv4 Withdrawn Routes NLRI
+ * 2 B Total Path Attribute Length (zero)
+ * --- Path Attributes (unused)
+ * --- IPv4 Network Layer Reachability Information (unused)
+ */
- while (len > 0)
- {
- if (len < 2 || len < 2 + opt[1])
- { bgp_error(conn, 2, 0, NULL, 0); return 0; }
-#ifdef LOCAL_DEBUG
- {
- int i;
- DBG("\tOption %02x:", opt[0]);
- for(i=0; i<opt[1]; i++)
- DBG(" %02x", opt[2+i]);
- DBG("\n");
- }
-#endif
+ uint len = bgp_encode_nlri(s, buck, buf+2, end);
- ol = opt[1];
- switch (opt[0])
- {
- case 2:
- if (conn->start_state == BSS_CONNECT_NOCAP)
- BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
- else
- bgp_parse_capabilities(conn, opt + 2, ol);
- break;
+ put_u16(buf+0, len);
+ put_u16(buf+2+len, 0);
- default:
- /*
- * BGP specs don't tell us to send which option
- * we didn't recognize, but it's common practice
- * to do so. Also, capability negotiation with
- * Cisco routers doesn't work without that.
- */
- bgp_error(conn, 2, 4, opt, ol);
- return 0;
- }
- len -= 2 + ol;
- opt += 2 + ol;
- }
- return 0;
+ return buf+4+len;
}
-static void
-bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
+static byte *
+bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- struct bgp_conn *other;
- struct bgp_proto *p = conn->bgp;
- unsigned hold;
- u16 base_as;
- u32 id;
+ /*
+ * 2 B Withdrawn Routes Length (zero)
+ * --- IPv4 Withdrawn Routes NLRI (unused)
+ * 2 B Total Path Attribute Length
+ * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
+ * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
+ * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
+ * 2 B MP_UNREACH_NLRI data - Address Family Identifier
+ * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
+ * var MP_UNREACH_NLRI data - Network Layer Reachability Information
+ * --- IPv4 Network Layer Reachability Information (unused)
+ */
+
+ uint len = bgp_encode_nlri(s, buck, buf+11, end);
- /* Check state */
- if (conn->state != BS_OPENSENT)
- { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
+ put_u16(buf+0, 0);
+ put_u16(buf+2, 7+len);
- /* Check message contents */
- if (len < 29 || len != 29U + pkt[28])
- { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- if (pkt[19] != BGP_VERSION)
- { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
- conn->advertised_as = base_as = get_u16(pkt+20);
- hold = get_u16(pkt+22);
- id = get_u32(pkt+24);
- BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
+ /* Begin of MP_UNREACH_NLRI atribute */
+ buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
+ buf[5] = BA_MP_UNREACH_NLRI;
+ put_u16(buf+6, 3+len);
+ put_af3(buf+8, s->channel->afi);
- if (bgp_parse_options(conn, pkt+29, pkt[28]))
- return;
+ return buf+11+len;
+}
- if (hold > 0 && hold < 3)
- { bgp_error(conn, 2, 6, pkt+22, 2); return; }
+static byte *
+bgp_create_update(struct bgp_channel *c, byte *buf)
+{
+ struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_bucket *buck;
+ byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
+ byte *res = NULL;
+
+again: ;
+
+ /* Initialize write state */
+ struct bgp_write_state s = {
+ .proto = p,
+ .channel = c,
+ .pool = bgp_linpool,
+ .as4_session = p->as4_session,
+ .add_path = c->add_path_tx,
+ .mpls = c->desc->mpls,
+ };
+
+ /* Try unreachable bucket */
+ if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ {
+ res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
+ bgp_create_ip_unreach(&s, buck, buf, end):
+ bgp_create_mp_unreach(&s, buck, buf, end);
- /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
- if (!id || (p->is_internal && id == p->local_id))
- { bgp_error(conn, 2, 3, pkt+24, -4); return; }
+ goto done;
+ }
- if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
- log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
+ /* Try reachable buckets */
+ if (!EMPTY_LIST(c->bucket_queue))
+ {
+ buck = HEAD(c->bucket_queue);
- if (conn->advertised_as != p->remote_as)
+ /* Cleanup empty buckets */
+ if (EMPTY_LIST(buck->prefixes))
{
- if (conn->peer_as4_support)
- {
- u32 val = htonl(conn->advertised_as);
- bgp_error(conn, 2, 2, (byte *) &val, 4);
- }
- else
- bgp_error(conn, 2, 2, pkt+20, 2);
-
- return;
+ bgp_free_bucket(c, buck);
+ goto again;
}
- /* Check the other connection */
- other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
- switch (other->state)
- {
- case BS_CONNECT:
- case BS_ACTIVE:
- /* Stop outgoing connection attempts */
- bgp_conn_enter_idle_state(other);
- break;
+ res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
+ bgp_create_ip_reach(&s, buck, buf, end):
+ bgp_create_mp_reach(&s, buck, buf, end);
- case BS_IDLE:
- case BS_OPENSENT:
- case BS_CLOSE:
- break;
+ if (EMPTY_LIST(buck->prefixes))
+ bgp_free_bucket(c, buck);
+ else
+ bgp_defer_bucket(c, buck);
- case BS_OPENCONFIRM:
- /*
- * Description of collision detection rules in RFC 4271 is confusing and
- * contradictory, but it is essentially:
- *
- * 1. Router with higher ID is dominant
- * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
- * 3. When both connections are in OpenConfirm state, one initiated by
- * the dominant router is kept.
- *
- * The first line in the expression below evaluates whether the neighbor
- * is dominant, the second line whether the new connection was initiated
- * by the neighbor. If both are true (or both are false), we keep the new
- * connection, otherwise we keep the old one.
- */
- if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as)))
- == (conn == &p->incoming_conn))
- {
- /* Should close the other connection */
- BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
- bgp_error(other, 6, 7, NULL, 0);
- break;
- }
- /* Fall thru */
- case BS_ESTABLISHED:
- /* Should close this connection */
- BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
- bgp_error(conn, 6, 7, NULL, 0);
- return;
- default:
- bug("bgp_rx_open: Unknown state");
- }
+ if (!res)
+ goto again;
- /* Update our local variables */
- conn->hold_time = MIN(hold, p->cf->hold_time);
- conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
- p->remote_id = id;
- p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
- p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
- p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
- p->gr_ready = p->cf->gr_mode && conn->peer_gr_able;
- p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support;
-
- /* Update RA mode */
- if (p->add_path_tx)
- p->p.accept_ra_types = RA_ANY;
- else if (p->cf->secondary)
- p->p.accept_ra_types = RA_ACCEPTED;
- else
- p->p.accept_ra_types = RA_OPTIMAL;
+ goto done;
+ }
- DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
+ /* No more prefixes to send */
+ return NULL;
- bgp_schedule_packet(conn, PKT_KEEPALIVE);
- bgp_start_timer(conn->hold_timer, conn->hold_time);
- bgp_conn_enter_openconfirm_state(conn);
+done:
+ BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
+ lp_flush(s.pool);
+
+ return res;
}
+static byte *
+bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
+{
+ /* Empty update packet */
+ put_u32(buf, 0);
-static inline void
-bgp_rx_end_mark(struct bgp_proto *p)
+ return buf+4;
+}
+
+static byte *
+bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
{
- BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
+ put_u16(buf+0, 0);
+ put_u16(buf+2, 6); /* length 4--9 */
- if (p->load_state == BFS_LOADING)
- p->load_state = BFS_NONE;
+ /* Empty MP_UNREACH_NLRI atribute */
+ buf[4] = BAF_OPTIONAL;
+ buf[5] = BA_MP_UNREACH_NLRI;
+ buf[6] = 3; /* Length 7--9 */
+ put_af3(buf+7, c->afi);
- if (p->p.gr_recovery)
- proto_graceful_restart_unlock(&p->p);
-
- if (p->gr_active)
- bgp_graceful_restart_done(p);
-}
-
-
-#define DECODE_PREFIX(pp, ll) do { \
- if (p->add_path_rx) \
- { \
- if (ll < 5) { err=1; goto done; } \
- path_id = get_u32(pp); \
- pp += 4; \
- ll -= 4; \
- } \
- int b = *pp++; \
- int q; \
- ll--; \
- if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; } \
- q = (b+7) / 8; \
- if (ll < q) { err=1; goto done; } \
- memcpy(&prefix, pp, q); \
- pp += q; \
- ll -= q; \
- ipa_ntoh(prefix); \
- prefix = ipa_and(prefix, ipa_mkmask(b)); \
- pxlen = b; \
-} while (0)
+ return buf+10;
+}
+/*
+ * Write an End-of-RIB marker for channel @c into @buf and return the position
+ * just past it. The plain IPv4 channel gets the empty UPDATE form, every other
+ * family the MP_UNREACH_NLRI form.
+ */
+static byte *
+bgp_create_end_mark(struct bgp_channel *c, byte *buf)
+{
+  /* Kept under the name p: it is not used directly, presumably BGP_TRACE refers to it */
+  struct bgp_proto *p = (void *) c->c.proto;
+
+  BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
+
+  if (c->afi == BGP_AF_IPV4)
+    return bgp_create_ip_end_mark(c, buf);
+
+  return bgp_create_mp_end_mark(c, buf);
+}
static inline void
-bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
- u32 path_id, u32 *last_id, struct rte_src **src,
- rta *a0, rta **a)
+bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi)
{
- if (path_id != *last_id)
- {
- *src = rt_get_source(&p->p, path_id);
- *last_id = path_id;
+ struct bgp_proto *p = s->proto;
+ struct bgp_channel *c = bgp_get_channel(p, afi);
- if (*a)
- {
- rta_free(*a);
- *a = NULL;
- }
- }
+ BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
- /* Prepare cached route attributes */
- if (!*a)
- {
- a0->src = *src;
+ if (!c)
+ DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
- /* Workaround for rta_lookup() breaking eattrs */
- ea_list *ea = a0->eattrs;
- *a = rta_lookup(a0);
- a0->eattrs = ea;
- }
+ if (c->load_state == BFS_LOADING)
+ c->load_state = BFS_NONE;
- net *n = net_get(p->p.table, prefix, pxlen);
- rte *e = rte_get_temp(rta_clone(*a));
- e->net = n;
- e->pflags = 0;
- e->u.bgp.suppressed = 0;
- rte_update2(p->p.main_ahook, n, e, *src);
+ if (p->p.gr_recovery)
+ channel_graceful_restart_unlock(&c->c);
+
+ if (c->gr_active)
+ bgp_graceful_restart_done(c);
}
static inline void
-bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
- u32 path_id, u32 *last_id, struct rte_src **src)
+bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
{
- if (path_id != *last_id)
- {
- *src = rt_find_source(&p->p, path_id);
- *last_id = path_id;
- }
+ struct bgp_channel *c = bgp_get_channel(s->proto, afi);
+ rta *a = NULL;
- net *n = net_find(p->p.table, prefix, pxlen);
- rte_update2( p->p.main_ahook, n, NULL, *src);
-}
+ if (!c)
+ DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
-static inline int
-bgp_set_next_hop(struct bgp_proto *p, rta *a)
-{
- struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
+ s->channel = c;
+ s->add_path = c->add_path_rx;
+ s->mpls = c->desc->mpls;
-#ifdef IPV6
- int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
+ s->last_id = 0;
+ s->last_src = s->proto->p.main_source;
- /* First address should not be link-local, but may be zero in direct mode */
- if (ipa_is_link_local(*nexthop))
- *nexthop = IPA_NONE;
-#else
- int second = 0;
-#endif
-
- if (p->cf->gw_mode == GW_DIRECT)
- {
- neighbor *ng = NULL;
+ /*
+ * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
+ * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
+ * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
+ * decode_next_hop hooks) by restoring a->eattrs afterwards.
+ */
- if (ipa_nonzero(*nexthop))
- ng = neigh_find(&p->p, nexthop, 0);
- else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
- ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
+ if (ea)
+ {
+ a = allocz(RTA_MAX_SIZE);
- /* Fallback */
- if (!ng)
- ng = p->neigh;
+ a->source = RTS_BGP;
+ a->scope = SCOPE_UNIVERSE;
+ a->from = s->proto->cf->remote_ip;
+ a->eattrs = ea;
- if (ng->scope == SCOPE_HOST)
- return 0;
+ c->desc->decode_next_hop(s, nh, nh_len, a);
- a->dest = RTD_ROUTER;
- a->gw = ng->addr;
- a->iface = ng->iface;
- a->hostentry = NULL;
- a->igp_metric = 0;
- }
- else /* GW_RECURSIVE */
- {
- if (ipa_zero(*nexthop))
- return 0;
+ /* Handle withdraw during next hop decoding */
+ if (s->err_withdraw)
+ a = NULL;
+ }
- rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
- }
+ c->desc->decode_nlri(s, nlri, len, a);
- return 1;
+ rta_free(s->cached_rta);
+ s->cached_rta = NULL;
}
-#ifndef IPV6 /* IPv4 version */
-
static void
-bgp_do_rx_update(struct bgp_conn *conn,
- byte *withdrawn, int withdrawn_len,
- byte *nlri, int nlri_len,
- byte *attrs, int attr_len)
+bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
{
struct bgp_proto *p = conn->bgp;
- struct rte_src *src = p->p.main_source;
- rta *a0, *a = NULL;
- ip_addr prefix;
- int pxlen, err = 0;
- u32 path_id = 0;
- u32 last_id = 0;
+ ea_list *ea = NULL;
- /* Check for End-of-RIB marker */
- if (!withdrawn_len && !attr_len && !nlri_len)
- {
- bgp_rx_end_mark(p);
- return;
- }
+ BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
- /* Withdraw routes */
- while (withdrawn_len)
- {
- DECODE_PREFIX(withdrawn, withdrawn_len);
- DBG("Withdraw %I/%d\n", prefix, pxlen);
+ /* Workaround for some BGP implementations that skip initial KEEPALIVE */
+ if (conn->state == BS_OPENCONFIRM)
+ bgp_conn_enter_established_state(conn);
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
+ if (conn->state != BS_ESTABLISHED)
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- if (!attr_len && !nlri_len) /* shortcut */
- return;
+ bgp_start_timer(conn->hold_timer, conn->hold_time);
- a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
+ /* Initialize parse state */
+ struct bgp_parse_state s = {
+ .proto = p,
+ .pool = bgp_linpool,
+ .as4_session = p->as4_session,
+ };
- if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
- return;
+ /* Parse error handler */
+ if (setjmp(s.err_jmpbuf))
+ {
+ bgp_error(conn, 3, s.err_subcode, NULL, 0);
+ goto done;
+ }
- if (a0 && nlri_len && !bgp_set_next_hop(p, a0))
- a0 = NULL;
+ /* Check minimal length */
+ if (len < 23)
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- last_id = 0;
- src = p->p.main_source;
+ /* Skip fixed header */
+ uint pos = 19;
- while (nlri_len)
- {
- DECODE_PREFIX(nlri, nlri_len);
- DBG("Add %I/%d\n", prefix, pxlen);
+ /*
+ * UPDATE message format
+ *
+ * 2 B IPv4 Withdrawn Routes Length
+ * var IPv4 Withdrawn Routes NLRI
+ * 2 B Total Path Attribute Length
+ * var Path Attributes
+ * var IPv4 Reachable Routes NLRI
+ */
- if (a0)
- bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
- else /* Forced withdraw as a result of soft error */
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
+ s.ip_unreach_len = get_u16(pkt + pos);
+ s.ip_unreach_nlri = pkt + pos + 2;
+ pos += 2 + s.ip_unreach_len;
- done:
- if (a)
- rta_free(a);
+ if (pos + 2 > len)
+ bgp_parse_error(&s, 1);
- if (err)
- bgp_error(conn, 3, err, NULL, 0);
+ s.attr_len = get_u16(pkt + pos);
+ s.attrs = pkt + pos + 2;
+ pos += 2 + s.attr_len;
+
+ if (pos > len)
+ bgp_parse_error(&s, 1);
+ s.ip_reach_len = len - pos;
+ s.ip_reach_nlri = pkt + pos;
+
+
+ if (s.attr_len)
+ ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
+
+ /* Check for End-of-RIB marker */
+ if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
+ { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; }
+
+ /* Check for MP End-of-RIB marker */
+ if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
+ !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af)
+ { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; }
+
+ if (s.ip_unreach_len)
+ bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);
+
+ if (s.mp_unreach_len)
+ bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
+
+ if (s.ip_reach_len)
+ bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
+ ea, s.ip_next_hop_data, s.ip_next_hop_len);
+
+ if (s.mp_reach_len)
+ bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
+ ea, s.mp_next_hop_data, s.mp_next_hop_len);
+
+done:
+ rta_free(s.cached_rta);
+ lp_flush(s.pool);
return;
}
-#else /* IPv6 version */
-#define DO_NLRI(name) \
- x = p->name##_start; \
- len = len0 = p->name##_len; \
- if (len) \
- { \
- if (len < 3) { err=9; goto done; } \
- af = get_u16(x); \
- x += 3; \
- len -= 3; \
- DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);\
- } \
- else \
- af = 0; \
- if (af == BGP_AF_IPV6)
+/*
+ * ROUTE-REFRESH
+ */
-static void
-bgp_attach_next_hop(rta *a0, byte *x)
+static inline byte *
+bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
{
- ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
- memcpy(nh, x+1, 16);
- ipa_ntoh(nh[0]);
+ struct bgp_proto *p = (void *) c->c.proto;
- /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
- if (*x == 32)
- {
- memcpy(nh+1, x+17, 16);
- ipa_ntoh(nh[1]);
- }
- else
- nh[1] = IPA_NONE;
+ BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
+
+ /* Original route refresh request, RFC 2918 */
+ put_af4(buf, c->afi);
+ buf[2] = BGP_RR_REQUEST;
+
+ return buf+4;
+}
+
+static inline byte *
+bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
+{
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
+
+ /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
+ put_af4(buf, c->afi);
+ buf[2] = BGP_RR_BEGIN;
+
+ return buf+4;
}
+static inline byte *
+bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
+{
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
+
+ /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
+ put_af4(buf, c->afi);
+ buf[2] = BGP_RR_END;
+
+ return buf+4;
+}
static void
-bgp_do_rx_update(struct bgp_conn *conn,
- byte *withdrawn UNUSED, int withdrawn_len,
- byte *nlri UNUSED, int nlri_len,
- byte *attrs, int attr_len)
+bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
{
struct bgp_proto *p = conn->bgp;
- struct rte_src *src = p->p.main_source;
- byte *x;
- int len, len0;
- unsigned af;
- rta *a0, *a = NULL;
- ip_addr prefix;
- int pxlen, err = 0;
- u32 path_id = 0;
- u32 last_id = 0;
-
- p->mp_reach_len = 0;
- p->mp_unreach_len = 0;
- a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
-
- if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
- return;
- /* Check for End-of-RIB marker */
- if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len &&
- (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
- {
- bgp_rx_end_mark(p);
- return;
- }
+ if (conn->state != BS_ESTABLISHED)
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- DO_NLRI(mp_unreach)
- {
- while (len)
- {
- DECODE_PREFIX(x, len);
- DBG("Withdraw %I/%d\n", prefix, pxlen);
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
- }
+ if (!conn->local_caps->route_refresh)
+ { bgp_error(conn, 1, 3, pkt+18, 1); return; }
- DO_NLRI(mp_reach)
- {
- /* Create fake NEXT_HOP attribute */
- if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
- { err = 9; goto done; }
+ if (len < (BGP_HEADER_LENGTH + 4))
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- if (a0)
- bgp_attach_next_hop(a0, x);
+ if (len > (BGP_HEADER_LENGTH + 4))
+ { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
- /* Also ignore one reserved byte */
- len -= *x + 2;
- x += *x + 2;
+ struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
+ if (!c)
+ {
+ log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
+ p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
+ return;
+ }
- if (a0 && ! bgp_set_next_hop(p, a0))
- a0 = NULL;
+ /* RFC 7313 redefined reserved field as RR message subtype */
+ uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
- last_id = 0;
- src = p->p.main_source;
+ switch (subtype)
+ {
+ case BGP_RR_REQUEST:
+ BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
+ channel_request_feeding(&c->c);
+ break;
- while (len)
- {
- DECODE_PREFIX(x, len);
- DBG("Add %I/%d\n", prefix, pxlen);
+ case BGP_RR_BEGIN:
+ BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
+ bgp_refresh_begin(c);
+ break;
- if (a0)
- bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
- else /* Forced withdraw as a result of soft error */
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
- }
+ case BGP_RR_END:
+ BGP_TRACE(D_PACKETS, "Got END-OF-RR");
+ bgp_refresh_end(c);
+ break;
- done:
- if (a)
- rta_free(a);
+ default:
+ log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
+ p->p.name, subtype);
+ break;
+ }
+}
+
+static inline struct bgp_channel *
+bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
+{
+ uint i = conn->last_channel;
- if (err) /* Use subcode 9, not err */
- bgp_error(conn, 3, 9, NULL, 0);
+ /* Try the last channel, but at most several times */
+ if ((conn->channels_to_send & (1 << i)) &&
+ (conn->last_channel_count < 16))
+ goto found;
- return;
+ /* Find channel with non-zero channels_to_send */
+ do
+ {
+ i++;
+ if (i >= p->channel_count)
+ i = 0;
+ }
+ while (! (conn->channels_to_send & (1 << i)));
+
+ /* Use that channel */
+ conn->last_channel = i;
+ conn->last_channel_count = 0;
+
+found:
+ conn->last_channel_count++;
+ return p->channel_map[i];
}
-#endif
+static inline int
+bgp_send(struct bgp_conn *conn, uint type, uint len)
+{
+ sock *sk = conn->sk;
+ byte *buf = sk->tbuf;
-static void
-bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
+ memset(buf, 0xff, 16); /* Marker */
+ put_u16(buf+16, len);
+ buf[18] = type;
+
+ return sk_send(sk, len);
+}
+
+/**
+ * bgp_fire_tx - transmit packets
+ * @conn: connection
+ *
+ * Whenever the transmit buffers of the underlying TCP connection
+ * are free and we have any packets queued for sending, the socket functions
+ * call bgp_fire_tx() which takes care of selecting the highest priority packet
+ * queued (Notification > Keepalive > Open > Update), assembling its header
+ * and body and sending it to the connection.
+ */
+static int
+bgp_fire_tx(struct bgp_conn *conn)
{
struct bgp_proto *p = conn->bgp;
- byte *withdrawn, *attrs, *nlri;
- uint withdrawn_len, attr_len, nlri_len;
+ struct bgp_channel *c;
+ byte *buf, *pkt, *end;
+ uint s;
- BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
+ if (!conn->sk)
+ return 0;
- /* Workaround for some BGP implementations that skip initial KEEPALIVE */
- if (conn->state == BS_OPENCONFIRM)
- bgp_conn_enter_established_state(conn);
+ buf = conn->sk->tbuf;
+ pkt = buf + BGP_HEADER_LENGTH;
+ s = conn->packets_to_send;
- if (conn->state != BS_ESTABLISHED)
- { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- bgp_start_timer(conn->hold_timer, conn->hold_time);
+ if (s & (1 << PKT_SCHEDULE_CLOSE))
+ {
+ /* We can finally close connection and enter idle state */
+ bgp_conn_enter_idle_state(conn);
+ return 0;
+ }
+ if (s & (1 << PKT_NOTIFICATION))
+ {
+ conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
+ end = bgp_create_notification(conn, pkt);
+ return bgp_send(conn, PKT_NOTIFICATION, end - buf);
+ }
+ else if (s & (1 << PKT_KEEPALIVE))
+ {
+ conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
+ BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
+ bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
+ return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
+ }
+ else if (s & (1 << PKT_OPEN))
+ {
+ conn->packets_to_send &= ~(1 << PKT_OPEN);
+ end = bgp_create_open(conn, pkt);
+ return bgp_send(conn, PKT_OPEN, end - buf);
+ }
+ else while (conn->channels_to_send)
+ {
+ c = bgp_get_channel_to_send(p, conn);
+ s = c->packets_to_send;
- /* Find parts of the packet and check sizes */
- if (len < 23)
+ if (s & (1 << PKT_ROUTE_REFRESH))
{
- bgp_error(conn, 1, 2, pkt+16, 2);
- return;
+ c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
+ end = bgp_create_route_refresh(c, pkt);
+ return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
}
- withdrawn = pkt + 21;
- withdrawn_len = get_u16(pkt + 19);
- if (withdrawn_len + 23 > len)
- goto malformed;
- attrs = withdrawn + withdrawn_len + 2;
- attr_len = get_u16(attrs - 2);
- if (withdrawn_len + attr_len + 23 > len)
- goto malformed;
- nlri = attrs + attr_len;
- nlri_len = len - withdrawn_len - attr_len - 23;
- if (!attr_len && nlri_len)
- goto malformed;
- DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
-
- lp_flush(bgp_linpool);
-
- bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
- return;
+ else if (s & (1 << PKT_BEGIN_REFRESH))
+ {
+ /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
+ c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
+ end = bgp_create_begin_refresh(c, pkt);
+ return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
+ }
+ else if (s & (1 << PKT_UPDATE))
+ {
+ end = bgp_create_update(c, pkt);
+ if (end)
+ return bgp_send(conn, PKT_UPDATE, end - buf);
+
+ /* No update to send, perhaps we need to send End-of-RIB or EoRR */
+ c->packets_to_send = 0;
+ conn->channels_to_send &= ~(1 << c->index);
+
+ if (c->feed_state == BFS_LOADED)
+ {
+ c->feed_state = BFS_NONE;
+ end = bgp_create_end_mark(c, pkt);
+ return bgp_send(conn, PKT_UPDATE, end - buf);
+ }
+
+ else if (c->feed_state == BFS_REFRESHED)
+ {
+ c->feed_state = BFS_NONE;
+ end = bgp_create_end_refresh(c, pkt);
+ return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
+ }
+ }
+ else if (s)
+ bug("Channel packets_to_send: %x", s);
+
+ c->packets_to_send = 0;
+ conn->channels_to_send &= ~(1 << c->index);
+ }
+
+ return 0;
+}
+
+/**
+ * bgp_schedule_packet - schedule a packet for transmission
+ * @conn: connection
+ * @c: channel
+ * @type: packet type
+ *
+ * Schedule a packet of type @type to be sent as soon as possible.
+ */
+void
+bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
+{
+ ASSERT(conn->sk);
+
+ DBG("BGP: Scheduling packet type %d\n", type);
+
+ if (c)
+ {
+ if (! conn->channels_to_send)
+ {
+ conn->last_channel = c->index;
+ conn->last_channel_count = 0;
+ }
+
+ c->packets_to_send |= 1 << type;
+ conn->channels_to_send |= 1 << c->index;
+ }
+ else
+ conn->packets_to_send |= 1 << type;
+
+ if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
+ ev_schedule(conn->tx_ev);
+}
+
+void
+bgp_kick_tx(void *vconn)
+{
+ struct bgp_conn *conn = vconn;
-malformed:
- bgp_error(conn, 3, 1, NULL, 0);
+ DBG("BGP: kicking TX\n");
+ while (bgp_fire_tx(conn) > 0)
+ ;
+}
+
+void
+bgp_tx(sock *sk)
+{
+ struct bgp_conn *conn = sk->data;
+
+ DBG("BGP: TX hook\n");
+ while (bgp_fire_tx(conn) > 0)
+ ;
}
+
static struct {
byte major, minor;
byte *msg;
@@ -1480,26 +2665,25 @@ static struct {
* which might be static string or given temporary buffer.
*/
const char *
-bgp_error_dsc(unsigned code, unsigned subcode)
+bgp_error_dsc(uint code, uint subcode)
{
static char buff[32];
- unsigned i;
+ uint i;
+
for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
- {
- return bgp_msg_table[i].msg;
- }
+ return bgp_msg_table[i].msg;
- bsprintf(buff, "Unknown error %d.%d", code, subcode);
+ bsprintf(buff, "Unknown error %u.%u", code, subcode);
return buff;
}
void
-bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
+bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
{
const byte *name;
byte *t, argbuf[36];
- unsigned i;
+ uint i;
/* Don't report Cease messages generated by myself */
if (code == 6 && class == BE_BGP_TX)
@@ -1515,7 +2699,7 @@ bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned
if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
{
/* Bad peer AS - we would like to print the AS */
- t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
+ t += bsprintf(t, "%u", (len == 2) ? get_u16(data) : get_u32(data));
goto done;
}
if (len > 16)
@@ -1532,47 +2716,25 @@ static void
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
{
struct bgp_proto *p = conn->bgp;
+
if (len < 21)
- {
- bgp_error(conn, 1, 2, pkt+16, 2);
- return;
- }
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- unsigned code = pkt[19];
- unsigned subcode = pkt[20];
+ uint code = pkt[19];
+ uint subcode = pkt[20];
int err = (code != 6);
bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
-#ifndef IPV6
- if ((code == 2) && ((subcode == 4) || (subcode == 7))
- /* Error related to capability:
- * 4 - Peer does not support capabilities at all.
- * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
- */
- && (p->cf->capabilities == 2)
- /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
- && (conn->start_state == BSS_CONNECT)
- /* Failed connection attempt have used capabilities */
- && (p->cf->remote_as <= 0xFFFF))
- /* Not possible with disabled capabilities */
- {
- /* We try connect without capabilities */
- log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
- p->start_state = BSS_CONNECT_NOCAP;
- err = 0;
- }
-#endif
-
bgp_conn_enter_close_state(conn);
- bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
+ bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
- if (err)
- {
- bgp_update_startup_delay(p);
- bgp_stop(p, 0);
- }
+ if (err)
+ {
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0);
+ }
}
static void
@@ -1582,64 +2744,12 @@ bgp_rx_keepalive(struct bgp_conn *conn)
BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
bgp_start_timer(conn->hold_timer, conn->hold_time);
- switch (conn->state)
- {
- case BS_OPENCONFIRM:
- bgp_conn_enter_established_state(conn);
- break;
- case BS_ESTABLISHED:
- break;
- default:
- bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
- }
-}
-static void
-bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
-{
- struct bgp_proto *p = conn->bgp;
+ if (conn->state == BS_OPENCONFIRM)
+ { bgp_conn_enter_established_state(conn); return; }
if (conn->state != BS_ESTABLISHED)
- { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
-
- if (!p->cf->enable_refresh)
- { bgp_error(conn, 1, 3, pkt+18, 1); return; }
-
- if (len < (BGP_HEADER_LENGTH + 4))
- { bgp_error(conn, 1, 2, pkt+16, 2); return; }
-
- if (len > (BGP_HEADER_LENGTH + 4))
- { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
-
- /* FIXME - we ignore AFI/SAFI values, as we support
- just one value and even an error code for an invalid
- request is not defined */
-
- /* RFC 7313 redefined reserved field as RR message subtype */
- uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
-
- switch (subtype)
- {
- case BGP_RR_REQUEST:
- BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
- proto_request_feeding(&p->p);
- break;
-
- case BGP_RR_BEGIN:
- BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
- bgp_refresh_begin(p);
- break;
-
- case BGP_RR_END:
- BGP_TRACE(D_PACKETS, "Got END-OF-RR");
- bgp_refresh_end(p);
- break;
-
- default:
- log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
- p->p.name, subtype);
- break;
- }
+ bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
}
@@ -1653,7 +2763,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
* packet handler according to the packet type.
*/
static void
-bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
+bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
{
byte type = pkt[18];
@@ -1663,14 +2773,14 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
mrt_dump_bgp_packet(conn, pkt, len);
switch (type)
- {
- case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
- case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
- case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
- case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
- case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
- default: bgp_error(conn, 1, 3, pkt+18, 1);
- }
+ {
+ case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
+ case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
+ case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
+ case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
+ case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
+ default: bgp_error(conn, 1, 3, pkt+18, 1);
+ }
}
/**
@@ -1687,10 +2797,9 @@ int
bgp_rx(sock *sk, uint size)
{
struct bgp_conn *conn = sk->data;
- struct bgp_proto *p = conn->bgp;
byte *pkt_start = sk->rbuf;
byte *end = pkt_start + size;
- unsigned i, len;
+ uint i, len;
DBG("BGP: RX hook: Got %d bytes\n", size);
while (end >= pkt_start + BGP_HEADER_LENGTH)
@@ -1704,7 +2813,7 @@ bgp_rx(sock *sk, uint size)
break;
}
len = get_u16(pkt_start+16);
- if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p))
+ if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
{
bgp_error(conn, 1, 2, pkt_start+16, 2);
break;
diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile
index f90222cf..39e74f71 100644
--- a/proto/ospf/Makefile
+++ b/proto/ospf/Makefile
@@ -1,5 +1,6 @@
-source=ospf.c topology.c packet.c hello.c neighbor.c iface.c dbdes.c lsreq.c lsupd.c lsack.c lsalib.c rt.c
-root-rel=../../
-dir-name=proto/ospf
+src := dbdes.c hello.c iface.c lsack.c lsalib.c lsreq.c lsupd.c neighbor.c ospf.c packet.c rt.c topology.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
-include ../../Rules
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y
index 7b35b191..98ddf5d0 100644
--- a/proto/ospf/config.Y
+++ b/proto/ospf/config.Y
@@ -78,14 +78,66 @@ static void
ospf_proto_finish(void)
{
struct ospf_config *cf = OSPF_CFG;
+ struct ospf_area_config *ac;
+ struct ospf_iface_patt *ic;
+
+ /* Define default channel */
+ if (EMPTY_LIST(this_proto->channels))
+ {
+ this_proto->net_type = ospf_cfg_is_v2() ? NET_IP4 : NET_IP6;
+ channel_config_new(NULL, this_proto->net_type, this_proto);
+ }
+
+ /* Propagate global instance ID to interfaces */
+ if (cf->instance_id_set)
+ {
+ WALK_LIST(ac, cf->area_list)
+ WALK_LIST(ic, ac->patt_list)
+ if (!ic->instance_id_set)
+ { ic->instance_id = cf->instance_id; ic->instance_id_set = 1; }
+
+ WALK_LIST(ic, cf->vlink_list)
+ if (!ic->instance_id_set)
+ { ic->instance_id = cf->instance_id; ic->instance_id_set = 1; }
+ }
+
+ if (ospf_cfg_is_v3())
+ {
+ uint ipv4 = (this_proto->net_type == NET_IP4);
+ uint base = (ipv4 ? 64 : 0) + (cf->af_mc ? 32 : 0);
+
+ /* RFC 5838 - OSPFv3-AF */
+ if (cf->af_ext)
+ {
+ /* RFC 5838 2.1 - instance IDs based on AFs */
+ WALK_LIST(ac, cf->area_list)
+ WALK_LIST(ic, ac->patt_list)
+ {
+ if (!ic->instance_id_set)
+ ic->instance_id = base;
+ else if (ic->instance_id >= 128)
+ log(L_WARN "Instance ID %d from unassigned/private range", ic->instance_id);
+ else if ((ic->instance_id < base) || (ic->instance_id >= (base + 32)))
+ cf_error("Instance ID %d invalid for given channel type", ic->instance_id);
+ }
+
+ /* RFC 5838 2.8 - vlinks limited to IPv6 unicast */
+ if ((ipv4 || cf->af_mc) && !EMPTY_LIST(cf->vlink_list))
+ cf_error("Vlinks not supported in AFs other than IPv6 unicast");
+ }
+ else
+ {
+ if (ipv4 || cf->af_mc)
+ cf_error("Different channel type");
+ }
+ }
if (EMPTY_LIST(cf->area_list))
- cf_error( "No configured areas in OSPF");
+ cf_error("No configured areas in OSPF");
int areano = 0;
int backbone = 0;
int nssa = 0;
- struct ospf_area_config *ac;
WALK_LIST(ac, cf->area_list)
{
areano++;
@@ -98,7 +150,7 @@ ospf_proto_finish(void)
cf->abr = areano > 1;
/* Route export or NSSA translation (RFC 3101 3.1) */
- cf->asbr = (this_proto->out_filter != FILTER_REJECT) || (nssa && cf->abr);
+ cf->asbr = (proto_cf_main_channel(this_proto)->out_filter != FILTER_REJECT) || (nssa && cf->abr);
if (cf->abr && !backbone)
{
@@ -122,7 +174,7 @@ static inline void
ospf_check_defcost(int cost)
{
if ((cost <= 0) || (cost >= LSINFINITY))
- cf_error("Default cost must be in range 1-%d", LSINFINITY-1);
+ cf_error("Default cost must be in range 1-%u", LSINFINITY-1);
}
static inline void
@@ -135,8 +187,8 @@ ospf_check_auth(void)
CF_DECLS
-CF_KEYWORDS(OSPF, AREA, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID)
-CF_KEYWORDS(NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT)
+CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID)
+CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT)
CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT)
CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP)
CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC, TTL, SECURITY)
@@ -144,39 +196,67 @@ CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK, ONLY, BFD)
CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL)
CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY)
CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY, LENGTH)
-CF_KEYWORDS(SECONDARY, MERGE, LSA, SUPPRESSION)
+CF_KEYWORDS(SECONDARY, MERGE, LSA, SUPPRESSION, MULTICAST, RFC5838)
-%type <t> opttext
%type <ld> lsadb_args
-%type <i> nbma_eligible
+%type <i> ospf_variant ospf_af_mc nbma_eligible
+%type <cc> ospf_channel_start ospf_channel
CF_GRAMMAR
CF_ADDTO(proto, ospf_proto '}' { ospf_proto_finish(); } )
-ospf_proto_start: proto_start OSPF {
- this_proto = proto_config_new(&proto_ospf, $1);
- init_list(&OSPF_CFG->area_list);
- init_list(&OSPF_CFG->vlink_list);
- OSPF_CFG->tick = OSPF_DEFAULT_TICK;
- OSPF_CFG->ospf2 = OSPF_IS_V2;
- }
+ospf_variant:
+ OSPF { $$ = 1; }
+ | OSPF V2 { $$ = 1; }
+ | OSPF V3 { $$ = 0; }
;
+ospf_proto_start: proto_start ospf_variant
+{
+ this_proto = proto_config_new(&proto_ospf, $1);
+ this_proto->net_type = $2 ? NET_IP4 : 0;
+
+ init_list(&OSPF_CFG->area_list);
+ init_list(&OSPF_CFG->vlink_list);
+ OSPF_CFG->tick = OSPF_DEFAULT_TICK;
+ OSPF_CFG->ospf2 = $2;
+ OSPF_CFG->af_ext = !$2;
+};
+
ospf_proto:
ospf_proto_start proto_name '{'
| ospf_proto ospf_proto_item ';'
;
+ospf_af_mc:
+ { $$ = 0; }
+ | MULTICAST { $$ = 1; }
+ ;
+
+/* We redefine proto_channel to add multicast flag */
+ospf_channel_start: net_type ospf_af_mc
+{
+ $$ = this_channel = channel_config_new(NULL, $1, this_proto);
+
+ /* Save the multicast flag */
+ if (this_channel == proto_cf_main_channel(this_proto))
+ OSPF_CFG->af_mc = $2;
+};
+
+ospf_channel: ospf_channel_start channel_opt_list channel_end;
+
ospf_proto_item:
proto_item
+ | ospf_channel { this_proto->net_type = $1->net_type; }
| RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; }
+ | RFC5838 bool { OSPF_CFG->af_ext = $2; if (!ospf_cfg_is_v3()) cf_error("RFC5838 option requires OSPFv3"); }
| STUB ROUTER bool { OSPF_CFG->stub_router = $3; }
| ECMP bool { OSPF_CFG->ecmp = $2 ? OSPF_DEFAULT_ECMP_LIMIT : 0; }
- | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); }
+ | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; }
| MERGE EXTERNAL bool { OSPF_CFG->merge_external = $3; }
- | TICK expr { OSPF_CFG->tick = $2; if($2<=0) cf_error("Tick must be greater than zero"); }
- | INSTANCE ID expr { OSPF_CFG->instance_id = $3; if (($3<0) || ($3>255)) cf_error("Instance ID must be in range 0-255"); }
+ | TICK expr { OSPF_CFG->tick = $2; if($2 <= 0) cf_error("Tick must be greater than zero"); }
+ | INSTANCE ID expr { OSPF_CFG->instance_id = $3; OSPF_CFG->instance_id_set = 1; if ($3 > 255) cf_error("Instance ID must be in range 0-255"); }
| ospf_area
;
@@ -226,10 +306,10 @@ ospf_stubnet:
;
ospf_stubnet_start:
- prefix {
+ net_ip {
this_stubnet = cfg_allocz(sizeof(struct ospf_stubnet_config));
add_tail(&this_area->stubnet_list, NODE this_stubnet);
- this_stubnet->px = $1;
+ this_stubnet->prefix = $1;
this_stubnet->cost = COST_D;
}
;
@@ -281,7 +361,6 @@ ospf_vlink_start: VIRTUAL LINK idval
OSPF_PATT->inftransdelay = INFTRANSDELAY_D;
OSPF_PATT->deadc = DEADC_D;
OSPF_PATT->type = OSPF_IT_VLINK;
- OSPF_PATT->instance_id = OSPF_CFG->instance_id;
init_list(&OSPF_PATT->nbma_list);
reset_passwords();
}
@@ -306,7 +385,7 @@ ospf_iface_item:
| REAL BROADCAST bool { OSPF_PATT->real_bcast = $3; if (!ospf_cfg_is_v2()) cf_error("Real broadcast option requires OSPFv2"); }
| PTP NETMASK bool { OSPF_PATT->ptp_netmask = $3; if (!ospf_cfg_is_v2()) cf_error("PtP netmask option requires OSPFv2"); }
| TRANSMIT DELAY expr { OSPF_PATT->inftransdelay = $3 ; if (($3<=0) || ($3>65535)) cf_error("Transmit delay must be in range 1-65535"); }
- | PRIORITY expr { OSPF_PATT->priority = $2 ; if (($2<0) || ($2>255)) cf_error("Priority must be in range 0-255"); }
+ | PRIORITY expr { OSPF_PATT->priority = $2 ; if ($2>255) cf_error("Priority must be in range 0-255"); }
| STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; }
| STUB bool { OSPF_PATT->stub = $2 ; }
| CHECK LINK bool { OSPF_PATT->check_link = $3; }
@@ -325,7 +404,6 @@ ospf_iface_item:
| TTL SECURITY bool { OSPF_PATT->ttl_security = $3; }
| TTL SECURITY TX ONLY { OSPF_PATT->ttl_security = 2; }
| BFD bool { OSPF_PATT->bfd = $2; cf_check_bfd($2); }
- | SECONDARY bool { OSPF_PATT->bsd_secondary = $2; }
| password_list { ospf_check_auth(); }
;
@@ -336,12 +414,11 @@ pref_list:
pref_item: pref_base pref_opt ';' ;
-pref_base: prefix
+pref_base: net_ip
{
this_pref = cfg_allocz(sizeof(struct area_net_config));
add_tail(this_nets, NODE this_pref);
- this_pref->px.addr = $1.addr;
- this_pref->px.len = $1.len;
+ this_pref->prefix = $1;
}
;
@@ -383,7 +460,6 @@ ospf_iface_start:
OSPF_PATT->priority = PRIORITY_D;
OSPF_PATT->deadc = DEADC_D;
OSPF_PATT->type = OSPF_IT_UNDEF;
- OSPF_PATT->instance_id = OSPF_CFG->instance_id;
init_list(&OSPF_PATT->nbma_list);
OSPF_PATT->ptp_netmask = 2; /* not specified */
OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL;
@@ -394,7 +470,7 @@ ospf_iface_start:
ospf_instance_id:
/* empty */
- | INSTANCE expr { OSPF_PATT->instance_id = $2; if (($2<0) || ($2>255)) cf_error("Instance ID must be in range 0-255"); }
+ | INSTANCE expr { OSPF_PATT->instance_id = $2; OSPF_PATT->instance_id_set = 1; if ($2 > 255) cf_error("Instance ID must be in range 0-255"); }
;
ospf_iface_patt_list:
@@ -415,11 +491,6 @@ ospf_iface:
ospf_iface_start ospf_iface_patt_list ospf_iface_opt_list { ospf_iface_finish(); }
;
-opttext:
- TEXT
- | /* empty */ { $$ = NULL; }
- ;
-
CF_ADDTO(dynamic_attr, OSPF_METRIC1 { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_OSPF_METRIC1); })
CF_ADDTO(dynamic_attr, OSPF_METRIC2 { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_OSPF_METRIC2); })
CF_ADDTO(dynamic_attr, OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_OSPF_TAG); })
diff --git a/proto/ospf/dbdes.c b/proto/ospf/dbdes.c
index d6904343..f211935f 100644
--- a/proto/ospf/dbdes.c
+++ b/proto/ospf/dbdes.c
@@ -39,7 +39,7 @@ struct ospf_dbdes3_packet
static inline uint
-ospf_dbdes_hdrlen(struct ospf_proto *p UNUSED4 UNUSED6)
+ospf_dbdes_hdrlen(struct ospf_proto *p)
{
return ospf_is_v2(p) ?
sizeof(struct ospf_dbdes2_packet) : sizeof(struct ospf_dbdes3_packet);
@@ -356,7 +356,7 @@ ospf_receive_dbdes(struct ospf_packet *pkt, struct ospf_iface *ifa,
LOG_PKT_WARN("MTU mismatch with nbr %R on %s (remote %d, local %d)",
n->rid, ifa->ifname, rcv_iface_mtu, ifa->iface->mtu);
- if ((rcv_imms == DBDES_IMMS) &&
+ if (((rcv_imms & DBDES_IMMS) == DBDES_IMMS) &&
(n->rid > p->router_id) &&
(plen == ospf_dbdes_hdrlen(p)))
{
@@ -428,7 +428,7 @@ ospf_receive_dbdes(struct ospf_packet *pkt, struct ospf_iface *ifa,
}
ospf_send_dbdes(p, n);
- tm_start(n->dbdes_timer, n->ifa->rxmtint);
+ tm_start(n->dbdes_timer, n->ifa->rxmtint S);
}
else
{
diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c
index e00487dc..e706ea0f 100644
--- a/proto/ospf/hello.c
+++ b/proto/ospf/hello.c
@@ -32,10 +32,7 @@ struct ospf_hello3_packet
struct ospf_packet hdr;
u32 iface_id;
- u8 priority;
- u8 options3;
- u8 options2;
- u8 options;
+ u32 options;
u16 helloint;
u16 deadint;
u32 dr;
@@ -74,7 +71,7 @@ ospf_send_hello(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn)
((ifa->type == OSPF_IT_PTP) && !ifa->ptp_netmask))
ps->netmask = 0;
else
- ps->netmask = htonl(u32_mkmask(ifa->addr->pxlen));
+ ps->netmask = htonl(u32_mkmask(ifa->addr->prefix.pxlen));
ps->helloint = ntohs(ifa->helloint);
ps->options = ifa->oa->options;
@@ -91,10 +88,7 @@ ospf_send_hello(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn)
struct ospf_hello3_packet *ps = (void *) pkt;
ps->iface_id = htonl(ifa->iface_id);
- ps->priority = ifa->priority;
- ps->options3 = ifa->oa->options >> 16;
- ps->options2 = ifa->oa->options >> 8;
- ps->options = ifa->oa->options;
+ ps->options = ntohl(ifa->oa->options | (ifa->priority << 24));
ps->helloint = ntohs(ifa->helloint);
ps->deadint = htons(ifa->deadint);
ps->dr = htonl(ifa->drid);
@@ -190,7 +184,8 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa,
struct ospf_proto *p = ifa->oa->po;
const char *err_dsc = NULL;
u32 rcv_iface_id, rcv_helloint, rcv_deadint, rcv_dr, rcv_bdr;
- u8 rcv_options, rcv_priority;
+ uint rcv_options, rcv_priority;
+ uint loc_options = ifa->oa->options;
u32 *neighbors;
u32 neigh_count;
uint plen, i, err_val = 0;
@@ -198,7 +193,7 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa,
/* RFC 2328 10.5 */
/*
- * We may not yet havethe associate neighbor, so we use Router ID from the
+ * We may not yet have the associate neighbor, so we use Router ID from the
* packet instead of one from the neighbor structure for log messages.
*/
u32 rcv_rid = ntohl(pkt->routerid);
@@ -227,7 +222,7 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa,
if ((ifa->type != OSPF_IT_VLINK) &&
(ifa->type != OSPF_IT_PTP) &&
- ((uint) pxlen != ifa->addr->pxlen))
+ ((uint) pxlen != ifa->addr->prefix.pxlen))
DROP("prefix length mismatch", pxlen);
neighbors = ps->neighbors;
@@ -245,8 +240,8 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa,
rcv_deadint = ntohs(ps->deadint);
rcv_dr = ntohl(ps->dr);
rcv_bdr = ntohl(ps->bdr);
- rcv_options = ps->options;
- rcv_priority = ps->priority;
+ rcv_options = ntohl(ps->options) & 0x00FFFFFF;
+ rcv_priority = ntohl(ps->options) >> 24;
neighbors = ps->neighbors;
neigh_count = (plen - sizeof(struct ospf_hello3_packet)) / sizeof(u32);
@@ -259,9 +254,13 @@ ospf_receive_hello(struct ospf_packet *pkt, struct ospf_iface *ifa,
DROP("dead interval mismatch", rcv_deadint);
/* Check whether bits E, N match */
- if ((rcv_options ^ ifa->oa->options) & (OPT_E | OPT_N))
+ if ((rcv_options ^ loc_options) & (OPT_E | OPT_N))
DROP("area type mismatch", rcv_options);
+ /* RFC 5838 2.4 - AF-bit check unless on IPv6 unicast */
+ if ((loc_options & OPT_AF) && !(loc_options & OPT_V6) && !(rcv_options & OPT_AF))
+ DROP("AF-bit mismatch", rcv_options);
+
/* Check consistency of existing neighbor entry */
if (n)
{
diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c
index 1795ec22..e3d8d61b 100644
--- a/proto/ospf/iface.c
+++ b/proto/ospf/iface.c
@@ -55,7 +55,9 @@ ifa_tx_length(struct ospf_iface *ifa)
static inline uint
ifa_tx_hdrlen(struct ospf_iface *ifa)
{
- uint hlen = SIZE_OF_IP_HEADER;
+ struct ospf_proto *p = ifa->oa->po;
+
+ uint hlen = ospf_is_v2(p) ? IP4_HEADER_LENGTH : IP6_HEADER_LENGTH;
/* Relevant just for OSPFv2 */
if (ifa->autype == OSPF_AUTH_CRYPT)
@@ -115,6 +117,7 @@ ospf_sk_open(struct ospf_iface *ifa)
sock *sk = sk_new(ifa->pool);
sk->type = SK_IP;
+ sk->subtype = ospf_is_v2(p) ? SK_IPV4 : SK_IPV6;
sk->dport = OSPF_PROTO;
sk->saddr = ifa->addr->ip;
sk->iface = ifa->iface;
@@ -200,6 +203,7 @@ ospf_open_vlink_sk(struct ospf_proto *p)
{
sock *sk = sk_new(p->p.pool);
sk->type = SK_IP;
+ sk->subtype = ospf_is_v2(p) ? SK_IPV4 : SK_IPV6;
sk->dport = OSPF_PROTO;
sk->vrf = p->p.vrf;
@@ -246,8 +250,8 @@ ospf_iface_down(struct ospf_iface *ifa)
OSPF_TRACE(D_EVENTS, "Removing interface %s (peer %I) from area %R",
ifa->ifname, ifa->addr->opposite, ifa->oa->areaid);
else
- OSPF_TRACE(D_EVENTS, "Removing interface %s (%I/%d) from area %R",
- ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen, ifa->oa->areaid);
+ OSPF_TRACE(D_EVENTS, "Removing interface %s (%N) from area %R",
+ ifa->ifname, &ifa->addr->prefix, ifa->oa->areaid);
/* First of all kill all the related vlinks */
WALK_LIST(iff, p->iface_list)
@@ -394,15 +398,15 @@ ospf_iface_sm(struct ospf_iface *ifa, int event)
{
ospf_iface_chstate(ifa, OSPF_IS_WAITING);
if (ifa->wait_timer)
- tm_start(ifa->wait_timer, ifa->waitint);
+ tm_start(ifa->wait_timer, ifa->waitint S);
}
}
if (ifa->hello_timer)
- tm_start(ifa->hello_timer, ifa->helloint);
+ tm_start(ifa->hello_timer, ifa->helloint S);
if (ifa->poll_timer)
- tm_start(ifa->poll_timer, ifa->pollint);
+ tm_start(ifa->poll_timer, ifa->pollint S);
ospf_send_hello(ifa, OHS_HELLO, NULL);
}
@@ -492,13 +496,13 @@ ospf_iface_add(struct object_lock *lock)
if (! ifa->stub)
{
- ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint);
+ ifa->hello_timer = tm_new_init(ifa->pool, hello_timer_hook, ifa, ifa->helloint S, 0);
if (ifa->type == OSPF_IT_NBMA)
- ifa->poll_timer = tm_new_set(ifa->pool, poll_timer_hook, ifa, 0, ifa->pollint);
+ ifa->poll_timer = tm_new_init(ifa->pool, poll_timer_hook, ifa, ifa->pollint S, 0);
if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_NBMA))
- ifa->wait_timer = tm_new_set(ifa->pool, wait_timer_hook, ifa, 0, 0);
+ ifa->wait_timer = tm_new_init(ifa->pool, wait_timer_hook, ifa, 0, 0);
ifa->flood_queue_size = ifa_flood_queue_size(ifa);
ifa->flood_queue = mb_allocz(ifa->pool, ifa->flood_queue_size * sizeof(void *));
@@ -532,15 +536,6 @@ ospf_iface_stubby(struct ospf_iface_patt *ip, struct ifa *addr)
if (addr->iface->flags & IF_LOOPBACK)
return 1;
- /*
- * For compatibility reasons on BSD systems, we force OSPF
- * interfaces with non-primary IP prefixes to be stub.
- */
-#if defined(OSPFv2) && !defined(CONFIG_MC_PROPER_SRC)
- if (!ip->bsd_secondary && !(addr->flags & IA_PRIMARY))
- return 1;
-#endif
-
return ip->stub;
}
@@ -559,8 +554,8 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
OSPF_TRACE(D_EVENTS, "Adding interface %s (peer %I) to area %R",
iface->name, addr->opposite, oa->areaid);
else
- OSPF_TRACE(D_EVENTS, "Adding interface %s (%I/%d) to area %R",
- iface->name, addr->prefix, addr->pxlen, oa->areaid);
+ OSPF_TRACE(D_EVENTS, "Adding interface %s (%N) to area %R",
+ iface->name, &addr->prefix, oa->areaid);
pool = rp_new(p->p.pool, "OSPF Interface");
ifa = mb_allocz(pool, sizeof(struct ospf_iface));
@@ -598,6 +593,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
if (ip->ptp_netmask < 2)
ifa->ptp_netmask = ip->ptp_netmask;
+ ifa->drip = ifa->bdrip = ospf_is_v2(p) ? IPA_NONE4 : IPA_NONE6;
ifa->type = ospf_iface_classify(ip->type, addr);
@@ -637,7 +633,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
should be used). Because OSPFv3 iface is not subnet-specific,
there is no need for ipa_in_net() check */
- if (ospf_is_v2(p) && !ipa_in_net(nb->ip, addr->prefix, addr->pxlen))
+ if (ospf_is_v2(p) && !ipa_in_netX(nb->ip, &addr->prefix))
continue;
if (ospf_is_v3(p) && !ipa_is_link_local(nb->ip))
@@ -650,7 +646,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i
add_tail(&oa->po->iface_list, NODE ifa);
struct object_lock *lock = olock_new(pool);
- lock->addr = ospf_is_v2(p) ? ifa->addr->prefix : IPA_NONE;
+ lock->addr = ospf_is_v2(p) ? ipa_from_ip4(net4_prefix(&ifa->addr->prefix)) : IPA_NONE;
lock->type = OBJLOCK_IP;
lock->port = OSPF_PROTO;
lock->inst = ifa->instance_id;
@@ -709,7 +705,7 @@ ospf_iface_new_vlink(struct ospf_proto *p, struct ospf_iface_patt *ip)
add_tail(&p->iface_list, NODE ifa);
- ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint);
+ ifa->hello_timer = tm_new_init(ifa->pool, hello_timer_hook, ifa, ifa->helloint S, 0);
ifa->flood_queue_size = ifa_flood_queue_size(ifa);
ifa->flood_queue = mb_allocz(ifa->pool, ifa->flood_queue_size * sizeof(void *));
@@ -721,10 +717,10 @@ ospf_iface_change_timer(timer *tm, uint val)
if (!tm)
return;
- tm->recurrent = val;
+ tm->recurrent = val S;
- if (tm->expires)
- tm_start(tm, val);
+ if (tm_active(tm))
+ tm_start(tm, val S);
}
static inline void
@@ -807,8 +803,8 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
ifname, ifa->waitint, new->waitint);
ifa->waitint = new->waitint;
- if (ifa->wait_timer && ifa->wait_timer->expires)
- tm_start(ifa->wait_timer, ifa->waitint);
+ if (ifa->wait_timer && tm_active(ifa->wait_timer))
+ tm_start(ifa->wait_timer, ifa->waitint S);
}
/* DEAD TIMER */
@@ -900,7 +896,7 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
WALK_LIST(nb, new->nbma_list)
{
/* See related note in ospf_iface_new() */
- if (ospf_is_v2(p) && !ipa_in_net(nb->ip, ifa->addr->prefix, ifa->addr->pxlen))
+ if (ospf_is_v2(p) && !ipa_in_netX(nb->ip, &ifa->addr->prefix))
continue;
if (ospf_is_v3(p) && !ipa_is_link_local(nb->ip))
@@ -1087,6 +1083,9 @@ ospf_ifa_notify2(struct proto *P, uint flags, struct ifa *a)
{
struct ospf_proto *p = (struct ospf_proto *) P;
+ if (a->prefix.type != NET_IP4)
+ return;
+
if (a->flags & IA_SECONDARY)
return;
@@ -1126,6 +1125,9 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a)
other addresses are used for link-LSA. */
if (a->scope == SCOPE_LINK)
{
+ if (a->prefix.type != NET_IP6)
+ return;
+
if (flags & IF_CHANGE_UP)
{
struct ospf_mip_walk s = { .iface = a->iface };
@@ -1143,6 +1145,9 @@ ospf_ifa_notify3(struct proto *P, uint flags, struct ifa *a)
}
else
{
+ if (a->prefix.type != ospf_get_af(p))
+ return;
+
struct ospf_iface *ifa;
WALK_LIST(ifa, p->iface_list)
if (ifa->iface == a->iface)
@@ -1168,6 +1173,9 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p)
WALK_LIST(a, iface->addrs)
{
+ if (a->prefix.type != NET_IP4)
+ continue;
+
if (a->flags & IA_SECONDARY)
continue;
@@ -1186,8 +1194,8 @@ ospf_reconfigure_ifaces2(struct ospf_proto *p)
continue;
/* Hard restart */
- log(L_INFO "%s: Restarting interface %s (%I/%d) in area %R",
- p->p.name, ifa->ifname, a->prefix, a->pxlen, s.oa->areaid);
+ log(L_INFO "%s: Restarting interface %s (%N) in area %R",
+ p->p.name, ifa->ifname, &a->prefix, s.oa->areaid);
ospf_iface_shutdown(ifa);
ospf_iface_remove(ifa);
}
@@ -1211,6 +1219,9 @@ ospf_reconfigure_ifaces3(struct ospf_proto *p)
WALK_LIST(a, iface->addrs)
{
+ if (a->prefix.type != NET_IP6)
+ continue;
+
if (a->flags & IA_SECONDARY)
continue;
@@ -1342,7 +1353,7 @@ ospf_iface_info(struct ospf_iface *ifa)
else if (ifa->addr->flags & IA_PEER)
cli_msg(-1015, "Interface %s (peer %I)", ifa->ifname, ifa->addr->opposite);
else
- cli_msg(-1015, "Interface %s (%I/%d)", ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen);
+ cli_msg(-1015, "Interface %s (%N)", ifa->ifname, &ifa->addr->prefix);
cli_msg(-1015, "\tType: %s%s", ospf_it[ifa->type], more);
cli_msg(-1015, "\tArea: %R (%u)", ifa->oa->areaid, ifa->oa->areaid);
diff --git a/proto/ospf/lsalib.c b/proto/ospf/lsalib.c
index cb7b186a..fbfd8d29 100644
--- a/proto/ospf/lsalib.c
+++ b/proto/ospf/lsalib.c
@@ -280,21 +280,19 @@ lsa_walk_rt(struct ospf_lsa_rt_walk *rt)
void
-lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, ip_addr *ip, int *pxlen, u8 *pxopts, u32 *metric)
+lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric)
{
if (ospf2)
{
struct ospf_lsa_sum2 *ls = en->lsa_body;
- *ip = ipa_from_u32(en->lsa.id & ls->netmask);
- *pxlen = u32_masklen(ls->netmask);
+ net_fill_ip4(net, ip4_from_u32(en->lsa.id & ls->netmask), u32_masklen(ls->netmask));
*pxopts = 0;
*metric = ls->metric & LSA_METRIC_MASK;
}
else
{
struct ospf_lsa_sum3_net *ls = en->lsa_body;
- u16 rest;
- lsa_get_ipv6_prefix(ls->prefix, ip, pxlen, pxopts, &rest);
+ ospf3_get_prefix(ls->prefix, af, net, pxopts, NULL);
*metric = ls->metric & LSA_METRIC_MASK;
}
}
@@ -319,13 +317,14 @@ lsa_parse_sum_rt(struct top_hash_entry *en, int ospf2, u32 *drid, u32 *metric, u
}
void
-lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *rt)
+lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_local *rt)
{
if (ospf2)
{
struct ospf_lsa_ext2 *ext = en->lsa_body;
- rt->ip = ipa_from_u32(en->lsa.id & ext->netmask);
- rt->pxlen = u32_masklen(ext->netmask);
+ net_fill_ip4(&rt->net,
+ ip4_from_u32(en->lsa.id & ext->netmask),
+ u32_masklen(ext->netmask));
rt->pxopts = 0;
rt->metric = ext->metric & LSA_METRIC_MASK;
rt->ebit = ext->metric & LSA_EXT2_EBIT;
@@ -339,14 +338,13 @@ lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *r
else
{
struct ospf_lsa_ext3 *ext = en->lsa_body;
- u16 rest;
- u32 *buf = lsa_get_ipv6_prefix(ext->rest, &rt->ip, &rt->pxlen, &rt->pxopts, &rest);
+ u32 *buf = ospf3_get_prefix(ext->rest, af, &rt->net, &rt->pxopts, NULL);
rt->metric = ext->metric & LSA_METRIC_MASK;
rt->ebit = ext->metric & LSA_EXT3_EBIT;
rt->fbit = ext->metric & LSA_EXT3_FBIT;
if (rt->fbit)
- buf = lsa_get_ipv6_addr(buf, &rt->fwaddr);
+ buf = ospf3_get_addr(buf, af, &rt->fwaddr);
else
rt->fwaddr = IPA_NONE;
@@ -452,7 +450,7 @@ lsa_validate_sum3_net(struct ospf_lsa_header *lsa, struct ospf_lsa_sum3_net *bod
return 0;
u8 pxl = pxlen(body->prefix);
- if (pxl > MAX_PREFIX_LENGTH)
+ if (pxl > IP6_MAX_PREFIX_LENGTH)
return 0;
if (lsa->length != (HDRLEN + sizeof(struct ospf_lsa_sum3_net) +
@@ -491,11 +489,11 @@ lsa_validate_ext3(struct ospf_lsa_header *lsa, struct ospf_lsa_ext3 *body)
return 0;
u8 pxl = pxlen(body->rest);
- if (pxl > MAX_PREFIX_LENGTH)
+ if (pxl > IP6_MAX_PREFIX_LENGTH)
return 0;
int len = IPV6_PREFIX_SPACE(pxl);
- if (body->metric & LSA_EXT3_FBIT) // forwardinf address
+ if (body->metric & LSA_EXT3_FBIT) // forwarding address
len += 16;
if (body->metric & LSA_EXT3_TBIT) // route tag
len += 4;
@@ -520,7 +518,7 @@ lsa_validate_pxlist(struct ospf_lsa_header *lsa, u32 pxcount, uint offset, u8 *p
return 0;
u8 pxl = pxlen((u32 *) (pbuf + offset));
- if (pxl > MAX_PREFIX_LENGTH)
+ if (pxl > IP6_MAX_PREFIX_LENGTH)
return 0;
offset += IPV6_PREFIX_SPACE(pxl);
@@ -554,8 +552,8 @@ lsa_validate_prefix(struct ospf_lsa_header *lsa, struct ospf_lsa_prefix *body)
/**
* lsa_validate - check whether given LSA is valid
* @lsa: LSA header
- * @lsa_type: one of %LSA_T_xxx
- * @ospf2: %true means OSPF version 2, %false means OSPF version 3
+ * @lsa_type: internal LSA type (%LSA_T_xxx)
+ * @ospf2: %true for OSPFv2, %false for OSPFv3
* @body: pointer to LSA body
*
* Checks internal structure of given LSA body (minimal length,
diff --git a/proto/ospf/lsalib.h b/proto/ospf/lsalib.h
index 638b3525..fca7faec 100644
--- a/proto/ospf/lsalib.h
+++ b/proto/ospf/lsalib.h
@@ -41,7 +41,7 @@ void lsa_get_type_domain_(u32 itype, struct ospf_iface *ifa, u32 *otype, u32 *do
static inline void lsa_get_type_domain(struct ospf_lsa_header *lsa, struct ospf_iface *ifa, u32 *otype, u32 *domain)
{ lsa_get_type_domain_(lsa->type_raw, ifa, otype, domain); }
-static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p UNUSED4 UNUSED6)
+static inline u32 lsa_get_etype(struct ospf_lsa_header *h, struct ospf_proto *p)
{ return ospf_is_v2(p) ? (h->type_raw & LSA_T_V2_MASK) : h->type_raw; }
@@ -55,9 +55,12 @@ u16 lsa_verify_checksum(const void *lsa_n, int lsa_len);
int lsa_comp(struct ospf_lsa_header *l1, struct ospf_lsa_header *l2);
void lsa_walk_rt_init(struct ospf_proto *po, struct top_hash_entry *act, struct ospf_lsa_rt_walk *rt);
int lsa_walk_rt(struct ospf_lsa_rt_walk *rt);
-void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, ip_addr *ip, int *pxlen, u8 *pxopts, u32 *metric);
+void lsa_parse_sum_net(struct top_hash_entry *en, int ospf2, int af, net_addr *net, u8 *pxopts, u32 *metric);
void lsa_parse_sum_rt(struct top_hash_entry *en, int ospf2, u32 *drid, u32 *metric, u32 *options);
-void lsa_parse_ext(struct top_hash_entry *en, int ospf2, struct ospf_lsa_ext_local *rt);
+void lsa_parse_ext(struct top_hash_entry *en, int ospf2, int af, struct ospf_lsa_ext_local *rt);
int lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body);
+static inline btime lsa_inst_age(struct top_hash_entry *en)
+{ return current_time() - en->inst_time; }
+
#endif /* _BIRD_OSPF_LSALIB_H_ */
diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c
index 157d9628..a98c9098 100644
--- a/proto/ospf/lsupd.c
+++ b/proto/ospf/lsupd.c
@@ -137,7 +137,7 @@ ospf_lsa_lsrt_up(struct top_hash_entry *en, struct ospf_neighbor *n)
ret->lsa_body = LSA_BODY_DUMMY;
if (!tm_active(n->lsrt_timer))
- tm_start(n->lsrt_timer, n->ifa->rxmtint);
+ tm_start(n->lsrt_timer, n->ifa->rxmtint S);
}
void
@@ -572,7 +572,7 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa,
{
/* 13. (5a) - enforce minimum time between updates for received LSAs */
/* We also use this to ratelimit reactions to received self-originated LSAs */
- if (en && ((now - en->inst_time) < MINLSARRIVAL))
+ if (en && (lsa_inst_age(en) < MINLSARRIVAL))
{
OSPF_TRACE(D_EVENTS, "Skipping LSA received in less that MinLSArrival");
continue;
@@ -700,7 +700,7 @@ ospf_receive_lsupd(struct ospf_packet *pkt, struct ospf_iface *ifa,
if (!EMPTY_SLIST(n->lsrql) && (n->lsrqi == SHEAD(n->lsrql)))
{
ospf_send_lsreq(p, n);
- tm_start(n->lsrq_timer, n->ifa->rxmtint);
+ tm_start(n->lsrq_timer, n->ifa->rxmtint S);
}
return;
diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c
index 9fe3c028..f2d3505e 100644
--- a/proto/ospf/neighbor.c
+++ b/proto/ospf/neighbor.c
@@ -94,11 +94,11 @@ ospf_neighbor_new(struct ospf_iface *ifa)
init_list(&n->ackl[ACKL_DIRECT]);
init_list(&n->ackl[ACKL_DELAY]);
- n->inactim = tm_new_set(pool, inactivity_timer_hook, n, 0, 0);
- n->dbdes_timer = tm_new_set(pool, dbdes_timer_hook, n, 0, ifa->rxmtint);
- n->lsrq_timer = tm_new_set(pool, lsrq_timer_hook, n, 0, ifa->rxmtint);
- n->lsrt_timer = tm_new_set(pool, lsrt_timer_hook, n, 0, ifa->rxmtint);
- n->ackd_timer = tm_new_set(pool, ackd_timer_hook, n, 0, ifa->rxmtint / 2);
+ n->inactim = tm_new_init(pool, inactivity_timer_hook, n, 0, 0);
+ n->dbdes_timer = tm_new_init(pool, dbdes_timer_hook, n, ifa->rxmtint S, 0);
+ n->lsrq_timer = tm_new_init(pool, lsrq_timer_hook, n, ifa->rxmtint S, 0);
+ n->lsrt_timer = tm_new_init(pool, lsrt_timer_hook, n, ifa->rxmtint S, 0);
+ n->ackd_timer = tm_new_init(pool, ackd_timer_hook, n, ifa->rxmtint S / 2, 0);
return (n);
}
@@ -186,7 +186,7 @@ ospf_neigh_chstate(struct ospf_neighbor *n, u8 state)
n->myimms = DBDES_IMMS;
tm_start(n->dbdes_timer, 0);
- tm_start(n->ackd_timer, ifa->rxmtint / 2);
+ tm_start(n->ackd_timer, ifa->rxmtint S / 2);
}
if (state > NEIGHBOR_EXSTART)
@@ -231,7 +231,7 @@ ospf_neigh_sm(struct ospf_neighbor *n, int event)
ospf_neigh_chstate(n, NEIGHBOR_INIT);
/* Restart inactivity timer */
- tm_start(n->inactim, n->ifa->deadint);
+ tm_start(n->inactim, n->ifa->deadint S);
break;
case INM_2WAYREC:
@@ -359,7 +359,7 @@ can_do_adj(struct ospf_neighbor *n)
}
-static inline u32 neigh_get_id(struct ospf_proto *p UNUSED4 UNUSED6, struct ospf_neighbor *n)
+static inline u32 neigh_get_id(struct ospf_proto *p, struct ospf_neighbor *n)
{ return ospf_is_v2(p) ? ipa_to_u32(n->ip) : n->rid; }
static struct ospf_neighbor *
@@ -507,13 +507,14 @@ ospf_dr_election(struct ospf_iface *ifa)
u32 old_drid = ifa->drid;
u32 old_bdrid = ifa->bdrid;
+ ip_addr none = ospf_is_v2(p) ? IPA_NONE4 : IPA_NONE6;
ifa->drid = ndr ? ndr->rid : 0;
- ifa->drip = ndr ? ndr->ip : IPA_NONE;
+ ifa->drip = ndr ? ndr->ip : none;
ifa->dr_iface_id = ndr ? ndr->iface_id : 0;
ifa->bdrid = nbdr ? nbdr->rid : 0;
- ifa->bdrip = nbdr ? nbdr->ip : IPA_NONE;
+ ifa->bdrip = nbdr ? nbdr->ip : none;
DBG("DR=%R, BDR=%R\n", ifa->drid, ifa->bdrid);
@@ -650,20 +651,6 @@ ospf_sh_neigh_info(struct ospf_neighbor *n)
{
struct ospf_iface *ifa = n->ifa;
char *pos = "PtP ";
- char etime[6];
- int exp, sec, min;
-
- exp = n->inactim->expires - now;
- sec = exp % 60;
- min = exp / 60;
- if (min > 59)
- {
- bsprintf(etime, "-Inf-");
- }
- else
- {
- bsprintf(etime, "%02u:%02u", min, sec);
- }
if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_NBMA))
{
@@ -675,6 +662,7 @@ ospf_sh_neigh_info(struct ospf_neighbor *n)
pos = "Other";
}
- cli_msg(-1013, "%-1R\t%3u\t%s/%s\t%-5s\t%-10s %-1I", n->rid, n->priority,
- ospf_ns_names[n->state], pos, etime, ifa->ifname, n->ip);
+ cli_msg(-1013, "%-1R\t%3u\t%s/%s\t%7t\t%-10s %-1I",
+ n->rid, n->priority, ospf_ns_names[n->state], pos,
+ tm_remains(n->inactim), ifa->ifname, n->ip);
}
diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c
index d5d5d354..3ebebdaa 100644
--- a/proto/ospf/ospf.c
+++ b/proto/ospf/ospf.c
@@ -92,8 +92,10 @@
* - RFC 2328 - main OSPFv2 standard
* - RFC 5340 - main OSPFv3 standard
* - RFC 3101 - OSPFv2 NSSA areas
- * - RFC 6549 - OSPFv2 multi-instance extensions
- * - RFC 6987 - OSPF stub router advertisement
+ * - RFC 5709 - OSPFv2 HMAC-SHA Cryptographic Authentication
+ * - RFC 5838 - OSPFv3 Support of Address Families
+ * - RFC 6549 - OSPFv2 Multi-Instance Extensions
+ * - RFC 6987 - OSPF Stub Router Advertisement
*/
#include <stdlib.h>
@@ -102,18 +104,11 @@
static int ospf_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool);
static struct ea_list *ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool);
static void ospf_store_tmp_attrs(struct rte *rt, struct ea_list *attrs);
-static int ospf_reload_routes(struct proto *P);
+static void ospf_reload_routes(struct channel *C);
static int ospf_rte_better(struct rte *new, struct rte *old);
static int ospf_rte_same(struct rte *new, struct rte *old);
static void ospf_disp(timer *timer);
-static void
-ospf_area_initfib(struct fib_node *fn)
-{
- struct area_net *an = (struct area_net *) fn;
- an->hidden = 0;
- an->active = 0;
-}
static void
add_area_nets(struct ospf_area *oa, struct ospf_area_config *ac)
@@ -122,23 +117,35 @@ add_area_nets(struct ospf_area *oa, struct ospf_area_config *ac)
struct area_net_config *anc;
struct area_net *an;
- fib_init(&oa->net_fib, p->p.pool, sizeof(struct area_net), 0, ospf_area_initfib);
- fib_init(&oa->enet_fib, p->p.pool, sizeof(struct area_net), 0, ospf_area_initfib);
+ fib_init(&oa->net_fib, p->p.pool, ospf_get_af(p),
+ sizeof(struct area_net), OFFSETOF(struct area_net, fn), 0, NULL);
+ fib_init(&oa->enet_fib, p->p.pool, ospf_get_af(p),
+ sizeof(struct area_net), OFFSETOF(struct area_net, fn), 0, NULL);
WALK_LIST(anc, ac->net_list)
{
- an = (struct area_net *) fib_get(&oa->net_fib, &anc->px.addr, anc->px.len);
+ an = fib_get(&oa->net_fib, &anc->prefix);
an->hidden = anc->hidden;
}
WALK_LIST(anc, ac->enet_list)
{
- an = (struct area_net *) fib_get(&oa->enet_fib, &anc->px.addr, anc->px.len);
+ an = fib_get(&oa->enet_fib, &anc->prefix);
an->hidden = anc->hidden;
an->tag = anc->tag;
}
}
+static inline uint
+ospf_opts(struct ospf_proto *p)
+{
+ if (ospf_is_v2(p))
+ return 0;
+
+ return ((ospf_is_ip6(p) && !p->af_mc) ? OPT_V6 : 0) |
+ (!p->stub_router ? OPT_R : 0) | (p->af_ext ? OPT_AF : 0);
+}
+
static void
ospf_area_add(struct ospf_proto *p, struct ospf_area_config *ac)
{
@@ -154,16 +161,13 @@ ospf_area_add(struct ospf_proto *p, struct ospf_area_config *ac)
oa->areaid = ac->areaid;
oa->rt = NULL;
oa->po = p;
- fib_init(&oa->rtr, p->p.pool, sizeof(ort), 0, ospf_rt_initort);
+ fib_init(&oa->rtr, p->p.pool, NET_IP4, sizeof(ort), OFFSETOF(ort, fn), 0, NULL);
add_area_nets(oa, ac);
if (oa->areaid == 0)
p->backbone = oa;
- if (ospf_is_v2(p))
- oa->options = ac->type;
- else
- oa->options = ac->type | OPT_V6 | (p->stub_router ? 0 : OPT_R);
+ oa->options = ac->type | ospf_opts(p);
ospf_notify_rt_lsa(oa);
}
@@ -229,21 +233,25 @@ ospf_start(struct proto *P)
p->router_id = proto_get_router_id(P->cf);
p->ospf2 = c->ospf2;
+ p->af_ext = c->af_ext;
+ p->af_mc = c->af_mc;
p->rfc1583 = c->rfc1583;
p->stub_router = c->stub_router;
p->merge_external = c->merge_external;
p->asbr = c->asbr;
p->ecmp = c->ecmp;
p->tick = c->tick;
- p->disp_timer = tm_new_set(P->pool, ospf_disp, p, 0, p->tick);
- tm_start(p->disp_timer, 1);
+ p->disp_timer = tm_new_init(P->pool, ospf_disp, p, p->tick S, 0);
+ tm_start(p->disp_timer, 100 MS);
p->lsab_size = 256;
p->lsab_used = 0;
p->lsab = mb_alloc(P->pool, p->lsab_size);
- p->nhpool = lp_new(P->pool, 12*sizeof(struct mpnh));
+ p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop));
init_list(&(p->iface_list));
init_list(&(p->area_list));
- fib_init(&p->rtf, P->pool, sizeof(ort), 0, ospf_rt_initort);
+ fib_init(&p->rtf, P->pool, ospf_get_af(p), sizeof(ort), OFFSETOF(ort, fn), 0, NULL);
+ if (ospf_is_v3(p))
+ idm_init(&p->idm, P->pool, 16);
p->areano = 0;
p->gr = ospf_top_new(p, P->pool);
s_init_list(&(p->lsal));
@@ -299,15 +307,16 @@ ospf_dump(struct proto *P)
}
static struct proto *
-ospf_init(struct proto_config *c)
+ospf_init(struct proto_config *CF)
{
- struct ospf_config *oc = (struct ospf_config *) c;
- struct proto *P = proto_new(c, sizeof(struct ospf_proto));
+ struct ospf_config *cf = (struct ospf_config *) CF;
+ struct proto *P = proto_new(CF);
+
+ P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- P->accept_ra_types = RA_OPTIMAL;
P->rt_notify = ospf_rt_notify;
P->if_notify = ospf_if_notify;
- P->ifa_notify = oc->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3;
+ P->ifa_notify = cf->ospf2 ? ospf_ifa_notify2 : ospf_ifa_notify3;
P->import_control = ospf_import_control;
P->reload_routes = ospf_reload_routes;
P->make_tmp_attrs = ospf_make_tmp_attrs;
@@ -391,17 +400,16 @@ ospf_schedule_rtcalc(struct ospf_proto *p)
p->calcrt = 1;
}
-static int
-ospf_reload_routes(struct proto *P)
+static void
+ospf_reload_routes(struct channel *C)
{
- struct ospf_proto *p = (struct ospf_proto *) P;
+ struct ospf_proto *p = (struct ospf_proto *) C->proto;
- if (p->calcrt != 2)
- OSPF_TRACE(D_EVENTS, "Scheduling routing table calculation with route reload");
+ if (p->calcrt == 2)
+ return;
+ OSPF_TRACE(D_EVENTS, "Scheduling routing table calculation with route reload");
p->calcrt = 2;
-
- return 1;
}
@@ -506,9 +514,9 @@ ospf_shutdown(struct proto *P)
ospf_iface_shutdown(ifa);
/* Cleanup locked rta entries */
- FIB_WALK(&p->rtf, nftmp)
+ FIB_WALK(&p->rtf, ort, nf)
{
- rta_free(((ort *) nftmp)->old_rta);
+ rta_free(nf->old_rta);
}
FIB_WALK_END;
@@ -603,11 +611,7 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac)
struct ospf_iface *ifa;
oa->ac = nac;
-
- if (ospf_is_v2(p))
- oa->options = nac->type;
- else
- oa->options = nac->type | OPT_V6 | (p->stub_router ? 0 : OPT_R);
+ oa->options = nac->type | ospf_opts(p);
if (nac->type != oac->type)
{
@@ -639,17 +643,20 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac)
* nonbroadcast network, cost of interface, etc.
*/
static int
-ospf_reconfigure(struct proto *P, struct proto_config *c)
+ospf_reconfigure(struct proto *P, struct proto_config *CF)
{
struct ospf_proto *p = (struct ospf_proto *) P;
struct ospf_config *old = (struct ospf_config *) (P->cf);
- struct ospf_config *new = (struct ospf_config *) c;
+ struct ospf_config *new = (struct ospf_config *) CF;
struct ospf_area_config *nac;
struct ospf_area *oa, *oax;
struct ospf_iface *ifa, *ifx;
struct ospf_iface_patt *ip;
- if (proto_get_router_id(c) != p->router_id)
+ if (proto_get_router_id(CF) != p->router_id)
+ return 0;
+
+ if (p->ospf2 != new->ospf2)
return 0;
if (p->rfc1583 != new->rfc1583)
@@ -658,13 +665,19 @@ ospf_reconfigure(struct proto *P, struct proto_config *c)
if (old->abr != new->abr)
return 0;
+ if ((p->af_ext != new->af_ext) || (p->af_mc != new->af_mc))
+ return 0;
+
+ if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF)))
+ return 0;
+
p->stub_router = new->stub_router;
p->merge_external = new->merge_external;
p->asbr = new->asbr;
p->ecmp = new->ecmp;
p->tick = new->tick;
- p->disp_timer->recurrent = p->tick;
- tm_start(p->disp_timer, 1);
+ p->disp_timer->recurrent = p->tick S;
+ tm_start(p->disp_timer, 100 MS);
/* Mark all areas and ifaces */
WALK_LIST(oa, p->area_list)
@@ -746,7 +759,6 @@ ospf_sh(struct proto *P)
struct ospf_iface *ifa;
struct ospf_neighbor *n;
int ifano, nno, adjno, firstfib;
- struct area_net *anet;
if (p->p.proto_state != PS_UP)
{
@@ -795,29 +807,27 @@ ospf_sh(struct proto *P)
cli_msg(-1014, "\t\tNumber of adjacent neighbors:\t%u", adjno);
firstfib = 1;
- FIB_WALK(&oa->net_fib, nftmp)
+ FIB_WALK(&oa->net_fib, struct area_net, anet)
{
- anet = (struct area_net *) nftmp;
if(firstfib)
{
cli_msg(-1014, "\t\tArea networks:");
firstfib = 0;
}
- cli_msg(-1014, "\t\t\t%1I/%u\t%s\t%s", anet->fn.prefix, anet->fn.pxlen,
+ cli_msg(-1014, "\t\t\t%1N\t%s\t%s", anet->fn.addr,
anet->hidden ? "Hidden" : "Advertise", anet->active ? "Active" : "");
}
FIB_WALK_END;
firstfib = 1;
- FIB_WALK(&oa->enet_fib, nftmp)
+ FIB_WALK(&oa->enet_fib, struct area_net, anet)
{
- anet = (struct area_net *) nftmp;
if(firstfib)
{
cli_msg(-1014, "\t\tArea external networks:");
firstfib = 0;
}
- cli_msg(-1014, "\t\t\t%1I/%u\t%s\t%s", anet->fn.prefix, anet->fn.pxlen,
+ cli_msg(-1014, "\t\t\t%1N\t%s\t%s", anet->fn.addr,
anet->hidden ? "Hidden" : "Advertise", anet->active ? "Active" : "");
}
FIB_WALK_END;
@@ -1072,15 +1082,14 @@ show_lsa_network(struct top_hash_entry *he, int ospf2)
}
static inline void
-show_lsa_sum_net(struct top_hash_entry *he, int ospf2)
+show_lsa_sum_net(struct top_hash_entry *he, int ospf2, int af)
{
- ip_addr ip;
- int pxlen;
+ net_addr net;
u8 pxopts;
u32 metric;
- lsa_parse_sum_net(he, ospf2, &ip, &pxlen, &pxopts, &metric);
- cli_msg(-1016, "\t\txnetwork %I/%d metric %u", ip, pxlen, metric);
+ lsa_parse_sum_net(he, ospf2, af, &net, &pxopts, &metric);
+ cli_msg(-1016, "\t\txnetwork %N metric %u", &net, metric);
}
static inline void
@@ -1096,16 +1105,16 @@ show_lsa_sum_rt(struct top_hash_entry *he, int ospf2)
static inline void
-show_lsa_external(struct top_hash_entry *he, int ospf2)
+show_lsa_external(struct top_hash_entry *he, int ospf2, int af)
{
struct ospf_lsa_ext_local rt;
- char str_via[STD_ADDRESS_P_LENGTH + 8] = "";
+ char str_via[IPA_MAX_TEXT_LENGTH + 8] = "";
char str_tag[16] = "";
if (he->lsa_type == LSA_T_EXT)
he->domain = 0; /* Unmark the LSA */
- lsa_parse_ext(he, ospf2, &rt);
+ lsa_parse_ext(he, ospf2, af, &rt);
if (rt.fbit)
bsprintf(str_via, " via %I", rt.fwaddr);
@@ -1113,19 +1122,15 @@ show_lsa_external(struct top_hash_entry *he, int ospf2)
if (rt.tag)
bsprintf(str_tag, " tag %08x", rt.tag);
- cli_msg(-1016, "\t\t%s %I/%d metric%s %u%s%s",
+ cli_msg(-1016, "\t\t%s %N metric%s %u%s%s",
(he->lsa_type == LSA_T_NSSA) ? "nssa-ext" : "external",
- rt.ip, rt.pxlen, rt.ebit ? "2" : "", rt.metric, str_via, str_tag);
+ &rt.net, rt.ebit ? "2" : "", rt.metric, str_via, str_tag);
}
static inline void
-show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode)
+show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode, int af)
{
struct ospf_lsa_prefix *px = he->lsa_body;
- ip_addr pxa;
- int pxlen;
- u8 pxopts;
- u16 metric;
u32 *buf;
int i;
@@ -1141,14 +1146,18 @@ show_lsa_prefix(struct top_hash_entry *he, struct top_hash_entry *cnode)
buf = px->rest;
for (i = 0; i < px->pxcount; i++)
- {
- buf = lsa_get_ipv6_prefix(buf, &pxa, &pxlen, &pxopts, &metric);
+ {
+ net_addr net;
+ u8 pxopts;
+ u16 metric;
- if (px->ref_type == LSA_T_RT)
- cli_msg(-1016, "\t\tstubnet %I/%d metric %u", pxa, pxlen, metric);
- else
- cli_msg(-1016, "\t\taddress %I/%d", pxa, pxlen);
- }
+ buf = ospf3_get_prefix(buf, af, &net, &pxopts, &metric);
+
+ if (px->ref_type == LSA_T_RT)
+ cli_msg(-1016, "\t\tstubnet %N metric %u", &net, metric);
+ else
+ cli_msg(-1016, "\t\taddress %N", &net);
+ }
}
void
@@ -1156,6 +1165,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable)
{
struct ospf_proto *p = (struct ospf_proto *) P;
int ospf2 = ospf_is_v2(p);
+ int af = ospf_get_af(p);
uint i, ix, j1, jx;
u32 last_area = 0xFFFFFFFF;
@@ -1276,7 +1286,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable)
case LSA_T_SUM_NET:
if (cnode->lsa_type == LSA_T_RT)
- show_lsa_sum_net(he, ospf2);
+ show_lsa_sum_net(he, ospf2, af);
break;
case LSA_T_SUM_RT:
@@ -1286,11 +1296,11 @@ ospf_sh_state(struct proto *P, int verbose, int reachable)
case LSA_T_EXT:
case LSA_T_NSSA:
- show_lsa_external(he, ospf2);
+ show_lsa_external(he, ospf2, af);
break;
case LSA_T_PREFIX:
- show_lsa_prefix(he, cnode);
+ show_lsa_prefix(he, cnode, af);
break;
}
@@ -1304,7 +1314,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable)
ix++;
while ((ix < jx) && (hex[ix]->lsa.rt == cnode->lsa.rt))
- show_lsa_external(hex[ix++], ospf2);
+ show_lsa_external(hex[ix++], ospf2, af);
cnode = NULL;
}
@@ -1338,7 +1348,7 @@ ospf_sh_state(struct proto *P, int verbose, int reachable)
last_rt = he->lsa.rt;
}
- show_lsa_external(he, ospf2);
+ show_lsa_external(he, ospf2, af);
}
}
@@ -1468,6 +1478,8 @@ struct protocol proto_ospf = {
.template = "ospf%d",
.attr_class = EAP_OSPF,
.preference = DEF_PREF_OSPF,
+ .channel_mask = NB_IP,
+ .proto_size = sizeof(struct ospf_proto),
.config_size = sizeof(struct ospf_config),
.init = ospf_init,
.dump = ospf_dump,
diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h
index 81c610d5..54eeb74c 100644
--- a/proto/ospf/ospf.h
+++ b/proto/ospf/ospf.h
@@ -14,7 +14,7 @@
#include "nest/bird.h"
#include "lib/checksum.h"
-#include "lib/ip.h"
+#include "lib/idm.h"
#include "lib/lists.h"
#include "lib/slists.h"
#include "lib/socket.h"
@@ -37,14 +37,6 @@
#endif
-#ifdef IPV6
-#define OSPF_IS_V2 0
-#else
-#define OSPF_IS_V2 1
-#endif
-
-// FIXME: MAX_PREFIX_LENGTH
-
#define OSPF_TRACE(flags, msg, args...) \
do { if ((p->p.debug & flags) || OSPF_FORCE_DEBUG) \
log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
@@ -66,16 +58,16 @@
log_rl(&p->log_lsa_tbf, L_REMOTE "%s: " msg, p->p.name, args)
#define LOG_LSA2(msg, args...) \
- do { if (! p->log_lsa_tbf.mark) \
+ do { if (! p->log_lsa_tbf.drop) \
log(L_REMOTE "%s: " msg, p->p.name, args); } while(0)
#define OSPF_PROTO 89
-#define LSREFRESHTIME 1800 /* 30 minutes */
-#define MINLSINTERVAL 5
-#define MINLSARRIVAL 1
-#define LSINFINITY 0xffffff
+#define LSREFRESHTIME 1800 /* 30 minutes */
+#define MINLSINTERVAL (5 S_)
+#define MINLSARRIVAL (1 S_)
+#define LSINFINITY 0xffffff
#define OSPF_DEFAULT_TICK 1
#define OSPF_DEFAULT_STUB_COST 1000
@@ -87,16 +79,18 @@
#define OSPF_VLINK_ID_OFFSET 0x80000000
-
struct ospf_config
{
struct proto_config c;
uint tick;
u8 ospf2;
+ u8 af_ext;
+ u8 af_mc;
u8 rfc1583;
u8 stub_router;
u8 merge_external;
u8 instance_id;
+ u8 instance_id_set;
u8 abr;
u8 asbr;
int ecmp;
@@ -125,24 +119,24 @@ struct ospf_area_config
struct area_net_config
{
node n;
- struct prefix px;
+ net_addr prefix;
u32 tag;
u8 hidden;
};
struct area_net
{
- struct fib_node fn;
u32 metric; /* With possible LSA_EXT3_EBIT for NSSA area nets */
u32 tag;
u8 hidden;
u8 active;
+ struct fib_node fn;
};
struct ospf_stubnet_config
{
node n;
- struct prefix px;
+ net_addr prefix;
u32 cost;
u8 hidden;
u8 summary;
@@ -177,9 +171,9 @@ struct ospf_iface_patt
int tx_priority;
u16 tx_length;
u16 rx_buffer;
-
#define OSPF_RXBUF_MINSIZE 256 /* Minimal allowed size */
u8 instance_id;
+ u8 instance_id_set;
u8 autype; /* OSPF_AUTH_*, not really used in OSPFv3 */
u8 strictnbma;
u8 check_link;
@@ -189,7 +183,6 @@ struct ospf_iface_patt
u8 ptp_netmask; /* bool + 2 for unspecified */
u8 ttl_security; /* bool + 2 for TX only */
u8 bfd;
- u8 bsd_secondary;
list *passwords;
};
@@ -220,12 +213,15 @@ struct ospf_proto
int areano; /* Number of area I belong to */
int padj; /* Number of neighbors in Exchange or Loading state */
struct fib rtf; /* Routing table */
- byte ospf2; /* OSPF v2 or v3 */
- byte rfc1583; /* RFC1583 compatibility */
- byte stub_router; /* Do not forward transit traffic */
- byte merge_external; /* Should i merge external routes? */
- byte asbr; /* May i originate any ext/NSSA lsa? */
- byte ecmp; /* Maximal number of nexthops in ECMP route, or 0 */
+ struct idm idm; /* OSPFv3 LSA ID map */
+ u8 ospf2; /* OSPF v2 or v3 */
+ u8 af_ext; /* OSPFv3-AF extension */
+ u8 af_mc; /* OSPFv3-AF multicast */
+ u8 rfc1583; /* RFC1583 compatibility */
+ u8 stub_router; /* Do not forward transit traffic */
+ u8 merge_external; /* Should i merge external routes? */
+ u8 asbr; /* May i originate any ext/NSSA lsa? */
+ u8 ecmp; /* Maximal number of nexthops in ECMP route, or 0 */
struct ospf_area *backbone; /* If exists */
event *flood_event; /* Event for flooding LS updates */
void *lsab; /* LSA buffer used when originating router LSAs */
@@ -273,10 +269,10 @@ struct ospf_iface
sock *sk; /* IP socket */
list neigh_list; /* List of neighbors (struct ospf_neighbor) */
u32 cost; /* Cost of iface */
- u32 waitint; /* number of sec before changing state from wait */
- u32 rxmtint; /* number of seconds between LSA retransmissions */
- u32 pollint; /* Poll interval */
- u32 deadint; /* after "deadint" missing hellos is router dead */
+ u32 waitint; /* Number of seconds before changing state from wait */
+ u32 rxmtint; /* Number of seconds between LSA retransmissions */
+ u32 pollint; /* Poll interval in seconds */
+ u32 deadint; /* After deadint seconds without hellos is router dead */
u32 iface_id; /* Interface ID (iface->index or new value for vlinks) */
u32 vid; /* ID of peer of virtual link */
ip_addr vip; /* IP of peer of virtual link */
@@ -287,8 +283,8 @@ struct ospf_iface
interface. LSAs contained in the update */
u16 helloint; /* number of seconds between hello sending */
list *passwords;
- u32 csn; /* Last used crypt seq number */
- bird_clock_t csn_use; /* Last time when packet with that CSN was sent */
+ u32 csn; /* Last used crypt seq number */
+ btime csn_use; /* Last time when packet with that CSN was sent */
ip_addr all_routers; /* Multicast (or broadcast) address for all routers */
ip_addr des_routers; /* Multicast (or NULL) address for designated routers */
ip_addr drip; /* Designated router IP */
@@ -458,14 +454,15 @@ struct ospf_neighbor
/* Generic option flags */
-#define OPT_V6 0x01 /* OSPFv3, LSA relevant for IPv6 routing calculation */
-#define OPT_E 0x02 /* Related to AS-external LSAs */
-#define OPT_MC 0x04 /* Related to MOSPF, not used and obsolete */
-#define OPT_N 0x08 /* Related to NSSA */
-#define OPT_P 0x08 /* OSPFv2, flags P and N share position, see NSSA RFC */
-#define OPT_EA 0x10 /* OSPFv2, external attributes, not used and obsolete */
-#define OPT_R 0x10 /* OSPFv3, originator is active router */
-#define OPT_DC 0x20 /* Related to demand circuits, not used */
+#define OPT_V6 0x0001 /* OSPFv3, LSA relevant for IPv6 routing calculation */
+#define OPT_E 0x0002 /* Related to AS-external LSAs */
+#define OPT_MC 0x0004 /* Related to MOSPF, not used and obsolete */
+#define OPT_N 0x0008 /* Related to NSSA */
+#define OPT_P 0x0008 /* OSPFv2, flags P and N share position, see NSSA RFC */
+#define OPT_EA 0x0010 /* OSPFv2, external attributes, not used and obsolete */
+#define OPT_R 0x0010 /* OSPFv3, originator is active router */
+#define OPT_DC 0x0020 /* Related to demand circuits, not used */
+#define OPT_AF 0x0100 /* OSPFv3 Address Families (RFC 5838) */
/* Router-LSA VEB flags are stored together with links (OSPFv2) or options (OSPFv3) */
#define OPT_RT_B (0x01 << 24)
@@ -682,8 +679,8 @@ struct ospf_lsa_ext3
struct ospf_lsa_ext_local
{
- ip_addr ip, fwaddr;
- int pxlen;
+ net_addr net;
+ ip_addr fwaddr;
u32 metric, ebit, fbit, tag, propagate;
u8 pxopts;
};
@@ -721,73 +718,102 @@ lsa_net_count(struct ospf_lsa_header *lsa)
/* In ospf_area->rtr we store paths to routers, but we use RID (and not IP address)
as index, so we need to encapsulate RID to IP address */
-#define ipa_from_rid(x) ipa_from_u32(x)
-#define ipa_to_rid(x) ipa_to_u32(x)
+#define net_from_rid(x) NET_ADDR_IP4(ip4_from_u32(x), IP4_MAX_PREFIX_LENGTH)
+#define rid_from_net(x) ip4_to_u32(((net_addr_ip4 *) x)->prefix)
#define IPV6_PREFIX_SPACE(x) ((((x) + 63) / 32) * 4)
#define IPV6_PREFIX_WORDS(x) (((x) + 63) / 32)
-/* FIXME: these four functions should be significantly redesigned w.r.t. integration,
- also should be named as ospf3_* instead of *_ipv6_* */
+
+static inline int
+ospf_valid_prefix(net_addr *n)
+{
+ /*
+ * In OSPFv2, prefix is stored as netmask; ip4_masklen() returns 255 for
+ * invalid one. But OSPFv3-AF may receive IPv4 net with 32 < pxlen < 128.
+ */
+ uint max = (n->type == NET_IP4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH;
+ return n->pxlen <= max;
+}
+
+/*
+ * In OSPFv3-AF (RFC 5838), an IPv4 address is encoded by placing it in the
+ * first 32 bits of an IPv6 address and setting the remaining bits to zero.
+ * Likewise for an IPv4 prefix, where the remaining bits do not matter. The
+ * following functions convert between IPv4 and IPv4-in-IPv6 representations:
+ */
+
+static inline ip4_addr ospf3_6to4(ip6_addr a)
+{ return _MI4(_I0(a)); }
+
+static inline ip6_addr ospf3_4to6(ip4_addr a)
+{ return _MI6(_I(a), 0, 0, 0); }
+
static inline u32 *
-lsa_get_ipv6_prefix(u32 *buf, ip_addr *addr, int *pxlen, u8 *pxopts, u16 *rest)
+ospf3_get_prefix(u32 *buf, int af, net_addr *n, u8 *pxopts, u16 *rest)
{
- u8 pxl = (*buf >> 24);
- *pxopts = (*buf >> 16);
- *rest = *buf;
- *pxlen = pxl;
+ ip6_addr px = IP6_NONE;
+ uint pxlen = (*buf >> 24);
+ *pxopts = (*buf >> 16) & 0xff;
+ if (rest) *rest = *buf & 0xffff;
buf++;
- *addr = IPA_NONE;
-
-#ifdef IPV6
- if (pxl > 0)
- _I0(*addr) = *buf++;
- if (pxl > 32)
- _I1(*addr) = *buf++;
- if (pxl > 64)
- _I2(*addr) = *buf++;
- if (pxl > 96)
- _I3(*addr) = *buf++;
+ if (pxlen > 0)
+ _I0(px) = *buf++;
+ if (pxlen > 32)
+ _I1(px) = *buf++;
+ if (pxlen > 64)
+ _I2(px) = *buf++;
+ if (pxlen > 96)
+ _I3(px) = *buf++;
/* Clean up remaining bits */
- if (pxl < 128)
- addr->addr[pxl / 32] &= u32_mkmask(pxl % 32);
-#endif
+ if (pxlen < 128)
+ px.addr[pxlen / 32] &= u32_mkmask(pxlen % 32);
+
+ if (af == NET_IP4)
+ net_fill_ip4(n, ospf3_6to4(px), pxlen);
+ else
+ net_fill_ip6(n, px, pxlen);
return buf;
}
static inline u32 *
-lsa_get_ipv6_addr(u32 *buf, ip_addr *addr)
+ospf3_put_prefix(u32 *buf, net_addr *n, u8 pxopts, u16 rest)
{
- *addr = *(ip_addr *) buf;
- return buf + 4;
-}
+ ip6_addr px = (n->type == NET_IP4) ? ospf3_4to6(net4_prefix(n)) : net6_prefix(n);
+ uint pxlen = n->pxlen;
-static inline u32 *
-put_ipv6_prefix(u32 *buf, ip_addr addr UNUSED4, u8 pxlen UNUSED4, u8 pxopts UNUSED4, u16 lh UNUSED4)
-{
-#ifdef IPV6
- *buf++ = ((pxlen << 24) | (pxopts << 16) | lh);
+ *buf++ = ((pxlen << 24) | (pxopts << 16) | rest);
if (pxlen > 0)
- *buf++ = _I0(addr);
+ *buf++ = _I0(px);
if (pxlen > 32)
- *buf++ = _I1(addr);
+ *buf++ = _I1(px);
if (pxlen > 64)
- *buf++ = _I2(addr);
+ *buf++ = _I2(px);
if (pxlen > 96)
- *buf++ = _I3(addr);
-#endif
+ *buf++ = _I3(px);
+
return buf;
}
static inline u32 *
-put_ipv6_addr(u32 *buf, ip_addr addr)
+ospf3_get_addr(u32 *buf, int af, ip_addr *addr)
{
- *(ip_addr *) buf = addr;
+ ip6_addr a;
+ memcpy(&a, buf, 16);
+ *addr = (af == NET_IP4) ? ipa_from_ip4(ospf3_6to4(a)) : ipa_from_ip6(a);
+ return buf + 4;
+}
+
+static inline u32 *
+ospf3_put_addr(u32 *buf, ip_addr addr)
+{
+ ip6_addr a = ipa_is_ip4(addr) ? ospf3_4to6(ipa_to_ip4(addr)) : ipa_to_ip6(addr);
+ memcpy(buf, &a, 16);
return buf + 4;
}
@@ -831,19 +857,24 @@ static inline void ospf_notify_net_lsa(struct ospf_iface *ifa)
static inline void ospf_notify_link_lsa(struct ospf_iface *ifa)
{ ifa->update_link_lsa = 1; }
-
-#define ospf_is_v2(X) OSPF_IS_V2
-#define ospf_is_v3(X) (!OSPF_IS_V2)
-/*
static inline int ospf_is_v2(struct ospf_proto *p)
{ return p->ospf2; }
static inline int ospf_is_v3(struct ospf_proto *p)
{ return ! p->ospf2; }
-*/
-static inline int ospf_get_version(struct ospf_proto *p UNUSED4 UNUSED6)
+
+static inline int ospf_get_version(struct ospf_proto *p)
{ return ospf_is_v2(p) ? 2 : 3; }
+static inline int ospf_is_ip4(struct ospf_proto *p)
+{ return p->p.net_type == NET_IP4; }
+
+static inline int ospf_is_ip6(struct ospf_proto *p)
+{ return p->p.net_type == NET_IP6; }
+
+static inline int ospf_get_af(struct ospf_proto *p)
+{ return p->p.net_type; }
+
struct ospf_area *ospf_find_area(struct ospf_proto *p, u32 aid);
static inline struct ospf_area *ospf_main_area(struct ospf_proto *p)
@@ -925,7 +956,7 @@ static inline void ospf_send_to_des(struct ospf_iface *ifa)
#define SKIP(DSC) do { err_dsc = DSC; goto skip; } while(0)
#endif
-static inline uint ospf_pkt_hdrlen(struct ospf_proto *p UNUSED4 UNUSED6)
+static inline uint ospf_pkt_hdrlen(struct ospf_proto *p)
{ return ospf_is_v2(p) ? (sizeof(struct ospf_packet) + sizeof(union ospf_auth)) : sizeof(struct ospf_packet); }
static inline void * ospf_tx_buffer(struct ospf_iface *ifa)
diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c
index 6b6a97a4..38d7a75f 100644
--- a/proto/ospf/packet.c
+++ b/proto/ospf/packet.c
@@ -77,16 +77,16 @@ ospf_pkt_finalize(struct ospf_iface *ifa, struct ospf_packet *pkt, uint *plen)
reboot when system does not have independent RTC? */
if (!ifa->csn)
{
- ifa->csn = (u32) now;
- ifa->csn_use = now;
+ ifa->csn = (u32) (current_real_time() TO_S);
+ ifa->csn_use = current_time();
}
/* We must have sufficient delay between sending a packet and increasing
CSN to prevent reordering of packets (in a network) with different CSNs */
- if ((now - ifa->csn_use) > 1)
+ if ((current_time() - ifa->csn_use) > 1 S)
ifa->csn++;
- ifa->csn_use = now;
+ ifa->csn_use = current_time();
uint auth_len = mac_type_length(pass->alg);
byte *auth_tail = ((byte *) pkt + *plen);
@@ -203,7 +203,7 @@ drop:
/**
* ospf_rx_hook
* @sk: socket we received the packet.
- * @len: size of the packet
+ * @len: length of the packet
*
* This is the entry point for messages from neighbors. Many checks (like
* authentication, checksums, size) are done before the packet is passed to
@@ -231,7 +231,7 @@ ospf_rx_hook(sock *sk, uint len)
return 1;
int src_local, dst_local, dst_mcast;
- src_local = ipa_in_net(sk->faddr, ifa->addr->prefix, ifa->addr->pxlen);
+ src_local = ipa_in_netX(sk->faddr, &ifa->addr->prefix);
dst_local = ipa_equal(sk->laddr, ifa->addr->ip);
dst_mcast = ipa_equal(sk->laddr, ifa->all_routers) || ipa_equal(sk->laddr, ifa->des_routers);
@@ -270,9 +270,6 @@ ospf_rx_hook(sock *sk, uint len)
if (pkt == NULL)
DROP("bad IP header", len);
- if (ifa->check_ttl && (sk->rcv_ttl < 255))
- DROP("wrong TTL", sk->rcv_ttl);
-
if (len < sizeof(struct ospf_packet))
DROP("too short", len);
@@ -379,6 +376,10 @@ found:
if (ipa_equal(sk->laddr, ifa->des_routers) && (ifa->sk_dr == 0))
return 1;
+ /* TTL check must be done after instance dispatch */
+ if (ifa->check_ttl && (sk->rcv_ttl < 255))
+ DROP("wrong TTL", sk->rcv_ttl);
+
if (rid == p->router_id)
DROP1("my own router ID");
@@ -491,8 +492,8 @@ ospf_send_to_agt(struct ospf_iface *ifa, u8 state)
void
ospf_send_to_bdr(struct ospf_iface *ifa)
{
- if (ipa_nonzero(ifa->drip))
+ if (ipa_nonzero2(ifa->drip))
ospf_send_to(ifa, ifa->drip);
- if (ipa_nonzero(ifa->bdrip))
+ if (ipa_nonzero2(ifa->bdrip))
ospf_send_to(ifa, ifa->bdrip);
}
diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c
index 368e3d05..c0fe218a 100644
--- a/proto/ospf/rt.c
+++ b/proto/ospf/rt.c
@@ -10,9 +10,7 @@
#include "ospf.h"
-static void add_cand(list * l, struct top_hash_entry *en,
- struct top_hash_entry *par, u32 dist,
- struct ospf_area *oa, int i);
+static void add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, u32 dist, int i, uint lif, uint nif);
static void rt_sync(struct ospf_proto *p);
@@ -21,17 +19,8 @@ static inline void reset_ri(ort *ort)
bzero(&ort->n, sizeof(orta));
}
-void
-ospf_rt_initort(struct fib_node *fn)
-{
- ort *ri = (ort *) fn;
- reset_ri(ri);
- ri->old_rta = NULL;
- ri->fn.flags = 0;
-}
-
static inline int
-nh_is_vlink(struct mpnh *nhs)
+nh_is_vlink(struct nexthop *nhs)
{
return !nhs->iface;
}
@@ -42,20 +31,19 @@ unresolved_vlink(ort *ort)
return ort->n.nhs && nh_is_vlink(ort->n.nhs);
}
-static inline struct mpnh *
+static inline struct nexthop *
new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight)
{
- struct mpnh *nh = lp_alloc(p->nhpool, sizeof(struct mpnh));
+ struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop));
nh->gw = gw;
nh->iface = iface;
- nh->next = NULL;
nh->weight = weight;
return nh;
}
/* Returns true if there are device nexthops in n */
static inline int
-has_device_nexthops(const struct mpnh *n)
+has_device_nexthops(const struct nexthop *n)
{
for (; n; n = n->next)
if (ipa_zero(n->gw))
@@ -65,13 +53,13 @@ has_device_nexthops(const struct mpnh *n)
}
/* Replace device nexthops with nexthops to gw */
-static struct mpnh *
-fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw)
+static struct nexthop *
+fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw)
{
- struct mpnh *root1 = NULL;
- struct mpnh *root2 = NULL;
- struct mpnh **nn1 = &root1;
- struct mpnh **nn2 = &root2;
+ struct nexthop *root1 = NULL;
+ struct nexthop *root2 = NULL;
+ struct nexthop **nn1 = &root1;
+ struct nexthop **nn2 = &root2;
if (!p->ecmp)
return new_nexthop(p, gw, n->iface, n->weight);
@@ -82,7 +70,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw)
for (; n; n = n->next)
{
- struct mpnh *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight);
+ struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight);
if (ipa_zero(n->gw))
{
@@ -96,7 +84,7 @@ fix_device_nexthops(struct ospf_proto *p, const struct mpnh *n, ip_addr gw)
}
}
- return mpnh_merge(root1, root2, 1, 1, p->ecmp, p->nhpool);
+ return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool);
}
@@ -292,7 +280,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new)
if (old->nhs != new->nhs)
{
- old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
+ old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
p->ecmp, p->nhpool);
old->nhs_reuse = 1;
}
@@ -308,7 +296,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new)
if (old->nhs != new->nhs)
{
- old->nhs = mpnh_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
+ old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse,
p->ecmp, p->nhpool);
old->nhs_reuse = 1;
}
@@ -334,9 +322,9 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new)
static inline void
-ri_install_net(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new)
+ri_install_net(struct ospf_proto *p, net_addr *net, const orta *new)
{
- ort *old = (ort *) fib_get(&p->rtf, &prefix, pxlen);
+ ort *old = fib_get(&p->rtf, net);
int cmp = orta_compare(p, new, &old->n);
if (cmp > 0)
@@ -348,8 +336,8 @@ ri_install_net(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new)
static inline void
ri_install_rt(struct ospf_area *oa, u32 rid, const orta *new)
{
- ip_addr addr = ipa_from_rid(rid);
- ort *old = (ort *) fib_get(&oa->rtr, &addr, MAX_PREFIX_LENGTH);
+ net_addr_ip4 nrid = net_from_rid(rid);
+ ort *old = fib_get(&oa->rtr, (net_addr *) &nrid);
int cmp = orta_compare(oa->po, new, &old->n);
if (cmp > 0)
@@ -359,17 +347,19 @@ ri_install_rt(struct ospf_area *oa, u32 rid, const orta *new)
}
static inline void
-ri_install_asbr(struct ospf_proto *p, ip_addr *addr, const orta *new)
+ri_install_asbr(struct ospf_proto *p, u32 rid, const orta *new)
{
- ort *old = (ort *) fib_get(&p->backbone->rtr, addr, MAX_PREFIX_LENGTH);
+ net_addr_ip4 nrid = net_from_rid(rid);
+ ort *old = fib_get(&p->backbone->rtr, (net_addr *) &nrid);
+
if (orta_compare_asbr(p, new, &old->n) > 0)
ort_replace(old, new);
}
static inline void
-ri_install_ext(struct ospf_proto *p, ip_addr prefix, int pxlen, const orta *new)
+ri_install_ext(struct ospf_proto *p, net_addr *net, const orta *new)
{
- ort *old = (ort *) fib_get(&p->rtf, &prefix, pxlen);
+ ort *old = fib_get(&p->rtf, net);
int cmp = orta_compare_ext(p, new, &old->n);
if (cmp > 0)
@@ -404,7 +394,7 @@ px_pos_to_ifa(struct ospf_area *oa, int pos)
static void
-add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_hash_entry *en, int pos)
+add_network(struct ospf_area *oa, net_addr *net, int metric, struct top_hash_entry *en, int pos)
{
struct ospf_proto *p = oa->po;
@@ -419,7 +409,7 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_
.nhs = en->nhs
};
- if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH)
+ if (!ospf_valid_prefix(net))
{
log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)",
p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt);
@@ -440,7 +430,7 @@ add_network(struct ospf_area *oa, ip_addr px, int pxlen, int metric, struct top_
nf.nhs = ifa ? new_nexthop(p, IPA_NONE, ifa->iface, ifa->ecmp_weight) : NULL;
}
- ri_install_net(p, px, pxlen, &nf);
+ ri_install_net(p, net, &nf);
}
@@ -451,8 +441,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr
struct ospf_lsa_rt *rt = act->lsa_body;
struct ospf_lsa_rt_walk rtl;
struct top_hash_entry *tmp;
- ip_addr prefix;
- int pxlen, i;
+ int i;
if (rt->options & OPT_RT_V)
oa->trcap = 1;
@@ -502,9 +491,10 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr
* the same result by handing them here because add_network()
* will keep the best (not the first) found route.
*/
- prefix = ipa_from_u32(rtl.id & rtl.data);
- pxlen = u32_masklen(rtl.data);
- add_network(oa, prefix, pxlen, act->dist + rtl.metric, act, i);
+ net_addr_ip4 net =
+ NET_ADDR_IP4(ip4_from_u32(rtl.id & rtl.data), u32_masklen(rtl.data));
+
+ add_network(oa, (net_addr *) &net, act->dist + rtl.metric, act, i);
break;
case LSART_NET:
@@ -517,7 +507,7 @@ spfa_process_rt(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_entr
break;
}
- add_cand(&oa->cand, tmp, act, act->dist + rtl.metric, oa, i);
+ add_cand(oa, tmp, act, act->dist + rtl.metric, i, rtl.lif, rtl.nif);
}
}
@@ -526,21 +516,21 @@ spfa_process_net(struct ospf_proto *p, struct ospf_area *oa, struct top_hash_ent
{
struct ospf_lsa_net *ln = act->lsa_body;
struct top_hash_entry *tmp;
- ip_addr prefix;
- int pxlen, i, cnt;
+ int i, cnt;
if (ospf_is_v2(p))
{
- prefix = ipa_from_u32(act->lsa.id & ln->optx);
- pxlen = u32_masklen(ln->optx);
- add_network(oa, prefix, pxlen, act->dist, act, -1);
+ net_addr_ip4 net =
+ NET_ADDR_IP4(ip4_from_u32(act->lsa.id & ln->optx), u32_masklen(ln->optx));
+
+ add_network(oa, (net_addr *) &net, act->dist, act, -1);
}
cnt = lsa_net_count(&act->lsa);
for (i = 0; i < cnt; i++)
{
tmp = ospf_hash_find_rt(p->gr, oa->areaid, ln->routers[i]);
- add_cand(&oa->cand, tmp, act, act->dist, oa, -1);
+ add_cand(oa, tmp, act, act->dist, -1, 0, 0);
}
}
@@ -549,10 +539,6 @@ spfa_process_prefixes(struct ospf_proto *p, struct ospf_area *oa)
{
struct top_hash_entry *en, *src;
struct ospf_lsa_prefix *px;
- ip_addr pxa;
- int pxlen;
- u8 pxopts;
- u16 metric;
u32 *buf;
int i;
@@ -587,18 +573,22 @@ spfa_process_prefixes(struct ospf_proto *p, struct ospf_area *oa)
buf = px->rest;
for (i = 0; i < px->pxcount; i++)
- {
- buf = lsa_get_ipv6_prefix(buf, &pxa, &pxlen, &pxopts, &metric);
+ {
+ net_addr net;
+ u8 pxopts;
+ u16 metric;
- if (pxopts & OPT_PX_NU)
- continue;
+ buf = ospf3_get_prefix(buf, ospf_get_af(p), &net, &pxopts, &metric);
- /* Store the first global address to use it later as a vlink endpoint */
- if ((pxopts & OPT_PX_LA) && ipa_zero(src->lb))
- src->lb = pxa;
+ if (pxopts & OPT_PX_NU)
+ continue;
- add_network(oa, pxa, pxlen, src->dist + metric, src, i);
- }
+ /* Store the first global address to use it later as a vlink endpoint */
+ if ((pxopts & OPT_PX_LA) && (net.type == NET_IP6) && ipa_zero(src->lb))
+ src->lb = ipa_from_ip6(net6_prefix(&net));
+
+ add_network(oa, &net, src->dist + metric, src, i);
+ }
}
}
@@ -659,7 +649,8 @@ ospf_rt_spfa(struct ospf_area *oa)
}
static int
-link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par)
+link_back(struct ospf_area *oa, struct top_hash_entry *en,
+ struct top_hash_entry *par, uint lif, uint nif)
{
struct ospf_proto *p = oa->po;
struct ospf_lsa_rt_walk rtl;
@@ -697,6 +688,10 @@ link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
tmp = ospf_hash_find_net(p->gr, oa->areaid, rtl.id, rtl.nif);
if (tmp == par)
{
+ /*
+ * Note that there may be multiple matching Rt-fields if router 'en'
+ * has multiple interfaces to net 'par'. Perhaps we should do ECMP.
+ */
if (ospf_is_v2(p))
en->lb = ipa_from_u32(rtl.data);
else
@@ -708,7 +703,13 @@ link_back(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry
case LSART_VLNK:
case LSART_PTP:
- /* Not necessary the same link, see RFC 2328 [23] */
+ /*
+ * For OSPFv2, not necessarily the same link, see RFC 2328 [23].
+ * For OSPFv3, we verify that by comparing nif and lif fields.
+ */
+ if (ospf_is_v3(p) && ((rtl.lif != nif) || (rtl.nif != lif)))
+ break;
+
tmp = ospf_hash_find_rt(p->gr, oa->areaid, rtl.id);
if (tmp == par)
return 1;
@@ -741,13 +742,12 @@ ospf_rt_sum(struct ospf_area *oa)
{
struct ospf_proto *p = oa->po;
struct top_hash_entry *en;
- ip_addr ip, abrip;
+ net_addr net;
u32 dst_rid, metric, options;
ort *abr;
- int pxlen = -1, type = -1;
+ int type;
u8 pxopts;
-
OSPF_TRACE(D_EVENTS, "Starting routing table calculation for inter-area (area %R)", oa->areaid);
WALK_SLIST(en, p->lsal)
@@ -770,18 +770,18 @@ ospf_rt_sum(struct ospf_area *oa)
if (en->lsa_type == LSA_T_SUM_NET)
{
- lsa_parse_sum_net(en, ospf_is_v2(p), &ip, &pxlen, &pxopts, &metric);
-
- if (pxopts & OPT_PX_NU)
- continue;
+ lsa_parse_sum_net(en, ospf_is_v2(p), ospf_get_af(p), &net, &pxopts, &metric);
- if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH)
+ if (!ospf_valid_prefix(&net))
{
log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)",
p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt);
continue;
}
+ if (pxopts & OPT_PX_NU)
+ continue;
+
options = 0;
type = ORT_NET;
}
@@ -802,8 +802,8 @@ ospf_rt_sum(struct ospf_area *oa)
continue;
/* 16.2. (4) */
- abrip = ipa_from_rid(en->lsa.rt);
- abr = (ort *) fib_find(&oa->rtr, &abrip, MAX_PREFIX_LENGTH);
+ net_addr_ip4 nrid = net_from_rid(en->lsa.rt);
+ abr = fib_find(&oa->rtr, (net_addr *) &nrid);
if (!abr || !abr->n.type)
continue;
@@ -827,7 +827,7 @@ ospf_rt_sum(struct ospf_area *oa)
};
if (type == ORT_NET)
- ri_install_net(p, ip, pxlen, &nf);
+ ri_install_net(p, &net, &nf);
else
ri_install_rt(oa, dst_rid, &nf);
}
@@ -841,11 +841,7 @@ ospf_rt_sum_tr(struct ospf_area *oa)
struct ospf_area *bb = p->backbone;
struct top_hash_entry *en;
ort *re, *abr;
- ip_addr ip, abrip;
- u32 dst_rid, metric, options;
- int pxlen;
- u8 pxopts;
-
+ u32 metric;
if (!bb)
return;
@@ -868,26 +864,31 @@ ospf_rt_sum_tr(struct ospf_area *oa)
if (en->lsa_type == LSA_T_SUM_NET)
{
- lsa_parse_sum_net(en, ospf_is_v2(p), &ip, &pxlen, &pxopts, &metric);
+ net_addr net;
+ u8 pxopts;
- if (pxopts & OPT_PX_NU)
- continue;
+ lsa_parse_sum_net(en, ospf_is_v2(p), ospf_get_af(p), &net, &pxopts, &metric);
- if (pxlen < 0 || pxlen > MAX_PREFIX_LENGTH)
+ if (!ospf_valid_prefix(&net))
{
log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)",
p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt);
continue;
}
- re = fib_find(&p->rtf, &ip, pxlen);
+ if (pxopts & OPT_PX_NU)
+ continue;
+
+ re = fib_find(&p->rtf, &net);
}
else // en->lsa_type == LSA_T_SUM_RT
{
+ u32 dst_rid, options;
+
lsa_parse_sum_rt(en, ospf_is_v2(p), &dst_rid, &metric, &options);
- ip = ipa_from_rid(dst_rid);
- re = fib_find(&bb->rtr, &ip, MAX_PREFIX_LENGTH);
+ net_addr_ip4 nrid = net_from_rid(dst_rid);
+ re = fib_find(&bb->rtr, (net_addr *) &nrid);
}
/* 16.3 (1b) */
@@ -905,8 +906,8 @@ ospf_rt_sum_tr(struct ospf_area *oa)
continue;
/* 16.3. (4) */
- abrip = ipa_from_rid(en->lsa.rt);
- abr = fib_find(&oa->rtr, &abrip, MAX_PREFIX_LENGTH);
+ net_addr_ip4 nrid = net_from_rid(en->lsa.rt);
+ abr = fib_find(&oa->rtr, (net_addr *) &nrid);
if (!abr || !abr->n.type)
continue;
@@ -997,7 +998,7 @@ decide_sum_lsa(struct ospf_area *oa, ort *nf, int dest)
return 1;
struct area_net *anet = (struct area_net *)
- fib_route(&nf->n.oa->net_fib, nf->fn.prefix, nf->fn.pxlen);
+ fib_route(&nf->n.oa->net_fib, nf->fn.addr);
/* Condensed area network found */
if (anet)
@@ -1016,13 +1017,13 @@ check_sum_net_lsa(struct ospf_proto *p, ort *nf)
if (nf->area_net)
{
/* It is a default route for stub areas, handled entirely in ospf_rt_abr() */
- if (nf->fn.pxlen == 0)
+ if (nf->fn.addr->pxlen == 0)
return;
/* Find that area network */
WALK_LIST(anet_oa, p->area_list)
{
- anet = (struct area_net *) fib_find(&anet_oa->net_fib, &nf->fn.prefix, nf->fn.pxlen);
+ anet = fib_find(&anet_oa->net_fib, nf->fn.addr);
if (anet)
break;
}
@@ -1041,14 +1042,16 @@ check_sum_net_lsa(struct ospf_proto *p, ort *nf)
static inline void
check_sum_rt_lsa(struct ospf_proto *p, ort *nf)
{
+ u32 rid = rid_from_net(nf->fn.addr);
+
struct ospf_area *oa;
WALK_LIST(oa, p->area_list)
if (decide_sum_lsa(oa, nf, ORT_ROUTER))
- ospf_originate_sum_rt_lsa(p, oa, nf, nf->n.metric1, nf->n.options);
+ ospf_originate_sum_rt_lsa(p, oa, rid, nf->n.metric1, nf->n.options);
}
static inline int
-decide_nssa_lsa(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf, struct ospf_lsa_ext_local *rt)
+decide_nssa_lsa(struct ospf_proto *p, ort *nf, struct ospf_lsa_ext_local *rt)
{
struct ospf_area *oa = nf->n.oa;
struct top_hash_entry *en = nf->n.en;
@@ -1057,14 +1060,14 @@ decide_nssa_lsa(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf, struct ospf_lsa_e
return 0;
/* Condensed area network found */
- if (fib_route(&oa->enet_fib, nf->fn.prefix, nf->fn.pxlen))
+ if (fib_route(&oa->enet_fib, nf->fn.addr))
return 0;
if (!en || (en->lsa_type != LSA_T_NSSA))
return 0;
/* We do not store needed data in struct orta, we have to parse the LSA */
- lsa_parse_ext(en, ospf_is_v2(p), rt);
+ lsa_parse_ext(en, ospf_is_v2(p), ospf_get_af(p), rt);
if (rt->pxopts & OPT_PX_NU)
return 0;
@@ -1092,7 +1095,7 @@ check_nssa_lsa(struct ospf_proto *p, ort *nf)
/* Find that area network */
WALK_LIST(oa, p->area_list)
{
- anet = (struct area_net *) fib_find(&oa->enet_fib, &nf->fn.prefix, nf->fn.pxlen);
+ anet = fib_find(&oa->enet_fib, nf->fn.addr);
if (anet)
break;
}
@@ -1162,24 +1165,20 @@ static void
ospf_rt_abr1(struct ospf_proto *p)
{
struct area_net *anet;
- ort *nf, *default_nf;
+ ort *default_nf;
+ net_addr default_net;
/* RFC 2328 G.3 - incomplete resolution of virtual next hops - routers */
- FIB_WALK(&p->backbone->rtr, nftmp)
+ FIB_WALK(&p->backbone->rtr, ort, nf)
{
- nf = (ort *) nftmp;
-
if (nf->n.type && unresolved_vlink(nf))
reset_ri(nf);
}
FIB_WALK_END;
- FIB_WALK(&p->rtf, nftmp)
+ FIB_WALK(&p->rtf, ort, nf)
{
- nf = (ort *) nftmp;
-
-
/* RFC 2328 G.3 - incomplete resolution of virtual next hops - networks */
if (nf->n.type && unresolved_vlink(nf))
reset_ri(nf);
@@ -1188,7 +1187,7 @@ ospf_rt_abr1(struct ospf_proto *p)
/* Compute condensed area networks */
if (nf->n.type == RTS_OSPF)
{
- anet = (struct area_net *) fib_route(&nf->n.oa->net_fib, nf->fn.prefix, nf->fn.pxlen);
+ anet = (struct area_net *) fib_route(&nf->n.oa->net_fib, nf->fn.addr);
if (anet)
{
if (!anet->active)
@@ -1196,7 +1195,7 @@ ospf_rt_abr1(struct ospf_proto *p)
anet->active = 1;
/* Get a RT entry and mark it to know that it is an area network */
- ort *nfi = (ort *) fib_get(&p->rtf, &anet->fn.prefix, anet->fn.pxlen);
+ ort *nfi = fib_get(&p->rtf, anet->fn.addr);
nfi->area_net = 1;
/* 16.2. (3) */
@@ -1211,8 +1210,13 @@ ospf_rt_abr1(struct ospf_proto *p)
}
FIB_WALK_END;
- ip_addr addr = IPA_NONE;
- default_nf = (ort *) fib_get(&p->rtf, &addr, 0);
+
+ if (ospf_is_v2(p))
+ net_fill_ip4(&default_net, IP4_NONE, 0);
+ else
+ net_fill_ip6(&default_net, IP6_NONE, 0);
+
+ default_nf = fib_get(&p->rtf, &default_net);
default_nf->area_net = 1;
struct ospf_area *oa;
@@ -1239,11 +1243,10 @@ ospf_rt_abr1(struct ospf_proto *p)
/* RFC 2328 16.4. (3) - precompute preferred ASBR entries */
if (oa_is_ext(oa))
{
- FIB_WALK(&oa->rtr, nftmp)
+ FIB_WALK(&oa->rtr, ort, nf)
{
- nf = (ort *) nftmp;
if (nf->n.options & ORTA_ASBR)
- ri_install_asbr(p, &nf->fn.prefix, &nf->n);
+ ri_install_asbr(p, rid_from_net(nf->fn.addr), &nf->n);
}
FIB_WALK_END;
}
@@ -1251,9 +1254,9 @@ ospf_rt_abr1(struct ospf_proto *p)
/* Originate or flush ASBR summary LSAs */
- FIB_WALK(&p->backbone->rtr, nftmp)
+ FIB_WALK(&p->backbone->rtr, ort, nf)
{
- check_sum_rt_lsa(p, (ort *) nftmp);
+ check_sum_rt_lsa(p, nf);
}
FIB_WALK_END;
@@ -1280,8 +1283,6 @@ ospf_rt_abr2(struct ospf_proto *p)
{
struct ospf_area *oa;
struct top_hash_entry *en;
- ort *nf, *nf2;
-
/* RFC 3103 3.1 - type-7 translator election */
struct ospf_area *bb = p->backbone;
@@ -1293,13 +1294,12 @@ ospf_rt_abr2(struct ospf_proto *p)
if (oa->ac->translator)
goto decided;
- FIB_WALK(&oa->rtr, nftmp)
+ FIB_WALK(&oa->rtr, ort, nf)
{
- nf = (ort *) nftmp;
if (!nf->n.type || !(nf->n.options & ORTA_ABR))
continue;
- nf2 = fib_find(&bb->rtr, &nf->fn.prefix, MAX_PREFIX_LENGTH);
+ ort *nf2 = fib_find(&bb->rtr, nf->fn.addr);
if (!nf2 || !nf2->n.type || !(nf2->n.options & ORTA_ABR))
continue;
@@ -1329,23 +1329,21 @@ ospf_rt_abr2(struct ospf_proto *p)
if (!translate && (oa->translate == TRANS_ON))
{
if (oa->translator_timer == NULL)
- oa->translator_timer = tm_new_set(p->p.pool, translator_timer_hook, oa, 0, 0);
+ oa->translator_timer = tm_new_init(p->p.pool, translator_timer_hook, oa, 0, 0);
/* Schedule the end of translation */
- tm_start(oa->translator_timer, oa->ac->transint);
+ tm_start(oa->translator_timer, oa->ac->transint S);
oa->translate = TRANS_WAIT;
}
}
/* Compute condensed external networks */
- FIB_WALK(&p->rtf, nftmp)
+ FIB_WALK(&p->rtf, ort, nf)
{
- nf = (ort *) nftmp;
if (rt_is_nssa(nf) && (nf->n.options & ORTA_PROP))
{
- struct area_net *anet = (struct area_net *)
- fib_route(&nf->n.oa->enet_fib, nf->fn.prefix, nf->fn.pxlen);
+ struct area_net *anet = fib_route(&nf->n.oa->enet_fib, nf->fn.addr);
if (anet)
{
@@ -1354,7 +1352,7 @@ ospf_rt_abr2(struct ospf_proto *p)
anet->active = 1;
/* Get a RT entry and mark it to know that it is an area network */
- nf2 = (ort *) fib_get(&p->rtf, &anet->fn.prefix, anet->fn.pxlen);
+ ort *nf2 = fib_get(&p->rtf, anet->fn.addr);
nf2->area_net = 1;
}
@@ -1369,10 +1367,8 @@ ospf_rt_abr2(struct ospf_proto *p)
FIB_WALK_END;
- FIB_WALK(&p->rtf, nftmp)
+ FIB_WALK(&p->rtf, ort, nf)
{
- nf = (ort *) nftmp;
-
check_sum_net_lsa(p, nf);
check_nssa_lsa(p, nf);
}
@@ -1382,22 +1378,57 @@ ospf_rt_abr2(struct ospf_proto *p)
/* Like fib_route(), but ignores dummy rt entries */
static void *
-ospf_fib_route(struct fib *f, ip_addr a, int len)
+ospf_fib_route_ip4(struct fib *f, ip4_addr a, int len)
+{
+ net_addr_ip4 net = NET_ADDR_IP4(a, len);
+ ort *nf;
+
+loop:
+ nf = fib_find(f, (net_addr *) &net);
+ if (nf && nf->n.type)
+ return nf;
+
+ if (net.pxlen > 0)
+ {
+ net.pxlen--;
+ ip4_clrbit(&net.prefix, net.pxlen);
+ goto loop;
+ }
+
+ return NULL;
+}
+
+static void *
+ospf_fib_route_ip6(struct fib *f, ip6_addr a, int len)
{
- ip_addr a0;
+ net_addr_ip6 net = NET_ADDR_IP6(a, len);
ort *nf;
- while (len >= 0)
+loop:
+ nf = fib_find(f, (net_addr *) &net);
+ if (nf && nf->n.type)
+ return nf;
+
+ if (net.pxlen > 0)
{
- a0 = ipa_and(a, ipa_mkmask(len));
- nf = fib_find(f, &a0, len);
- if (nf && nf->n.type)
- return nf;
- len--;
+ net.pxlen--;
+ ip6_clrbit(&net.prefix, net.pxlen);
+ goto loop;
}
+
return NULL;
}
+static void *
+ospf_fib_route(struct fib *f, ip_addr a)
+{
+ if (f->addr_type == NET_IP4)
+ return ospf_fib_route_ip4(f, ipa_to_ip4(a), IP4_MAX_PREFIX_LENGTH);
+ else
+ return ospf_fib_route_ip6(f, ipa_to_ip6(a), IP6_MAX_PREFIX_LENGTH);
+}
+
+
/* RFC 2328 16.4. calculating external routes */
static void
ospf_ext_spf(struct ospf_proto *p)
@@ -1405,7 +1436,6 @@ ospf_ext_spf(struct ospf_proto *p)
struct top_hash_entry *en;
struct ospf_lsa_ext_local rt;
ort *nf1, *nf2;
- ip_addr rtid;
u32 br_metric;
struct ospf_area *atmp;
@@ -1429,21 +1459,20 @@ ospf_ext_spf(struct ospf_proto *p)
DBG("%s: Working on LSA. ID: %R, RT: %R, Type: %u\n",
p->p.name, en->lsa.id, en->lsa.rt, en->lsa_type);
- lsa_parse_ext(en, ospf_is_v2(p), &rt);
-
- if (rt.metric == LSINFINITY)
- continue;
+ lsa_parse_ext(en, ospf_is_v2(p), ospf_get_af(p), &rt);
- if (rt.pxopts & OPT_PX_NU)
- continue;
-
- if (rt.pxlen < 0 || rt.pxlen > MAX_PREFIX_LENGTH)
+ if (!ospf_valid_prefix(&rt.net))
{
log(L_WARN "%s: Invalid prefix in LSA (Type: %04x, Id: %R, Rt: %R)",
p->p.name, en->lsa_type, en->lsa.id, en->lsa.rt);
continue;
}
+ if (rt.metric == LSINFINITY)
+ continue;
+
+ if (rt.pxopts & OPT_PX_NU)
+ continue;
/* 16.4. (3) */
/* If there are more areas, we already precomputed preferred ASBR
@@ -1457,8 +1486,8 @@ ospf_ext_spf(struct ospf_proto *p)
if (!atmp)
continue; /* Should not happen */
- rtid = ipa_from_rid(en->lsa.rt);
- nf1 = fib_find(&atmp->rtr, &rtid, MAX_PREFIX_LENGTH);
+ net_addr_ip4 nrid = net_from_rid(en->lsa.rt);
+ nf1 = fib_find(&atmp->rtr, (net_addr *) &nrid);
if (!nf1 || !nf1->n.type)
continue; /* No AS boundary router found */
@@ -1468,7 +1497,7 @@ ospf_ext_spf(struct ospf_proto *p)
/* 16.4. (3) NSSA - special rule for default routes */
/* ABR should use default only if P-bit is set and summaries are active */
- if ((en->lsa_type == LSA_T_NSSA) && ipa_zero(rt.ip) && (rt.pxlen == 0) &&
+ if ((en->lsa_type == LSA_T_NSSA) && (rt.net.pxlen == 0) &&
(p->areano > 1) && !(rt.propagate && atmp->ac->summary))
continue;
@@ -1480,7 +1509,7 @@ ospf_ext_spf(struct ospf_proto *p)
}
else
{
- nf2 = ospf_fib_route(&p->rtf, rt.fwaddr, MAX_PREFIX_LENGTH);
+ nf2 = ospf_fib_route(&p->rtf, rt.fwaddr);
if (!nf2)
continue;
@@ -1542,7 +1571,7 @@ ospf_ext_spf(struct ospf_proto *p)
nfa.oa = atmp; /* undefined in RFC 2328 */
nfa.en = en; /* store LSA for later (NSSA processing) */
- ri_install_ext(p, rt.ip, rt.pxlen, &nfa);
+ ri_install_ext(p, &rt.net, &nfa);
}
}
@@ -1552,13 +1581,10 @@ ospf_rt_reset(struct ospf_proto *p)
{
struct ospf_area *oa;
struct top_hash_entry *en;
- struct area_net *anet;
- ort *ri;
/* Reset old routing table */
- FIB_WALK(&p->rtf, nftmp)
+ FIB_WALK(&p->rtf, ort, ri)
{
- ri = (ort *) nftmp;
ri->area_net = 0;
ri->keep = 0;
reset_ri(ri);
@@ -1580,9 +1606,8 @@ ospf_rt_reset(struct ospf_proto *p)
WALK_LIST(oa, p->area_list)
{
/* Reset ASBR routing tables */
- FIB_WALK(&oa->rtr, nftmp)
+ FIB_WALK(&oa->rtr, ort, ri)
{
- ri = (ort *) nftmp;
reset_ri(ri);
}
FIB_WALK_END;
@@ -1590,17 +1615,15 @@ ospf_rt_reset(struct ospf_proto *p)
/* Reset condensed area networks */
if (p->areano > 1)
{
- FIB_WALK(&oa->net_fib, nftmp)
+ FIB_WALK(&oa->net_fib, struct area_net, anet)
{
- anet = (struct area_net *) nftmp;
anet->active = 0;
anet->metric = 0;
}
FIB_WALK_END;
- FIB_WALK(&oa->enet_fib, nftmp)
+ FIB_WALK(&oa->enet_fib, struct area_net, anet)
{
- anet = (struct area_net *) nftmp;
anet->active = 0;
anet->metric = 0;
}
@@ -1659,19 +1682,33 @@ ospf_rt_spf(struct ospf_proto *p)
static inline int
-inherit_nexthops(struct mpnh *pn)
+inherit_nexthops(struct nexthop *pn)
{
/* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */
return pn && (ipa_nonzero(pn->gw) || !pn->iface);
}
-static struct mpnh *
+static inline ip_addr
+link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en)
+{
+ struct ospf_lsa_link *link_lsa = en->lsa_body;
+ ip6_addr ll = link_lsa->lladdr;
+
+ if (ip6_zero(ll))
+ return IPA_NONE;
+
+ return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll);
+}
+
+static struct nexthop *
calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
- struct top_hash_entry *par, int pos)
+ struct top_hash_entry *par, int pos, uint lif, uint nif)
{
struct ospf_proto *p = oa->po;
- struct mpnh *pn = par->nhs;
- struct ospf_iface *ifa;
+ struct nexthop *pn = par->nhs;
+ struct top_hash_entry *link = NULL;
+ struct ospf_iface *ifa = NULL;
+ ip_addr nh = IPA_NONE;
u32 rid = en->lsa.rt;
/* 16.1.1. The next hop calculation */
@@ -1696,6 +1733,9 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
if (!ifa)
return NULL;
+ if (ospf_is_v3(p) && (ifa->iface_id != lif))
+ log(L_WARN "%s: Inconsistent interface ID %u/%u", p->p.name, ifa->iface_id, lif);
+
return new_nexthop(p, IPA_NONE, ifa->iface, ifa->ecmp_weight);
}
@@ -1706,14 +1746,44 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
if (!ifa)
return NULL;
+ if (ospf_is_v3(p) && (ifa->iface_id != lif))
+ log(L_WARN "%s: Inconsistent interface ID %u/%u", p->p.name, ifa->iface_id, lif);
+
if (ifa->type == OSPF_IT_VLINK)
return new_nexthop(p, IPA_NONE, NULL, 0);
- struct ospf_neighbor *m = find_neigh(ifa, rid);
- if (!m || (m->state != NEIGHBOR_FULL))
- return NULL;
+ /* FIXME: On physical PtP links we may skip next-hop altogether */
+
+ if (ospf_is_v2(p) || ospf_is_ip6(p))
+ {
+ /*
+ * In this case, next-hop is a source address from neighbor's packets.
+ * That is necessary for OSPFv2 and practical for OSPFv3 (as it works even
+ * if neighbor uses LinkLSASuppression), but does not work with OSPFv3-AF
+ * on IPv4 topology, where src is IPv6 but next-hop should be IPv4.
+ */
+ struct ospf_neighbor *m = find_neigh(ifa, rid);
+ if (!m || (m->state != NEIGHBOR_FULL))
+ return NULL;
+
+ nh = m->ip;
+ }
+ else
+ {
+ /*
+ * Next-hop is taken from lladdr field of Link-LSA, based on Neighbor
+ * Iface ID (nif) field in our Router-LSA, which is just nbr->iface_id.
+ */
+ link = ospf_hash_find(p->gr, ifa->iface_id, nif, rid, LSA_T_LINK);
+ if (!link)
+ return NULL;
+
+ nh = link_lsa_lladdr(p, link);
+ if (ipa_zero(nh))
+ return NULL;
+ }
- return new_nexthop(p, m->ip, ifa->iface, ifa->ecmp_weight);
+ return new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight);
}
/* The third case - bcast or nbma neighbor */
@@ -1740,18 +1810,15 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
* Next-hop is taken from lladdr field of Link-LSA, en->lb_id
* is computed in link_back().
*/
- struct top_hash_entry *lhe;
- lhe = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK);
-
- if (!lhe)
+ link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK);
+ if (!link)
return NULL;
- struct ospf_lsa_link *llsa = lhe->lsa_body;
-
- if (ip6_zero(llsa->lladdr))
+ nh = link_lsa_lladdr(p, link);
+ if (ipa_zero(nh))
return NULL;
- return new_nexthop(p, ipa_from_ip6(llsa->lladdr), pn->iface, pn->weight);
+ return new_nexthop(p, nh, pn->iface, pn->weight);
}
}
@@ -1764,8 +1831,8 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en,
/* Add LSA into list of candidates in Dijkstra's algorithm */
static void
-add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
- u32 dist, struct ospf_area *oa, int pos)
+add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par,
+ u32 dist, int pos, uint lif, uint nif)
{
struct ospf_proto *p = oa->po;
node *prev, *n;
@@ -1778,9 +1845,9 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
if (en->lsa.age == LSA_MAXAGE)
return;
- if (ospf_is_v3(p) && (en->lsa_type == LSA_T_RT))
+ if (ospf_is_v3(p) && (oa->options & OPT_V6) && (en->lsa_type == LSA_T_RT))
{
- /* In OSPFv3, check V6 flag */
+ /* In OSPFv3 IPv6 unicast, check V6 flag */
struct ospf_lsa_rt *rt = en->lsa_body;
if (!(rt->options & OPT_V6))
return;
@@ -1795,10 +1862,10 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
return;
/* We should check whether there is a reverse link from en to par, */
- if (!link_back(oa, en, par))
+ if (!link_back(oa, en, par, lif, nif))
return;
- struct mpnh *nhs = calc_next_hop(oa, en, par, pos);
+ struct nexthop *nhs = calc_next_hop(oa, en, par, pos, lif, nif);
if (!nhs)
{
log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)",
@@ -1836,7 +1903,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
/* Merge old and new */
int new_reuse = (par->nhs != nhs);
- en->nhs = mpnh_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool);
+ en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool);
en->nhs_reuse = 1;
return;
}
@@ -1855,20 +1922,20 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
prev = NULL;
- if (EMPTY_LIST(*l))
+ if (EMPTY_LIST(oa->cand))
{
- add_head(l, &en->cn);
+ add_head(&oa->cand, &en->cn);
}
else
{
- WALK_LIST(n, *l)
+ WALK_LIST(n, oa->cand)
{
act = SKIP_BACK(struct top_hash_entry, cn, n);
if ((act->dist > dist) ||
((act->dist == dist) && (act->lsa_type == LSA_T_RT)))
{
if (prev == NULL)
- add_head(l, &en->cn);
+ add_head(&oa->cand, &en->cn);
else
insert_node(&en->cn, prev);
added = 1;
@@ -1879,7 +1946,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par,
if (!added)
{
- add_tail(l, &en->cn);
+ add_tail(&oa->cand, &en->cn);
}
}
}
@@ -1892,8 +1959,7 @@ ort_changed(ort *nf, rta *nr)
(nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) ||
(nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) ||
(nr->source != or->source) || (nr->dest != or->dest) ||
- (nr->iface != or->iface) || !ipa_equal(nr->gw, or->gw) ||
- !mpnh_same(nr->nexthops, or->nexthops);
+ !nexthop_same(&(nr->nh), &(or->nh));
}
static void
@@ -1902,25 +1968,22 @@ rt_sync(struct ospf_proto *p)
struct top_hash_entry *en;
struct fib_iterator fit;
struct fib *fib = &p->rtf;
- ort *nf;
struct ospf_area *oa;
/* This is used for forced reload of routes */
int reload = (p->calcrt == 2);
- OSPF_TRACE(D_EVENTS, "Starting routing table synchronisation");
+ OSPF_TRACE(D_EVENTS, "Starting routing table synchronization");
DBG("Now syncing my rt table with nest's\n");
FIB_ITERATE_INIT(&fit, fib);
again1:
- FIB_ITERATE_START(fib, &fit, nftmp)
+ FIB_ITERATE_START(fib, &fit, ort, nf)
{
- nf = (ort *) nftmp;
-
/* Sanity check of next-hop addresses, failure should not happen */
if (nf->n.type)
{
- struct mpnh *nh;
+ struct nexthop *nh;
for (nh = nf->n.nhs; nh; nh = nh->next)
if (ipa_nonzero(nh->gw))
{
@@ -1943,29 +2006,12 @@ again1:
.src = p->p.main_source,
.source = nf->n.type,
.scope = SCOPE_UNIVERSE,
- .cast = RTC_UNICAST
+ .dest = RTD_UNICAST,
+ .nh = *(nf->n.nhs),
};
- if (nf->n.nhs->next)
- {
- a0.dest = RTD_MULTIPATH;
- a0.nexthops = nf->n.nhs;
- }
- else if (ipa_nonzero(nf->n.nhs->gw))
- {
- a0.dest = RTD_ROUTER;
- a0.iface = nf->n.nhs->iface;
- a0.gw = nf->n.nhs->gw;
- }
- else
- {
- a0.dest = RTD_DEVICE;
- a0.iface = nf->n.nhs->iface;
- }
-
if (reload || ort_changed(nf, &a0))
{
- net *ne = net_get(p->p.table, nf->fn.prefix, nf->fn.pxlen);
rta *a = rta_lookup(&a0);
rte *e = rte_get_temp(a);
@@ -1976,12 +2022,10 @@ again1:
e->u.ospf.tag = nf->old_tag = nf->n.tag;
e->u.ospf.router_id = nf->old_rid = nf->n.rid;
e->pflags = 0;
- e->net = ne;
- e->pref = p->p.preference;
- DBG("Mod rte type %d - %I/%d via %I on iface %s, met %d\n",
- a0.source, nf->fn.prefix, nf->fn.pxlen, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1);
- rte_update(&p->p, ne, e);
+ DBG("Mod rte type %d - %N via %I on iface %s, met %d\n",
+ a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1);
+ rte_update(&p->p, nf->fn.addr, e);
}
}
else if (nf->old_rta)
@@ -1990,19 +2034,21 @@ again1:
rta_free(nf->old_rta);
nf->old_rta = NULL;
- net *ne = net_get(p->p.table, nf->fn.prefix, nf->fn.pxlen);
- rte_update(&p->p, ne, NULL);
+ rte_update(&p->p, nf->fn.addr, NULL);
}
/* Remove unused rt entry, some special entries are persistent */
if (!nf->n.type && !nf->external_rte && !nf->area_net && !nf->keep)
{
- FIB_ITERATE_PUT(&fit, nftmp);
- fib_delete(fib, nftmp);
+ if (nf->lsa_id)
+ idm_free(&p->idm, nf->lsa_id);
+
+ FIB_ITERATE_PUT(&fit);
+ fib_delete(fib, nf);
goto again1;
}
}
- FIB_ITERATE_END(nftmp);
+ FIB_ITERATE_END;
WALK_LIST(oa, p->area_list)
@@ -2010,18 +2056,16 @@ again1:
/* Cleanup ASBR hash tables */
FIB_ITERATE_INIT(&fit, &oa->rtr);
again2:
- FIB_ITERATE_START(&oa->rtr, &fit, nftmp)
+ FIB_ITERATE_START(&oa->rtr, &fit, ort, nf)
{
- nf = (ort *) nftmp;
-
if (!nf->n.type)
{
- FIB_ITERATE_PUT(&fit, nftmp);
- fib_delete(&oa->rtr, nftmp);
+ FIB_ITERATE_PUT(&fit);
+ fib_delete(&oa->rtr, nf);
goto again2;
}
}
- FIB_ITERATE_END(nftmp);
+ FIB_ITERATE_END;
}
/* Cleanup stale LSAs */
diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h
index 73b28375..589d2bc5 100644
--- a/proto/ospf/rt.h
+++ b/proto/ospf/rt.h
@@ -53,7 +53,7 @@ typedef struct orta
struct ospf_area *oa;
struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(),
NULL otherwise */
- struct mpnh *nhs; /* Next hops computed during SPF */
+ struct nexthop *nhs; /* Next hops computed during SPF */
struct top_hash_entry *en; /* LSA responsible for this orta */
}
orta;
@@ -78,13 +78,15 @@ typedef struct ort
* route was not in the last update, in that case other old_* values are not
* valid.
*/
- struct fib_node fn;
orta n;
u32 old_metric1, old_metric2, old_tag, old_rid;
rta *old_rta;
+ u32 lsa_id;
u8 external_rte;
u8 area_net;
u8 keep;
+
+ struct fib_node fn;
}
ort;
diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c
index 341eff87..717c8280 100644
--- a/proto/ospf/topology.c
+++ b/proto/ospf/topology.c
@@ -70,7 +70,7 @@ ospf_install_lsa(struct ospf_proto *p, struct ospf_lsa_header *lsa, u32 type, u3
en->lsa_body = body;
en->lsa = *lsa;
en->init_age = en->lsa.age;
- en->inst_time = now;
+ en->inst_time = current_time();
/*
* We do not set en->mode. It is either default LSA_M_BASIC, or in a special
@@ -128,7 +128,7 @@ ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_ls
en->lsa.sn = lsa->sn + 1;
en->lsa.age = 0;
en->init_age = 0;
- en->inst_time = now;
+ en->inst_time = current_time();
lsa_generate_checksum(&en->lsa, en->lsa_body);
OSPF_TRACE(D_EVENTS, "Advancing LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x",
@@ -160,7 +160,7 @@ ospf_advance_lsa(struct ospf_proto *p, struct top_hash_entry *en, struct ospf_ls
en->lsa = *lsa;
en->lsa.age = LSA_MAXAGE;
en->init_age = lsa->age;
- en->inst_time = now;
+ en->inst_time = current_time();
OSPF_TRACE(D_EVENTS, "Resetting LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x",
en->lsa_type, en->lsa.id, en->lsa.rt, en->lsa.sn);
@@ -196,7 +196,7 @@ static int
ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa_body, u16 lsa_blen, u16 lsa_opts)
{
/* Enforce MinLSInterval */
- if ((en->init_age == 0) && en->inst_time && ((en->inst_time + MINLSINTERVAL) > now))
+ if (!en->init_age && en->inst_time && (lsa_inst_age(en) < MINLSINTERVAL))
return 0;
/* Handle wrapping sequence number */
@@ -237,7 +237,7 @@ ospf_do_originate_lsa(struct ospf_proto *p, struct top_hash_entry *en, void *lsa
en->lsa.sn++;
en->lsa.age = 0;
en->init_age = 0;
- en->inst_time = now;
+ en->inst_time = current_time();
lsa_generate_checksum(&en->lsa, en->lsa_body);
OSPF_TRACE(D_EVENTS, "Originating LSA: Type: %04x, Id: %R, Rt: %R, Seq: %08x",
@@ -283,8 +283,8 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa)
if (en->nf != lsa->nf)
{
- log(L_ERR "%s: LSA ID collision for %I/%d",
- p->p.name, lsa->nf->fn.prefix, lsa->nf->fn.pxlen);
+ log(L_ERR "%s: LSA ID collision for %N",
+ p->p.name, lsa->nf->fn.addr);
en = NULL;
goto drop;
@@ -381,7 +381,7 @@ ospf_refresh_lsa(struct ospf_proto *p, struct top_hash_entry *en)
en->lsa.sn++;
en->lsa.age = 0;
en->init_age = 0;
- en->inst_time = now;
+ en->inst_time = current_time();
lsa_generate_checksum(&en->lsa, en->lsa_body);
ospf_flood_lsa(p, en, NULL);
}
@@ -476,14 +476,15 @@ void
ospf_update_lsadb(struct ospf_proto *p)
{
struct top_hash_entry *en, *nxt;
- bird_clock_t real_age;
+ btime now_ = current_time();
+ int real_age;
WALK_SLIST_DELSAFE(en, nxt, p->lsal)
{
if (en->next_lsa_body)
ospf_originate_next_lsa(p, en);
- real_age = en->init_age + (now - en->inst_time);
+ real_age = en->init_age + (now_ - en->inst_time) TO_S;
if (en->lsa.age == LSA_MAXAGE)
{
@@ -514,14 +515,14 @@ ospf_update_lsadb(struct ospf_proto *p)
}
-static inline u32
-ort_to_lsaid(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf)
+static u32
+ort_to_lsaid(struct ospf_proto *p, ort *nf)
{
/*
* In OSPFv2, We have to map IP prefixes to u32 in such manner that resulting
* u32 interpreted as IP address is a member of given prefix. Therefore, /32
- * prefix have to be mapped on itself. All received prefixes have to be
- * mapped on different u32s.
+ * prefix has to be mapped on itself. All received prefixes have to be mapped
+ * on different u32s.
*
* We have an assumption that if there is nontrivial (non-/32) network prefix,
* then there is not /32 prefix for the first and the last IP address of the
@@ -542,17 +543,21 @@ ort_to_lsaid(struct ospf_proto *p UNUSED4 UNUSED6, ort *nf)
* network appeared, we choose a different way.
*
* In OSPFv3, it is simpler. There is not a requirement for membership of the
- * result in the input network, so we just use a hash-based unique ID of a
- * routing table entry for a route that originated given LSA. For ext-LSA, it
- * is an imported route in the nest's routing table (p->table). For summary-LSA,
- * it is a 'source' route in the protocol internal routing table (p->rtf).
+ * result in the input network, so we just allocate a unique ID from ID map
+ * and store it in nf->lsa_id for further reference.
*/
if (ospf_is_v3(p))
- return nf->fn.uid;
+ {
+ if (!nf->lsa_id)
+ nf->lsa_id = idm_alloc(&p->idm);
- u32 id = ipa_to_u32(nf->fn.prefix);
- int pxlen = nf->fn.pxlen;
+ return nf->lsa_id;
+ }
+
+ net_addr_ip4 *net = (void *) nf->fn.addr;
+ u32 id = ip4_to_u32(net->prefix);
+ int pxlen = net->pxlen;
if ((pxlen == 0) || (pxlen == 32))
return id;
@@ -628,12 +633,12 @@ configured_stubnet(struct ospf_area *oa, struct ifa *a)
{
if (sn->summary)
{
- if (ipa_in_net(a->prefix, sn->px.addr, sn->px.len) && (a->pxlen >= sn->px.len))
+ if (net_in_netX(&a->prefix, &sn->prefix))
return 1;
}
else
{
- if (ipa_equal(a->prefix, sn->px.addr) && (a->pxlen == sn->px.len))
+ if (net_equal(&a->prefix, &sn->prefix))
return 1;
}
}
@@ -781,7 +786,8 @@ prepare_rt2_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
(ifa->type == OSPF_IT_PTMP))
add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(ifa->addr->ip), 0xffffffff, 0);
else
- add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(ifa->addr->prefix), u32_mkmask(ifa->addr->pxlen), ifa->cost);
+ add_rt2_lsa_link(p, LSART_STUB, ip4_to_u32(net4_prefix(&ifa->addr->prefix)),
+ u32_mkmask(net4_pxlen(&ifa->addr->prefix)), ifa->cost);
i++;
ifa->rt_pos_end = i;
@@ -790,7 +796,8 @@ prepare_rt2_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
struct ospf_stubnet_config *sn;
WALK_LIST(sn, oa->ac->stubnet_list)
if (!sn->hidden)
- add_rt2_lsa_link(p, LSART_STUB, ipa_to_u32(sn->px.addr), u32_mkmask(sn->px.len), sn->cost), i++;
+ add_rt2_lsa_link(p, LSART_STUB, ip4_to_u32(net4_prefix(&sn->prefix)),
+ u32_mkmask(net4_pxlen(&sn->prefix)), sn->cost), i++;
struct ospf_lsa_rt *rt = p->lsab;
/* Store number of links in lower half of options */
@@ -907,7 +914,7 @@ prepare_net2_lsa_body(struct ospf_proto *p, struct ospf_iface *ifa)
ASSERT(p->lsab_used == 0);
net = lsab_alloc(p, sizeof(struct ospf_lsa_net) + 4 * nodes);
- net->optx = u32_mkmask(ifa->addr->pxlen);
+ net->optx = u32_mkmask(ifa->addr->prefix.pxlen);
net->routers[0] = p->router_id;
WALK_LIST(n, ifa->neigh_list)
@@ -999,9 +1006,10 @@ prepare_sum3_net_lsa_body(struct ospf_proto *p, ort *nf, u32 metric)
{
struct ospf_lsa_sum3_net *sum;
- sum = lsab_allocz(p, sizeof(struct ospf_lsa_sum3_net) + IPV6_PREFIX_SPACE(nf->fn.pxlen));
+ sum = lsab_allocz(p, sizeof(struct ospf_lsa_sum3_net) +
+ IPV6_PREFIX_SPACE(nf->fn.addr->pxlen));
sum->metric = metric;
- put_ipv6_prefix(sum->prefix, nf->fn.prefix, nf->fn.pxlen, 0, 0);
+ ospf3_put_prefix(sum->prefix, nf->fn.addr, 0, 0);
}
static inline void
@@ -1028,7 +1036,7 @@ ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf,
};
if (ospf_is_v2(p))
- prepare_sum2_lsa_body(p, nf->fn.pxlen, metric);
+ prepare_sum2_lsa_body(p, nf->fn.addr->pxlen, metric);
else
prepare_sum3_net_lsa_body(p, nf, metric);
@@ -1036,20 +1044,20 @@ ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf,
}
void
-ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric, u32 options)
+ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options)
{
struct ospf_new_lsa lsa = {
.type = LSA_T_SUM_RT,
.mode = LSA_M_RTCALC,
.dom = oa->areaid,
- .id = ipa_to_rid(nf->fn.prefix), /* Router ID of ASBR, irrelevant for OSPFv3 */
+ .id = drid, /* Router ID of ASBR, irrelevant for OSPFv3 */
.opts = oa->options
};
if (ospf_is_v2(p))
prepare_sum2_lsa_body(p, 0, metric);
else
- prepare_sum3_rt_lsa_body(p, lsa.id, metric, options & LSA_OPTIONS_MASK);
+ prepare_sum3_rt_lsa_body(p, drid, metric, options & LSA_OPTIONS_MASK);
ospf_originate_lsa(p, &lsa);
}
@@ -1082,7 +1090,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf,
{
struct ospf_lsa_ext3 *ext;
int bsize = sizeof(struct ospf_lsa_ext3)
- + IPV6_PREFIX_SPACE(nf->fn.pxlen)
+ + IPV6_PREFIX_SPACE(nf->fn.addr->pxlen)
+ (ipa_nonzero(fwaddr) ? 16 : 0)
+ (tag ? 4 : 0);
@@ -1090,7 +1098,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf,
ext->metric = metric & LSA_METRIC_MASK;
u32 *buf = ext->rest;
- buf = put_ipv6_prefix(buf, nf->fn.prefix, nf->fn.pxlen, pbit ? OPT_PX_P : 0, 0);
+ buf = ospf3_put_prefix(buf, nf->fn.addr, pbit ? OPT_PX_P : 0, 0);
if (ebit)
ext->metric |= LSA_EXT3_EBIT;
@@ -1098,7 +1106,7 @@ prepare_ext3_lsa_body(struct ospf_proto *p, ort *nf,
if (ipa_nonzero(fwaddr))
{
ext->metric |= LSA_EXT3_FBIT;
- buf = put_ipv6_addr(buf, fwaddr);
+ buf = ospf3_put_addr(buf, fwaddr);
}
if (tag)
@@ -1140,7 +1148,7 @@ ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 m
};
if (ospf_is_v2(p))
- prepare_ext2_lsa_body(p, nf->fn.pxlen, metric, ebit, fwaddr, tag);
+ prepare_ext2_lsa_body(p, nf->fn.addr->pxlen, metric, ebit, fwaddr, tag);
else
prepare_ext3_lsa_body(p, nf, metric, ebit, fwaddr, tag, oa && pbit);
@@ -1177,7 +1185,7 @@ use_gw_for_fwaddr(struct ospf_proto *p, ip_addr gw, struct iface *iface)
WALK_LIST(ifa, p->iface_list)
if ((ifa->iface == iface) &&
- (!ospf_is_v2(p) || ipa_in_net(gw, ifa->addr->prefix, ifa->addr->pxlen)))
+ (!ospf_is_v2(p) || ipa_in_netX(gw, &ifa->addr->prefix)))
return 1;
return 0;
@@ -1215,7 +1223,8 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa)
{
WALK_LIST(a, ifa->iface->addrs)
{
- if ((a->flags & IA_SECONDARY) ||
+ if ((a->prefix.type != ospf_get_af(p)) ||
+ (a->flags & IA_SECONDARY) ||
(a->flags & IA_PEER) ||
(a->scope <= SCOPE_LINK))
continue;
@@ -1234,7 +1243,7 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa)
}
void
-ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *ea)
+ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *ea)
{
struct ospf_proto *p = (struct ospf_proto *) P;
struct ospf_area *oa = NULL; /* non-NULL for NSSA-LSA */
@@ -1253,7 +1262,7 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U
if (!new)
{
- nf = (ort *) fib_find(&p->rtf, &n->n.prefix, n->n.pxlen);
+ nf = fib_find(&p->rtf, n->n.addr);
if (!nf || !nf->external_rte)
return;
@@ -1280,8 +1289,8 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U
ip_addr fwd = IPA_NONE;
- if ((a->dest == RTD_ROUTER) && use_gw_for_fwaddr(p, a->gw, a->iface))
- fwd = a->gw;
+ if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface))
+ fwd = a->nh.gw;
/* NSSA-LSA with P-bit set must have non-zero forwarding address */
if (oa && ipa_zero(fwd))
@@ -1290,13 +1299,13 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U
if (ipa_zero(fwd))
{
- log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %I/%d",
- p->p.name, n->n.prefix, n->n.pxlen);
+ log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %N",
+ p->p.name, n->n.addr);
return;
}
}
- nf = (ort *) fib_get(&p->rtf, &n->n.prefix, n->n.pxlen);
+ nf = fib_get(&p->rtf, n->n.addr);
ospf_originate_ext_lsa(p, oa, nf, LSA_M_EXPORT, metric, ebit, fwd, tag, 1);
nf->external_rte = 1;
}
@@ -1308,38 +1317,47 @@ ospf_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old U
*/
static inline void
-lsab_put_prefix(struct ospf_proto *p, ip_addr prefix, u32 pxlen, u32 cost)
+lsab_put_prefix(struct ospf_proto *p, net_addr *n, u32 cost)
{
- void *buf = lsab_alloc(p, IPV6_PREFIX_SPACE(pxlen));
- u8 flags = (pxlen < MAX_PREFIX_LENGTH) ? 0 : OPT_PX_LA;
- put_ipv6_prefix(buf, prefix, pxlen, flags, cost);
+ void *buf = lsab_alloc(p, IPV6_PREFIX_SPACE(net_pxlen(n)));
+ uint max = (n->type == NET_IP4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH;
+ u8 flags = (net_pxlen(n) < max) ? 0 : OPT_PX_LA;
+ ospf3_put_prefix(buf, n, flags, cost);
}
/*
 * Build a link-LSA body for @ifa in the LSA buffer of @p: a struct ospf_lsa_link
 * header followed by one prefix entry (cost 0) per accepted interface address.
 * The header is reserved first and filled in only after the address walk.
 */
static void
prepare_link_lsa_body(struct ospf_proto *p, struct ospf_iface *ifa)
{
- struct ospf_lsa_link *ll;
+ ip_addr nh = ospf_is_ip4(p) ? IPA_NONE : ifa->addr->ip; /* in IPv4 mode the address is chosen inside the loop below */
int i = 0; /* number of prefixes appended */
+ /* Preallocating space for header */
ASSERT(p->lsab_used == 0);
- ll = lsab_allocz(p, sizeof(struct ospf_lsa_link));
- ll->options = ifa->oa->options | (ifa->priority << 24);
- ll->lladdr = ipa_to_ip6(ifa->addr->ip);
- ll = NULL; /* buffer might be reallocated later */
+ lsab_allocz(p, sizeof(struct ospf_lsa_link)); /* return value unused here; the header is reached through p->lsab after the loop */
struct ifa *a;
WALK_LIST(a, ifa->iface->addrs)
{
- if ((a->flags & IA_SECONDARY) ||
- (a->scope < SCOPE_SITE))
+ if ((a->prefix.type != ospf_get_af(p)) ||
+ (a->flags & IA_SECONDARY) ||
+ (a->scope <= SCOPE_LINK)) /* skip other address types, secondary addresses and scope <= SCOPE_LINK */
continue;
- lsab_put_prefix(p, a->prefix, a->pxlen, 0);
+ if (ospf_is_ip4(p) && ipa_zero(nh))
+ nh = a->ip; /* IPv4 mode: the first accepted address is kept */
+
+ lsab_put_prefix(p, &a->prefix, 0);
i++;
}
- ll = p->lsab;
+ /* Filling the preallocated header */
+ struct ospf_lsa_link *ll = p->lsab;
+ ll->options = ifa->oa->options | (ifa->priority << 24); /* priority is placed above the low 24 bits of the options word */
+ ll->lladdr = ospf_is_ip4(p) ? ospf3_4to6(ipa_to_ip4(nh)) : ipa_to_ip6(nh);
ll->pxcount = i;
+
+ if (ipa_zero(nh)) /* only logged; the header above has already been written with the zero address */
+ log(L_ERR "%s: Cannot find next hop address for %s", p->p.name, ifa->ifname);
}
static void
@@ -1401,12 +1419,13 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
struct ifa *a;
WALK_LIST(a, ifa->iface->addrs)
{
- if ((a->flags & IA_SECONDARY) ||
+ if ((a->prefix.type != ospf_get_af(p)) ||
+ (a->flags & IA_SECONDARY) ||
(a->flags & IA_PEER) ||
(a->scope <= SCOPE_LINK))
continue;
- if (((a->pxlen < MAX_PREFIX_LENGTH) && net_lsa) ||
+ if (((a->prefix.pxlen < IP6_MAX_PREFIX_LENGTH) && net_lsa) ||
configured_stubnet(oa, a))
continue;
@@ -1414,11 +1433,12 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
(ifa->state == OSPF_IS_LOOP) ||
(ifa->type == OSPF_IT_PTMP))
{
- lsab_put_prefix(p, a->ip, MAX_PREFIX_LENGTH, 0);
+ net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH);
+ lsab_put_prefix(p, (net_addr *) &net, 0);
host_addr = 1;
}
else
- lsab_put_prefix(p, a->prefix, a->pxlen, ifa->cost);
+ lsab_put_prefix(p, &a->prefix, ifa->cost);
i++;
}
@@ -1429,15 +1449,15 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
WALK_LIST(sn, oa->ac->stubnet_list)
if (!sn->hidden)
{
- lsab_put_prefix(p, sn->px.addr, sn->px.len, sn->cost);
- if (sn->px.len == MAX_PREFIX_LENGTH)
+ lsab_put_prefix(p, &sn->prefix, sn->cost);
+ if (sn->prefix.pxlen == IP6_MAX_PREFIX_LENGTH)
host_addr = 1;
i++;
}
/* If there are some configured vlinks, find some global address
(even from another area), which will be used as a vlink endpoint. */
- if (!EMPTY_LIST(cf->vlink_list) && !host_addr)
+ if (!EMPTY_LIST(cf->vlink_list) && !host_addr && ospf_is_ip6(p))
{
WALK_LIST(ifa, p->iface_list)
{
@@ -1447,11 +1467,14 @@ prepare_prefix_rt_lsa_body(struct ospf_proto *p, struct ospf_area *oa)
struct ifa *a;
WALK_LIST(a, ifa->iface->addrs)
{
- if ((a->flags & IA_SECONDARY) || (a->scope <= SCOPE_LINK))
+ if ((a->prefix.type != NET_IP6) ||
+ (a->flags & IA_SECONDARY) ||
+ (a->scope <= SCOPE_LINK))
continue;
/* Found some IP */
- lsab_put_prefix(p, a->ip, MAX_PREFIX_LENGTH, 0);
+ net_addr_ip6 net = NET_ADDR_IP6(a->ip, IP6_MAX_PREFIX_LENGTH);
+ lsab_put_prefix(p, (net_addr *) &net, 0);
i++;
goto done;
}
@@ -1557,7 +1580,7 @@ add_link_lsa(struct ospf_proto *p, struct ospf_lsa_link *ll, int offset, int *px
continue;
/* Skip link-local prefixes */
- if ((pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000))
+ if (ospf_is_ip6(p) && (pxlen >= 10) && ((pxb[1] & 0xffc00000) == 0xfe800000))
continue;
add_prefix(p, pxb, offset, pxc);
@@ -1614,7 +1637,7 @@ ospf_originate_prefix_net_lsa(struct ospf_proto *p, struct ospf_iface *ifa)
}
/* Nonzero when @en is non-NULL, its LSA age is below LSA_MAXAGE and (new version) lsa_inst_age(en) is below MINLSINTERVAL */
static inline int breaks_minlsinterval(struct top_hash_entry *en)
-{ return en && (en->lsa.age < LSA_MAXAGE) && ((en->inst_time + MINLSINTERVAL) > now); }
+{ return en && (en->lsa.age < LSA_MAXAGE) && (lsa_inst_age(en) < MINLSINTERVAL); }
void
ospf_update_topology(struct ospf_proto *p)
@@ -1748,7 +1771,7 @@ ospf_top_hash(struct top_graph *f, u32 domain, u32 lsaid, u32 rtrid, u32 type)
* and request lists of OSPF neighbors.
*/
struct top_graph *
-ospf_top_new(struct ospf_proto *p UNUSED4 UNUSED6, pool *pool)
+ospf_top_new(struct ospf_proto *p, pool *pool)
{
struct top_graph *f;
diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h
index 5652ced0..ac87334b 100644
--- a/proto/ospf/topology.h
+++ b/proto/ospf/topology.h
@@ -26,9 +26,9 @@ struct top_hash_entry
void *next_lsa_body; /* For postponed LSA origination */
u16 next_lsa_blen; /* For postponed LSA origination */
u16 next_lsa_opts; /* For postponed LSA origination */
- bird_clock_t inst_time; /* Time of installation into DB */
+ btime inst_time; /* Time of installation into DB */
struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */
- struct mpnh *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */
+ struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */
ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */
u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */
u32 dist; /* Distance from the root */
@@ -185,10 +185,10 @@ static inline void ospf_flush2_lsa(struct ospf_proto *p, struct top_hash_entry *
{ if (*en) { ospf_flush_lsa(p, *en); *en = NULL; } }
void ospf_originate_sum_net_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric);
-void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, int metric, u32 options);
+void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 drid, int metric, u32 options);
void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit);
-void ospf_rt_notify(struct proto *P, rtable *tbl, net *n, rte *new, rte *old, ea_list *attrs);
+void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old, ea_list *attrs);
void ospf_update_topology(struct ospf_proto *p);
struct top_hash_entry *ospf_hash_find(struct top_graph *, u32 domain, u32 lsa, u32 rtr, u32 type);
diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile
index 77de5b88..5093da98 100644
--- a/proto/pipe/Makefile
+++ b/proto/pipe/Makefile
@@ -1,6 +1,6 @@
-source=pipe.c
-root-rel=../../
-dir-name=proto/pipe
-
-include ../../Rules
+src := pipe.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y
index 8daf2e7c..f51ee575 100644
--- a/proto/pipe/config.Y
+++ b/proto/pipe/config.Y
@@ -16,28 +16,25 @@ CF_DEFINES
CF_DECLS
-CF_KEYWORDS(PIPE, PEER, TABLE, MODE, OPAQUE, TRANSPARENT)
+CF_KEYWORDS(PIPE, PEER, TABLE)
CF_GRAMMAR
-CF_ADDTO(proto, pipe_proto '}')
+CF_ADDTO(proto, pipe_proto '}' { this_channel = NULL; } )
-pipe_proto_start: proto_start PIPE {
- this_proto = proto_config_new(&proto_pipe, $1);
- PIPE_CFG->mode = PIPE_TRANSPARENT;
- }
- ;
+pipe_proto_start: proto_start PIPE
+{
+ this_proto = proto_config_new(&proto_pipe, $1);
+ this_channel = channel_config_new(NULL, 0, this_proto); /* a channel config is created together with the protocol config */
+ this_channel->in_filter = FILTER_ACCEPT; /* both filters start as FILTER_ACCEPT */
+ this_channel->out_filter = FILTER_ACCEPT;
+};
pipe_proto:
pipe_proto_start proto_name '{'
| pipe_proto proto_item ';'
- | pipe_proto PEER TABLE SYM ';' {
- if ($4->class != SYM_TABLE)
- cf_error("Routing table name expected");
- PIPE_CFG->peer = $4->def;
- }
- | pipe_proto MODE OPAQUE ';' { PIPE_CFG->mode = PIPE_OPAQUE; }
- | pipe_proto MODE TRANSPARENT ';' { PIPE_CFG->mode = PIPE_TRANSPARENT; }
+ | pipe_proto channel_item ';' /* channel items are now accepted directly inside the pipe block */
+ | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } /* the rtable nonterminal replaces the former SYM plus class check */
;
CF_CODE
diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c
index 6ef80322..310f3c01 100644
--- a/proto/pipe/pipe.c
+++ b/proto/pipe/pipe.c
@@ -44,54 +44,42 @@
#include "pipe.h"
static void
-pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, ea_list *attrs)
+pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *old, ea_list *attrs)
{
- struct pipe_proto *p = (struct pipe_proto *) P;
- struct announce_hook *ah = (src_table == P->table) ? p->peer_ahook : P->main_ahook;
- rtable *dst_table = ah->table;
+ struct pipe_proto *p = (void *) P;
+ struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri;
struct rte_src *src;
- net *nn;
rte *e;
- rta a;
+ rta *a;
if (!new && !old)
return;
- if (dst_table->pipe_busy)
+ if (dst->table->pipe_busy)
{
- log(L_ERR "Pipe loop detected when sending %I/%d to table %s",
- n->n.prefix, n->n.pxlen, dst_table->name);
+ log(L_ERR "Pipe loop detected when sending %N to table %s",
+ n->n.addr, dst->table->name);
return;
}
- nn = net_get(dst_table, n->n.prefix, n->n.pxlen);
if (new)
{
- memcpy(&a, new->attrs, sizeof(rta));
-
- if (p->mode == PIPE_OPAQUE)
- {
- a.src = P->main_source;
- a.source = RTS_PIPE;
- }
-
- a.aflags = 0;
- a.eattrs = attrs;
- a.hostentry = NULL;
- e = rte_get_temp(&a);
- e->net = nn;
+ a = alloca(rta_size(new->attrs));
+ memcpy(a, new->attrs, rta_size(new->attrs));
+
+ a->aflags = 0;
+ a->eattrs = attrs;
+ a->hostentry = NULL;
+ e = rte_get_temp(a);
e->pflags = 0;
- if (p->mode == PIPE_TRANSPARENT)
- {
- /* Copy protocol specific embedded attributes. */
- memcpy(&(e->u), &(new->u), sizeof(e->u));
- e->pref = new->pref;
- e->pflags = new->pflags;
- }
+ /* Copy protocol specific embedded attributes. */
+ memcpy(&(e->u), &(new->u), sizeof(e->u));
+ e->pref = new->pref;
+ e->pflags = new->pflags;
- src = a.src;
+ src = a->src;
}
else
{
@@ -99,9 +87,9 @@ pipe_rt_notify(struct proto *P, rtable *src_table, net *n, rte *new, rte *old, e
src = old->attrs->src;
}
- src_table->pipe_busy = 1;
- rte_update2(ah, nn, e, src);
- src_table->pipe_busy = 0;
+ src_ch->table->pipe_busy = 1;
+ rte_update2(dst, n->n.addr, e, src);
+ src_ch->table->pipe_busy = 0;
}
static int
@@ -111,171 +99,117 @@ pipe_import_control(struct proto *P, rte **ee, ea_list **ea UNUSED, struct linpo
if (pp == P)
return -1; /* Avoid local loops automatically */
+
return 0;
}
-static int
-pipe_reload_routes(struct proto *P)
+static void /* reload hook, installed as P->reload_routes by pipe_init(); now takes the channel and returns nothing */
+pipe_reload_routes(struct channel *C)
{
- struct pipe_proto *p = (struct pipe_proto *) P;
-
- /*
- * Because the pipe protocol feeds routes from both routing tables
- * together, both directions are reloaded during refeed and 'reload
- * out' command works like 'reload' command. For symmetry, we also
- * request refeed when 'reload in' command is used.
- */
- proto_request_feeding(P);
+ struct pipe_proto *p = (void *) C->proto;
- proto_reset_limit(P->main_ahook->in_limit);
- proto_reset_limit(p->peer_ahook->in_limit);
-
- return 1;
+ /* Route reload on one channel is just refeed on the other */
+ channel_request_feeding((C == p->pri) ? p->sec : p->pri); /* any channel other than p->pri selects p->pri */
}
-static struct proto *
-pipe_init(struct proto_config *C)
-{
- struct pipe_config *c = (struct pipe_config *) C;
- struct proto *P = proto_new(C, sizeof(struct pipe_proto));
- struct pipe_proto *p = (struct pipe_proto *) P;
- p->mode = c->mode;
- p->peer_table = c->peer->table;
- P->accept_ra_types = (p->mode == PIPE_OPAQUE) ? RA_OPTIMAL : RA_ANY;
- P->rt_notify = pipe_rt_notify;
- P->import_control = pipe_import_control;
- P->reload_routes = pipe_reload_routes;
-
- return P;
-}
-
-static int
-pipe_start(struct proto *P)
+static void /* configuration checks for a pipe; each failed check is reported through cf_error() */
+pipe_postconfig(struct proto_config *CF)
{
- struct pipe_config *cf = (struct pipe_config *) P->cf;
- struct pipe_proto *p = (struct pipe_proto *) P;
+ struct pipe_config *cf = (void *) CF;
+ struct channel_config *cc = proto_cf_main_channel(CF); /* the checks below read the table and limits from this channel config */
- /* Lock both tables, unlock is handled in pipe_cleanup() */
- rt_lock_table(P->table);
- rt_lock_table(p->peer_table);
+ if (!cc->table)
+ cf_error("Primary routing table not specified");
- /* Going directly to PS_UP - prepare for feeding,
- connect the protocol to both routing tables */
+ if (!cf->peer)
+ cf_error("Secondary routing table not specified");
- P->main_ahook = proto_add_announce_hook(P, P->table, &P->stats);
- P->main_ahook->out_filter = cf->c.out_filter;
- P->main_ahook->in_limit = cf->c.in_limit;
- proto_reset_limit(P->main_ahook->in_limit);
+ if (cc->table == cf->peer)
+ cf_error("Primary table and peer table must be different");
- p->peer_ahook = proto_add_announce_hook(P, p->peer_table, &p->peer_stats);
- p->peer_ahook->out_filter = cf->c.in_filter;
- p->peer_ahook->in_limit = cf->c.out_limit;
- proto_reset_limit(p->peer_ahook->in_limit);
+ if (cc->table->addr_type != cf->peer->addr_type) /* dereferences both tables; presumably cf_error() above does not return - TODO confirm */
+ cf_error("Primary table and peer table must have the same type");
- if (p->mode == PIPE_OPAQUE)
- {
- P->main_source = rt_get_source(P, 0);
- rt_lock_source(P->main_source);
- }
+ if (cc->rx_limit.action)
+ cf_error("Pipe protocol does not support receive limits");
- return PS_UP;
+ if (cc->in_keep_filtered)
+ cf_error("Pipe protocol prohibits keeping filtered routes");
}
-static void
-pipe_cleanup(struct proto *P)
+static int /* result is the logical AND of the two proto_configure_channel() calls at the end */
+pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf)
{
- struct pipe_proto *p = (struct pipe_proto *) P;
-
- bzero(&P->stats, sizeof(struct proto_stats));
- bzero(&p->peer_stats, sizeof(struct proto_stats));
-
- P->main_ahook = NULL;
- p->peer_ahook = NULL;
-
- if (p->mode == PIPE_OPAQUE)
- rt_unlock_source(P->main_source);
- P->main_source = NULL;
-
- rt_unlock_table(P->table);
- rt_unlock_table(p->peer_table);
+ struct channel_config *cc = proto_cf_main_channel(&cf->c); /* single configured channel that both derived configs are built from */
+
+ struct channel_config pri_cf = {
+ .name = "pri",
+ .channel = cc->channel,
+ .table = cc->table, /* table of the configured channel */
+ .out_filter = cc->out_filter,
+ .in_limit = cc->in_limit,
+ .ra_mode = RA_ANY
+ };
+
+ struct channel_config sec_cf = {
+ .name = "sec",
+ .channel = cc->channel,
+ .table = cf->peer, /* peer table from the pipe config */
+ .out_filter = cc->in_filter, /* crossed on purpose: the configured in_filter becomes this channel's out_filter */
+ .in_limit = cc->out_limit, /* likewise the configured out_limit becomes this channel's in_limit */
+ .ra_mode = RA_ANY
+ };
+
+ return
+ proto_configure_channel(&p->p, &p->pri, &pri_cf) && /* short-circuit: the second call is skipped when this one yields 0 */
+ proto_configure_channel(&p->p, &p->sec, &sec_cf);
}
-static void
-pipe_postconfig(struct proto_config *C)
+static struct proto * /* creates the protocol instance, installs the pipe hooks and configures both channels */
+pipe_init(struct proto_config *CF)
{
- struct pipe_config *c = (struct pipe_config *) C;
+ struct proto *P = proto_new(CF);
+ struct pipe_proto *p = (void *) P;
+ struct pipe_config *cf = (void *) CF;
- if (!c->peer)
- cf_error("Name of peer routing table not specified");
- if (c->peer == C->table)
- cf_error("Primary table and peer table must be different");
+ P->rt_notify = pipe_rt_notify;
+ P->import_control = pipe_import_control;
+ P->reload_routes = pipe_reload_routes;
- if (C->in_keep_filtered)
- cf_error("Pipe protocol prohibits keeping filtered routes");
- if (C->rx_limit)
- cf_error("Pipe protocol does not support receive limits");
-}
+ pipe_configure_channels(p, cf); /* NOTE(review): the int result is ignored here, unlike in pipe_reconfigure() - confirm this is intended */
-extern int proto_reconfig_type;
+ return P;
+}
/* Reconfigure hook: the new version only re-runs pipe_configure_channels() with the new config and returns its result */
static int
-pipe_reconfigure(struct proto *P, struct proto_config *new)
+pipe_reconfigure(struct proto *P, struct proto_config *CF)
{
- struct pipe_proto *p = (struct pipe_proto *)P;
- struct proto_config *old = P->cf;
- struct pipe_config *oc = (struct pipe_config *) old;
- struct pipe_config *nc = (struct pipe_config *) new;
-
- if ((oc->peer->table != nc->peer->table) || (oc->mode != nc->mode))
- return 0;
-
- /* Update output filters in ahooks */
- if (P->main_ahook)
- {
- P->main_ahook->out_filter = new->out_filter;
- P->main_ahook->in_limit = new->in_limit;
- proto_verify_limits(P->main_ahook);
- }
-
- if (p->peer_ahook)
- {
- p->peer_ahook->out_filter = new->in_filter;
- p->peer_ahook->in_limit = new->out_limit;
- proto_verify_limits(p->peer_ahook);
- }
-
- if ((P->proto_state != PS_UP) || (proto_reconfig_type == RECONFIG_SOFT))
- return 1;
-
- if ((new->preference != old->preference)
- || ! filter_same(new->in_filter, old->in_filter)
- || ! filter_same(new->out_filter, old->out_filter))
- proto_request_feeding(P);
+ struct pipe_proto *p = (void *) P;
+ struct pipe_config *cf = (void *) CF;
- return 1;
+ return pipe_configure_channels(p, cf); /* the explicit peer-table, mode, preference and filter comparisons of the old version are gone */
}
static void
-pipe_copy_config(struct proto_config *dest, struct proto_config *src)
+pipe_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED) /* both parameters become UNUSED: the only statement of the body is removed below */
{
/* Just a shallow copy, not many items here */
/* NOTE(review): with the proto_copy_rest() call removed, the comment above no longer describes any code in this body */
- proto_copy_rest(dest, src, sizeof(struct pipe_config));
}
/* Status hook: formats a short description of the pipe into @buf with bsprintf() */
static void
pipe_get_status(struct proto *P, byte *buf)
{
- struct pipe_proto *p = (struct pipe_proto *) P;
+ struct pipe_proto *p = (void *) P;
- bsprintf(buf, "%c> %s", (p->mode == PIPE_OPAQUE) ? '-' : '=', p->peer_table->name);
+ bsprintf(buf, "%s <=> %s", p->pri->table->name, p->sec->table->name); /* new format names the tables of both channels; the mode character is gone */
}
static void
pipe_show_stats(struct pipe_proto *p)
{
- struct proto_stats *s1 = &p->p.stats;
- struct proto_stats *s2 = &p->peer_stats;
+ struct proto_stats *s1 = &p->pri->stats;
+ struct proto_stats *s2 = &p->sec->stats;
/*
* Pipe stats (as anything related to pipes) are a bit tricky. There
@@ -318,17 +252,16 @@ pipe_show_stats(struct pipe_proto *p)
static void
pipe_show_proto_info(struct proto *P)
{
- struct pipe_proto *p = (struct pipe_proto *) P;
- struct pipe_config *cf = (struct pipe_config *) P->cf;
+ struct pipe_proto *p = (void *) P;
- // cli_msg(-1006, " Table: %s", P->table->name);
- // cli_msg(-1006, " Peer table: %s", p->peer_table->name);
- cli_msg(-1006, " Preference: %d", P->preference);
- cli_msg(-1006, " Input filter: %s", filter_name(cf->c.in_filter));
- cli_msg(-1006, " Output filter: %s", filter_name(cf->c.out_filter));
+ cli_msg(-1006, " Channel %s", "main");
+ cli_msg(-1006, " Table: %s", p->pri->table->name);
+ cli_msg(-1006, " Peer table: %s", p->sec->table->name);
+ cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter));
+ cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter));
- proto_show_limit(cf->c.in_limit, "Import limit:");
- proto_show_limit(cf->c.out_limit, "Export limit:");
+ channel_show_limit(&p->pri->in_limit, "Import limit:");
+ channel_show_limit(&p->sec->in_limit, "Export limit:");
if (P->proto_state != PS_DOWN)
pipe_show_stats(p);
@@ -338,13 +271,10 @@ pipe_show_proto_info(struct proto *P)
struct protocol proto_pipe = {
.name = "Pipe",
.template = "pipe%d",
- .multitable = 1,
- .preference = DEF_PREF_PIPE,
+ .proto_size = sizeof(struct pipe_proto),
.config_size = sizeof(struct pipe_config),
.postconfig = pipe_postconfig,
.init = pipe_init,
- .start = pipe_start,
- .cleanup = pipe_cleanup,
.reconfigure = pipe_reconfigure,
.copy_config = pipe_copy_config,
.get_status = pipe_get_status,
diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h
index 50b31698..038c6666 100644
--- a/proto/pipe/pipe.h
+++ b/proto/pipe/pipe.h
@@ -9,27 +9,15 @@
#ifndef _BIRD_PIPE_H_
#define _BIRD_PIPE_H_
-#define PIPE_OPAQUE 0
-#define PIPE_TRANSPARENT 1
-
/* Pipe configuration; pipe.c converts a struct proto_config pointer to this type with a plain cast */
struct pipe_config {
struct proto_config c; /* placed first, which is what makes that cast usable */
struct rtable_config *peer; /* Table we're connected to */
- int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */
};
/* Pipe protocol instance; pipe.c converts a struct proto pointer to this type with a plain cast */
struct pipe_proto {
struct proto p; /* placed first, which is what makes that cast usable */
- struct rtable *peer_table;
- struct announce_hook *peer_ahook; /* Announce hook for direction peer->primary */
- struct proto_stats peer_stats; /* Statistics for the direction peer->primary */
- int mode; /* PIPE_OPAQUE or PIPE_TRANSPARENT */
+ struct channel *pri; /* set up by pipe_configure_channels() with the table of the configured channel */
+ struct channel *sec; /* set up by pipe_configure_channels() with the peer table */
};
-
-extern struct protocol proto_pipe;
-
-static inline int proto_is_pipe(struct proto *p)
-{ return p->proto == &proto_pipe; }
-
#endif
diff --git a/proto/radv/Makefile b/proto/radv/Makefile
index efc4d4af..05317eff 100644
--- a/proto/radv/Makefile
+++ b/proto/radv/Makefile
@@ -1,5 +1,6 @@
-source=radv.c packets.c
-root-rel=../../
-dir-name=proto/radv
+src := packets.c radv.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
-include ../../Rules
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/radv/config.Y b/proto/radv/config.Y
index 0ff84aeb..0e43c237 100644
--- a/proto/radv/config.Y
+++ b/proto/radv/config.Y
@@ -41,6 +41,7 @@ CF_ADDTO(proto, radv_proto)
radv_proto_start: proto_start RADV
{
this_proto = proto_config_new(&proto_radv, $1);
+
init_list(&RADV_CFG->patt_list);
init_list(&RADV_CFG->pref_list);
init_list(&RADV_CFG->rdnss_list);
@@ -49,15 +50,12 @@ radv_proto_start: proto_start RADV
radv_proto_item:
proto_item
+ | proto_channel
| INTERFACE radv_iface
| PREFIX radv_prefix { add_tail(&RADV_CFG->pref_list, NODE this_radv_prefix); }
| RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_CFG->rdnss_list, &radv_dns_list); }
| DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_CFG->dnssl_list, &radv_dns_list); }
- | TRIGGER prefix {
- RADV_CFG->trigger_prefix = $2.addr;
- RADV_CFG->trigger_pxlen = $2.len;
- RADV_CFG->trigger_valid = 1;
- }
+ | TRIGGER net_ip6 { RADV_CFG->trigger = $2; }
;
radv_proto_opts:
@@ -94,15 +92,15 @@ radv_iface_item:
| MIN DELAY expr { RADV_IFACE->min_delay = $3; if ($3 <= 0) cf_error("Min delay must be positive"); }
| MANAGED bool { RADV_IFACE->managed = $2; }
| OTHER CONFIG bool { RADV_IFACE->other_config = $3; }
- | LINK MTU expr { RADV_IFACE->link_mtu = $3; if ($3 < 0) cf_error("Link MTU must be 0 or positive"); }
- | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if (($3 < 0) || ($3 > 3600000)) cf_error("Reachable time must be in range 0-3600000"); }
- | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; if ($3 < 0) cf_error("Retrans timer must be 0 or positive"); }
- | LINGER TIME expr { RADV_IFACE->linger_time = $3; if (($3 < 0) || ($3 > 3600)) cf_error("Linger time must be in range 0-3600"); }
- | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if (($4 < 0) || ($4 > 255)) cf_error("Current hop limit must be in range 0-255"); }
+ | LINK MTU expr { RADV_IFACE->link_mtu = $3; }
+ | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if ($3 > 3600000) cf_error("Reachable time must be in range 0-3600000"); }
+ | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; }
+ | LINGER TIME expr { RADV_IFACE->linger_time = $3; if ($3 > 3600) cf_error("Linger time must be in range 0-3600"); }
+ | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if ($4 > 255) cf_error("Current hop limit must be in range 0-255"); }
| DEFAULT LIFETIME expr radv_sensitive {
RADV_IFACE->default_lifetime = $3;
- if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000");
- if ($4 != -1) RADV_IFACE->default_lifetime_sensitive = $4;
+ if ($3 > 9000) cf_error("Default lifetime must be in range 0-9000");
+ if ($4 != (uint) -1) RADV_IFACE->default_lifetime_sensitive = $4;
}
| DEFAULT PREFERENCE radv_preference { RADV_IFACE->default_preference = $3; }
| PREFIX radv_prefix { add_tail(&RADV_IFACE->pref_list, NODE this_radv_prefix); }
@@ -129,7 +127,7 @@ radv_iface_finish:
if ((ic->min_ra_int > 3) &&
(ic->min_ra_int > (ic->max_ra_int * 3 / 4)))
- cf_error("Min RA interval must be at most 3/4 * Max RA interval %d %d", ic->min_ra_int, ic->max_ra_int);
+ cf_error("Min RA interval must be at most 3/4 * Max RA interval");
if ((ic->default_lifetime > 0) && (ic->default_lifetime < ic->max_ra_int))
cf_error("Default lifetime must be either 0 or at least Max RA interval");
@@ -150,11 +148,10 @@ radv_iface:
radv_iface_start iface_patt_list_nopx radv_iface_opt_list radv_iface_finish;
-radv_prefix_start: prefix
+radv_prefix_start: net_ip6
{
this_radv_prefix = cfg_allocz(sizeof(struct radv_prefix_config));
- RADV_PREFIX->prefix = $1.addr;
- RADV_PREFIX->pxlen = $1.len;
+ RADV_PREFIX->prefix = *(net_addr_ip6 *) &($1);
RADV_PREFIX->onlink = 1;
RADV_PREFIX->autonomous = 1;
@@ -168,13 +165,11 @@ radv_prefix_item:
| AUTONOMOUS bool { RADV_PREFIX->autonomous = $2; }
| VALID LIFETIME expr radv_sensitive {
RADV_PREFIX->valid_lifetime = $3;
- if ($3 < 0) cf_error("Valid lifetime must be 0 or positive");
- if ($4 != -1) RADV_PREFIX->valid_lifetime_sensitive = $4;
+ if ($4 != (uint) -1) RADV_PREFIX->valid_lifetime_sensitive = $4;
}
| PREFERRED LIFETIME expr radv_sensitive {
RADV_PREFIX->preferred_lifetime = $3;
- if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive");
- if ($4 != -1) RADV_PREFIX->preferred_lifetime_sensitive = $4;
+ if ($4 != (uint) -1) RADV_PREFIX->preferred_lifetime_sensitive = $4;
}
;
diff --git a/proto/radv/packets.c b/proto/radv/packets.c
index 8a301854..7c148b7d 100644
--- a/proto/radv/packets.c
+++ b/proto/radv/packets.c
@@ -38,7 +38,7 @@ struct radv_opt_prefix
u32 valid_lifetime;
u32 preferred_lifetime;
u32 reserved;
- ip_addr prefix;
+ ip6_addr prefix;
};
#define OPT_PX_ONLINK 0x80
@@ -58,7 +58,7 @@ struct radv_opt_rdnss
u8 length;
u16 reserved;
u32 lifetime;
- ip_addr servers[];
+ ip6_addr servers[];
};
struct radv_opt_dnssl
@@ -79,7 +79,7 @@ radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *b
{
struct radv_rdnss_config *rcf_base = rcf;
struct radv_opt_rdnss *op = (void *) *buf;
- int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip_addr);
+ int max_i = (bufend - *buf - sizeof(struct radv_opt_rdnss)) / sizeof(ip6_addr);
int i = 0;
if (max_i < 1)
@@ -100,8 +100,7 @@ radv_prepare_rdnss(struct radv_iface *ifa, list *rdnss_list, char **buf, char *b
if (i >= max_i)
goto too_much;
- op->servers[i] = rcf->server;
- ipa_hton(op->servers[i]);
+ op->servers[i] = ip6_hton(rcf->server);
i++;
rcf = NODE_NEXT(rcf);
@@ -206,10 +205,10 @@ radv_prepare_dnssl(struct radv_iface *ifa, list *dnssl_list, char **buf, char *b
}
static int
-radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix,
+radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *px,
char **buf, char *bufend)
{
- struct radv_prefix_config *pc = prefix->cf;
+ struct radv_prefix_config *pc = px->cf;
if (*buf + sizeof(struct radv_opt_prefix) > bufend)
{
@@ -221,7 +220,7 @@ radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix,
struct radv_opt_prefix *op = (void *) *buf;
op->type = OPT_PREFIX;
op->length = 4;
- op->pxlen = prefix->len;
+ op->pxlen = px->prefix.pxlen;
op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) |
(pc->autonomous ? OPT_PX_AUTONOMOUS : 0);
op->valid_lifetime = (ifa->ra->active || !pc->valid_lifetime_sensitive) ?
@@ -229,8 +228,7 @@ radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *prefix,
op->preferred_lifetime = (ifa->ra->active || !pc->preferred_lifetime_sensitive) ?
htonl(pc->preferred_lifetime) : 0;
op->reserved = 0;
- op->prefix = prefix->prefix;
- ipa_hton(op->prefix);
+ op->prefix = ip6_hton(px->prefix.prefix);
*buf += sizeof(*op);
return 0;
@@ -334,7 +332,7 @@ radv_rx_hook(sock *sk, uint size)
if (sk->lifindex != sk->iface->index)
return 1;
- if (ipa_equal(sk->faddr, ifa->addr->ip))
+ if (ipa_equal(sk->faddr, sk->saddr))
return 1;
if (size < 8)
@@ -386,6 +384,7 @@ radv_sk_open(struct radv_iface *ifa)
{
sock *sk = sk_new(ifa->pool);
sk->type = SK_IP;
+ sk->subtype = SK_IPV6;
sk->dport = ICMPV6_PROTO;
sk->saddr = ifa->addr->ip;
sk->vrf = ifa->ra->p.vrf;
diff --git a/proto/radv/radv.c b/proto/radv/radv.c
index c53a0a95..e9140115 100644
--- a/proto/radv/radv.c
+++ b/proto/radv/radv.c
@@ -58,23 +58,24 @@ radv_timer(timer *tm)
* This sets the timer, but we replace it just at the end of this function
* (replacing a timer is fine).
*/
- if (ifa->prefix_expires && (ifa->prefix_expires <= now))
+ if (ifa->prefix_expires && (ifa->prefix_expires <= current_time()))
radv_iface_notify(ifa, RA_EV_GC);
radv_send_ra(ifa, 0);
/* Update timer */
- ifa->last = now;
- unsigned after = ifa->cf->min_ra_int;
- after += random() % (ifa->cf->max_ra_int - ifa->cf->min_ra_int + 1);
+ ifa->last = current_time();
+ btime t = ifa->cf->min_ra_int S;
+ btime r = (ifa->cf->max_ra_int - ifa->cf->min_ra_int) S;
+ t += random() % (r + 1);
if (ifa->initial)
+ {
+ t = MIN(t, MAX_INITIAL_RTR_ADVERT_INTERVAL);
ifa->initial--;
+ }
- if (ifa->initial)
- after = MIN(after, MAX_INITIAL_RTR_ADVERT_INTERVAL);
-
- tm_start(ifa->timer, after);
+ tm_start(ifa->timer, t);
}
static struct radv_prefix_config default_prefix = {
@@ -89,21 +90,18 @@ static struct radv_prefix_config dead_prefix = {
/* Find a corresponding config for the given prefix */
static struct radv_prefix_config *
-radv_prefix_match(struct radv_iface *ifa, struct ifa *a)
+radv_prefix_match(struct radv_iface *ifa, net_addr_ip6 *px)
{
struct radv_proto *p = ifa->ra;
struct radv_config *cf = (struct radv_config *) (p->p.cf);
struct radv_prefix_config *pc;
- if (a->scope <= SCOPE_LINK)
- return NULL;
-
WALK_LIST(pc, ifa->cf->pref_list)
- if ((a->pxlen >= pc->pxlen) && ipa_in_net(a->prefix, pc->prefix, pc->pxlen))
+ if (net_in_net_ip6(px, &pc->prefix))
return pc;
WALK_LIST(pc, cf->pref_list)
- if ((a->pxlen >= pc->pxlen) && ipa_in_net(a->prefix, pc->prefix, pc->pxlen))
+ if (net_in_net_ip6(px, &pc->prefix))
return pc;
return &default_prefix;
@@ -128,7 +126,12 @@ radv_prepare_prefixes(struct radv_iface *ifa)
struct ifa *addr;
WALK_LIST(addr, ifa->iface->addrs)
{
- struct radv_prefix_config *pc = radv_prefix_match(ifa, addr);
+ if ((addr->prefix.type != NET_IP6) ||
+ (addr->scope <= SCOPE_LINK))
+ continue;
+
+ net_addr_ip6 *prefix = (void *) &addr->prefix;
+ struct radv_prefix_config *pc = radv_prefix_match(ifa, prefix);
if (!pc || pc->skip)
continue;
@@ -136,7 +139,7 @@ radv_prepare_prefixes(struct radv_iface *ifa)
/* Do we have it already? */
struct radv_prefix *existing = NULL;
WALK_LIST(pfx, ifa->prefixes)
- if ((pfx->len == addr->pxlen) && ipa_equal(pfx->prefix, addr->prefix))
+ if (net_equal_ip6(&pfx->prefix, prefix))
{
existing = pfx;
break;
@@ -144,12 +147,11 @@ radv_prepare_prefixes(struct radv_iface *ifa)
if (!existing)
{
- RADV_TRACE(D_EVENTS, "Adding new prefix %I/%d on %s",
- addr->prefix, addr->pxlen, ifa->iface->name);
+ RADV_TRACE(D_EVENTS, "Adding new prefix %N on %s",
+ prefix, ifa->iface->name);
existing = mb_allocz(ifa->pool, sizeof *existing);
- existing->prefix = addr->prefix;
- existing->len = addr->pxlen;
+ net_copy_ip6(&existing->prefix, prefix);
add_tail(&ifa->prefixes, NODE existing);
}
@@ -167,15 +169,16 @@ radv_prepare_prefixes(struct radv_iface *ifa)
* dropped just yet). If something is dead and rots there for long enough,
* clean it up.
*/
- bird_clock_t expires = now + cf->linger_time;
- bird_clock_t expires_min = 0;
+ btime now_ = current_time();
+ btime expires = now_ + cf->linger_time S;
+ btime expires_min = 0;
struct radv_prefix *next;
WALK_LIST_DELSAFE(pfx, next, ifa->prefixes)
{
if (pfx->alive && !pfx->mark)
{
- RADV_TRACE(D_EVENTS, "Marking prefix %I/$d on %s as dead",
- pfx->prefix, pfx->len, ifa->iface->name);
+ RADV_TRACE(D_EVENTS, "Marking prefix %N on %s as dead",
+ pfx->prefix, ifa->iface->name);
pfx->alive = 0;
pfx->expires = expires;
@@ -184,10 +187,10 @@ radv_prepare_prefixes(struct radv_iface *ifa)
if (!pfx->alive)
{
- if (pfx->expires <= now)
+ if (pfx->expires <= now_)
{
- RADV_TRACE(D_EVENTS, "Removing prefix %I/%d on %s",
- pfx->prefix, pfx->len, ifa->iface->name);
+ RADV_TRACE(D_EVENTS, "Removing prefix %N on %s",
+ pfx->prefix, ifa->iface->name);
rem_node(NODE pfx);
mb_free(pfx);
@@ -232,13 +235,8 @@ radv_iface_notify(struct radv_iface *ifa, int event)
radv_prepare_prefixes(ifa);
/* Update timer */
- unsigned delta = now - ifa->last;
- unsigned after = 0;
-
- if (delta < ifa->cf->min_delay)
- after = ifa->cf->min_delay - delta;
-
- tm_start(ifa->timer, after);
+ btime t = ifa->last + ifa->cf->min_delay S - current_time();
+ tm_start(ifa->timer, t);
}
static void
@@ -278,17 +276,6 @@ radv_iface_add(struct object_lock *lock)
radv_iface_notify(ifa, RA_EV_INIT);
}
-static inline struct ifa *
-find_lladdr(struct iface *iface)
-{
- struct ifa *a;
- WALK_LIST(a, iface->addrs)
- if (a->scope == SCOPE_LINK)
- return a;
-
- return NULL;
-}
-
static void
radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_config *cf)
{
@@ -302,23 +289,12 @@ radv_iface_new(struct radv_proto *p, struct iface *iface, struct radv_iface_conf
ifa->ra = p;
ifa->cf = cf;
ifa->iface = iface;
+ ifa->addr = iface->llv6;
init_list(&ifa->prefixes);
add_tail(&p->iface_list, NODE ifa);
- ifa->addr = find_lladdr(iface);
- if (!ifa->addr)
- {
- log(L_ERR "%s: Missing link-local address on interface %s", p->p.name, iface->name);
- return;
- }
-
- timer *tm = tm_new(pool);
- tm->hook = radv_timer;
- tm->data = ifa;
- tm->randomize = 0;
- tm->recurrent = 0;
- ifa->timer = tm;
+ ifa->timer = tm_new_init(pool, radv_timer, ifa, 0, 0);
struct object_lock *lock = olock_new(pool);
lock->type = OBJLOCK_IP;
@@ -353,8 +329,15 @@ radv_if_notify(struct proto *P, unsigned flags, struct iface *iface)
if (flags & IF_CHANGE_UP)
{
- struct radv_iface_config *ic = (struct radv_iface_config *)
- iface_patt_find(&cf->patt_list, iface, NULL);
+ struct radv_iface_config *ic = (void *) iface_patt_find(&cf->patt_list, iface, NULL);
+
+ /* Ignore non-multicast ifaces */
+ if (!(iface->flags & IF_MULTICAST))
+ return;
+
+ /* Ignore ifaces without link-local address */
+ if (!iface->llv6)
+ return;
if (ic)
radv_iface_new(p, iface, ic);
@@ -393,11 +376,16 @@ radv_ifa_notify(struct proto *P, unsigned flags UNUSED, struct ifa *a)
radv_iface_notify(ifa, RA_EV_CHANGE);
}
-static inline int radv_net_match_trigger(struct radv_config *cf, net *n)
+static inline int
+radv_trigger_valid(struct radv_config *cf)
+{
+ return cf->trigger.type != 0;
+}
+
+static inline int
+radv_net_match_trigger(struct radv_config *cf, net *n)
{
- return cf->trigger_valid &&
- (n->n.pxlen == cf->trigger_pxlen) &&
- ipa_equal(n->n.prefix, cf->trigger_prefix);
+ return radv_trigger_valid(cf) && net_equal(n->n.addr, &cf->trigger);
}
int
@@ -413,7 +401,7 @@ radv_import_control(struct proto *P, rte **new, ea_list **attrs UNUSED, struct l
}
static void
-radv_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs UNUSED)
+radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs UNUSED)
{
struct radv_proto *p = (struct radv_proto *) P;
struct radv_config *cf = (struct radv_config *) (P->cf);
@@ -440,19 +428,30 @@ radv_check_active(struct radv_proto *p)
{
struct radv_config *cf = (struct radv_config *) (p->p.cf);
- if (! cf->trigger_valid)
+ if (!radv_trigger_valid(cf))
return 1;
- return rt_examine(p->p.table, cf->trigger_prefix, cf->trigger_pxlen,
- &(p->p), p->p.cf->out_filter);
+ struct channel *c = p->p.main_channel;
+ return rt_examine(c->table, &cf->trigger, &p->p, c->out_filter);
+}
+
+static void
+radv_postconfig(struct proto_config *CF)
+{
+ // struct radv_config *cf = (void *) CF;
+
+ /* Define default channel */
+ if (EMPTY_LIST(CF->channels))
+ channel_config_new(NULL, NET_IP6, CF);
}
static struct proto *
-radv_init(struct proto_config *c)
+radv_init(struct proto_config *CF)
{
- struct proto *P = proto_new(c, sizeof(struct radv_proto));
+ struct proto *P = proto_new(CF);
+
+ P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- P->accept_ra_types = RA_OPTIMAL;
P->import_control = radv_import_control;
P->rt_notify = radv_rt_notify;
P->if_notify = radv_if_notify;
@@ -468,7 +467,7 @@ radv_start(struct proto *P)
struct radv_config *cf = (struct radv_config *) (P->cf);
init_list(&(p->iface_list));
- p->active = !cf->trigger_valid;
+ p->active = !radv_trigger_valid(cf);
return PS_UP;
}
@@ -493,11 +492,11 @@ radv_shutdown(struct proto *P)
}
static int
-radv_reconfigure(struct proto *P, struct proto_config *c)
+radv_reconfigure(struct proto *P, struct proto_config *CF)
{
struct radv_proto *p = (struct radv_proto *) P;
// struct radv_config *old = (struct radv_config *) (p->cf);
- struct radv_config *new = (struct radv_config *) c;
+ struct radv_config *new = (struct radv_config *) CF;
/*
* The question is why there is a reconfigure function for RAdv if
@@ -507,12 +506,26 @@ radv_reconfigure(struct proto *P, struct proto_config *c)
* causing nodes to temporarily remove their default routes.
*/
- P->cf = c; /* radv_check_active() requires proper P->cf */
+ if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF)))
+ return 0;
+
+ P->cf = CF; /* radv_check_active() requires proper P->cf */
p->active = radv_check_active(p);
struct iface *iface;
WALK_LIST(iface, iface_list)
{
+ if (!(iface->flags & IF_UP))
+ continue;
+
+ /* Ignore non-multicast ifaces */
+ if (!(iface->flags & IF_MULTICAST))
+ continue;
+
+ /* Ignore ifaces without link-local address */
+ if (!iface->llv6)
+ continue;
+
struct radv_iface *ifa = radv_iface_find(p, iface);
struct radv_iface_config *ic = (struct radv_iface_config *)
iface_patt_find(&new->patt_list, iface, NULL);
@@ -564,7 +577,10 @@ radv_get_status(struct proto *P, byte *buf)
struct protocol proto_radv = {
.name = "RAdv",
.template = "radv%d",
+ .channel_mask = NB_IP6,
+ .proto_size = sizeof(struct radv_proto),
.config_size = sizeof(struct radv_config),
+ .postconfig = radv_postconfig,
.init = radv_init,
.start = radv_start,
.shutdown = radv_shutdown,
diff --git a/proto/radv/radv.h b/proto/radv/radv.h
index 60b9980f..4672e3b2 100644
--- a/proto/radv/radv.h
+++ b/proto/radv/radv.h
@@ -30,7 +30,7 @@
#define ICMPV6_RA 134
#define MAX_INITIAL_RTR_ADVERTISEMENTS 3
-#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16
+#define MAX_INITIAL_RTR_ADVERT_INTERVAL (16 S_)
#define DEFAULT_MAX_RA_INT 600
#define DEFAULT_MIN_DELAY 3
@@ -51,9 +51,7 @@ struct radv_config
list rdnss_list; /* Global list of RDNSS configs (struct radv_rdnss_config) */
list dnssl_list; /* Global list of DNSSL configs (struct radv_dnssl_config) */
- ip_addr trigger_prefix; /* Prefix of a trigger route, if defined */
- u8 trigger_pxlen; /* Pxlen of a trigger route, if defined */
- u8 trigger_valid; /* Whether a trigger route is defined */
+ net_addr trigger; /* Prefix of a trigger route, if defined */
};
struct radv_iface_config
@@ -87,8 +85,7 @@ struct radv_iface_config
struct radv_prefix_config
{
node n;
- ip_addr prefix;
- uint pxlen;
+ net_addr_ip6 prefix;
u8 skip; /* Do not include this prefix to RA */
u8 onlink; /* Standard options from RFC 4861 */
@@ -104,7 +101,7 @@ struct radv_rdnss_config
node n;
u32 lifetime; /* Valid if lifetime_mult is 0 */
u16 lifetime_mult; /* Lifetime specified as multiple of max_ra_int */
- ip_addr server; /* IP address of recursive DNS server */
+ ip6_addr server; /* IP address of recursive DNS server */
};
struct radv_dnssl_config
@@ -128,12 +125,12 @@ struct radv_proto
struct radv_prefix /* One prefix we advertise */
{
node n;
- ip_addr prefix;
- u8 len;
+ net_addr_ip6 prefix;
+
u8 alive; /* Is the prefix alive? If not, we advertise it
with 0 lifetime, so clients stop using it */
u8 mark; /* A temporary mark for processing */
- bird_clock_t expires; /* The time when we drop this prefix from
+ btime expires; /* The time when we drop this prefix from
advertising. It is valid only if !alive. */
struct radv_prefix_config *cf; /* The config tied to this prefix */
};
@@ -147,13 +144,13 @@ struct radv_iface
struct ifa *addr; /* Link-local address of iface */
struct pool *pool; /* A pool for interface-specific things */
list prefixes; /* The prefixes we advertise (struct radv_prefix) */
- bird_clock_t prefix_expires; /* When the soonest prefix expires (0 = none dead) */
+ btime prefix_expires; /* When the soonest prefix expires (0 = none dead) */
timer *timer;
struct object_lock *lock;
sock *sk;
- bird_clock_t last; /* Time of last sending of RA */
+ btime last; /* Time of last sending of RA */
u16 plen; /* Length of prepared RA in tbuf, or 0 if not valid */
byte initial; /* How many RAs are still to be sent as initial */
};
diff --git a/proto/rip/Makefile b/proto/rip/Makefile
index d2d3c987..7feabcd8 100644
--- a/proto/rip/Makefile
+++ b/proto/rip/Makefile
@@ -1,5 +1,6 @@
-source=rip.c packets.c
-root-rel=../../
-dir-name=proto/rip
+src := packets.c rip.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
-include ../../Rules
+tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file
diff --git a/proto/rip/config.Y b/proto/rip/config.Y
index 4ec45c7a..e3bc4ae3 100644
--- a/proto/rip/config.Y
+++ b/proto/rip/config.Y
@@ -32,34 +32,40 @@ rip_check_auth(void)
CF_DECLS
-CF_KEYWORDS(RIP, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT,
+CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT,
GARBAGE, PORT, ADDRESS, MODE, BROADCAST, MULTICAST, PASSIVE,
VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO, TIME, BFD,
AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5, TTL, SECURITY,
RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK, RIP_METRIC, RIP_TAG)
-%type <i> rip_auth
+%type <i> rip_variant rip_auth
CF_GRAMMAR
CF_ADDTO(proto, rip_proto)
-rip_proto_start: proto_start RIP
+rip_variant:
+ RIP { $$ = 1; }
+ | RIP NG { $$ = 0; }
+ ;
+
+rip_proto_start: proto_start rip_variant
{
this_proto = proto_config_new(&proto_rip, $1);
- init_list(&RIP_CFG->patt_list);
+ this_proto->net_type = $2 ? NET_IP4 : NET_IP6;
- RIP_CFG->rip2 = RIP_IS_V2;
+ init_list(&RIP_CFG->patt_list);
+ RIP_CFG->rip2 = $2;
RIP_CFG->infinity = RIP_DEFAULT_INFINITY;
-
- RIP_CFG->min_timeout_time = 60;
- RIP_CFG->max_garbage_time = 60;
+ RIP_CFG->min_timeout_time = 60 S_;
+ RIP_CFG->max_garbage_time = 60 S_;
};
rip_proto_item:
proto_item
+ | proto_channel
| ECMP bool { RIP_CFG->ecmp = $2 ? RIP_DEFAULT_ECMP_LIMIT : 0; }
- | ECMP bool LIMIT expr { RIP_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); }
+ | ECMP bool LIMIT expr { RIP_CFG->ecmp = $2 ? $4 : 0; }
| INFINITY expr { RIP_CFG->infinity = $2; }
| INTERFACE rip_iface
;
@@ -131,7 +137,7 @@ rip_iface_item:
| MODE MULTICAST { RIP_IFACE->mode = RIP_IM_MULTICAST; }
| MODE BROADCAST { RIP_IFACE->mode = RIP_IM_BROADCAST; if (rip_cfg_is_ng()) cf_error("Broadcast not supported in RIPng"); }
| PASSIVE bool { RIP_IFACE->passive = $2; }
- | ADDRESS ipa { RIP_IFACE->address = $2; }
+ | ADDRESS ipa { RIP_IFACE->address = $2; if (ipa_is_ip4($2) != rip_cfg_is_v2()) cf_error("IP address version mismatch"); }
| PORT expr { RIP_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); }
| VERSION expr { RIP_IFACE->version = $2;
if (rip_cfg_is_ng()) cf_error("Version not supported in RIPng");
@@ -141,9 +147,9 @@ rip_iface_item:
| SPLIT HORIZON bool { RIP_IFACE->split_horizon = $3; }
| POISON REVERSE bool { RIP_IFACE->poison_reverse = $3; }
| CHECK ZERO bool { RIP_IFACE->check_zero = $3; }
- | UPDATE TIME expr { RIP_IFACE->update_time = $3; if ($3<=0) cf_error("Update time must be positive"); }
- | TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3; if ($3<=0) cf_error("Timeout time must be positive"); }
- | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3; if ($3<=0) cf_error("Garbage time must be positive"); }
+ | UPDATE TIME expr { RIP_IFACE->update_time = $3 S_; if ($3<=0) cf_error("Update time must be positive"); }
+ | TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3 S_; if ($3<=0) cf_error("Timeout time must be positive"); }
+ | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3 S_; if ($3<=0) cf_error("Garbage time must be positive"); }
| ECMP WEIGHT expr { RIP_IFACE->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); }
| RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX length must be in range 256-65535"); }
| TX LENGTH expr { RIP_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); }
diff --git a/proto/rip/packets.c b/proto/rip/packets.c
index 722a9012..891f454f 100644
--- a/proto/rip/packets.c
+++ b/proto/rip/packets.c
@@ -9,6 +9,8 @@
* Can be freely distributed and used under the terms of the GNU GPL.
*/
+#undef LOCAL_DEBUG
+
#include "rip.h"
#include "lib/mac.h"
@@ -76,8 +78,7 @@ struct rip_auth_tail
/* Internal representation of RTE block data */
struct rip_block
{
- ip_addr prefix;
- int pxlen;
+ net_addr net;
u32 metric;
u16 tag;
u16 no_af;
@@ -106,30 +107,30 @@ static inline uint rip_pkt_hdrlen(struct rip_iface *ifa)
{ return sizeof(struct rip_packet) + (ifa->cf->auth_type ? RIP_BLOCK_LENGTH : 0); }
static inline void
-rip_put_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *rte)
+rip_put_block(struct rip_proto *p, byte *pos, struct rip_block *rte)
{
if (rip_is_v2(p))
{
struct rip_block_v2 *block = (void *) pos;
block->family = rte->no_af ? 0 : htons(RIP_AF_IPV4);
block->tag = htons(rte->tag);
- block->network = ip4_hton(ipa_to_ip4(rte->prefix));
- block->netmask = ip4_hton(ip4_mkmask(rte->pxlen));
+ block->network = ip4_hton(net4_prefix(&rte->net));
+ block->netmask = ip4_hton(ip4_mkmask(net4_pxlen(&rte->net)));
block->next_hop = ip4_hton(ipa_to_ip4(rte->next_hop));
block->metric = htonl(rte->metric);
}
else /* RIPng */
{
struct rip_block_ng *block = (void *) pos;
- block->prefix = ip6_hton(ipa_to_ip6(rte->prefix));
+ block->prefix = ip6_hton(net6_prefix(&rte->net));
block->tag = htons(rte->tag);
- block->pxlen = rte->pxlen;
+ block->pxlen = net6_pxlen(&rte->net);
block->metric = rte->metric;
}
}
static inline void
-rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte UNUSED4)
+rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte)
{
struct rip_block_ng *block = (void *) pos;
block->prefix = ip6_hton(ipa_to_ip6(rte->next_hop));
@@ -139,7 +140,7 @@ rip_put_next_hop(struct rip_proto *p UNUSED, byte *pos, struct rip_block *rte UN
}
static inline int
-rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *rte)
+rip_get_block(struct rip_proto *p, byte *pos, struct rip_block *rte)
{
if (rip_is_v2(p))
{
@@ -149,8 +150,8 @@ rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *
if (block->family != (rte->no_af ? 0 : htons(RIP_AF_IPV4)))
return 0;
- rte->prefix = ipa_from_ip4(ip4_ntoh(block->network));
- rte->pxlen = ip4_masklen(ip4_ntoh(block->netmask));
+ uint pxlen = ip4_masklen(ip4_ntoh(block->netmask));
+ net_fill_ip4(&rte->net, ip4_ntoh(block->network), pxlen);
rte->metric = ntohl(block->metric);
rte->tag = ntohs(block->tag);
rte->next_hop = ipa_from_ip4(ip4_ntoh(block->next_hop));
@@ -169,8 +170,8 @@ rip_get_block(struct rip_proto *p UNUSED4 UNUSED6, byte *pos, struct rip_block *
return 0;
}
- rte->prefix = ipa_from_ip6(ip6_ntoh(block->prefix));
- rte->pxlen = block->pxlen;
+ uint pxlen = (block->pxlen <= IP6_MAX_PREFIX_LENGTH) ? block->pxlen : 255;
+ net_fill_ip6(&rte->net, ip6_ntoh(block->prefix), pxlen);
rte->metric = block->metric;
rte->tag = ntohs(block->tag);
/* rte->next_hop is deliberately kept unmodified */;
@@ -188,7 +189,10 @@ rip_update_csn(struct rip_proto *p UNUSED, struct rip_iface *ifa)
* have the same CSN. We are using real time, but enforcing monotonicity.
*/
if (ifa->cf->auth_type == RIP_AUTH_CRYPTO)
- ifa->csn = (ifa->csn < (u32) now_real) ? (u32) now_real : ifa->csn + 1;
+ {
+ u32 now_real = (u32) (current_real_time() TO_S);
+ ifa->csn = (ifa->csn < now_real) ? now_real : ifa->csn + 1;
+ }
}
static void
@@ -406,8 +410,9 @@ rip_receive_request(struct rip_proto *p, struct rip_iface *ifa, struct rip_packe
if (!rip_get_block(p, pos, &b))
return;
- /* Special case - zero prefix, infinity metric */
- if (ipa_nonzero(b.prefix) || b.pxlen || (b.metric != p->infinity))
+ /* Special case - infinity metric, for RIPng also zero prefix */
+ if ((b.metric != p->infinity) ||
+ (rip_is_ng(p) && !net_zero_ip6((net_addr_ip6 *) &b.net)))
return;
/* We do nothing if TX is already active */
@@ -432,6 +437,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa)
byte *max = rip_tx_buffer(ifa) + ifa->tx_plen -
(rip_is_v2(p) ? RIP_BLOCK_LENGTH : 2*RIP_BLOCK_LENGTH);
ip_addr last_next_hop = IPA_NONE;
+ btime now_ = current_time();
int send = 0;
struct rip_packet *pkt = (void *) pos;
@@ -440,17 +446,15 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa)
pkt->unused = 0;
pos += rip_pkt_hdrlen(ifa);
- FIB_ITERATE_START(&p->rtable, &ifa->tx_fit, z)
+ FIB_ITERATE_START(&p->rtable, &ifa->tx_fit, struct rip_entry, en)
{
- struct rip_entry *en = (struct rip_entry *) z;
-
/* Dummy entries */
if (!en->valid)
goto next_entry;
/* Stale entries that should be removed */
if ((en->valid == RIP_ENTRY_STALE) &&
- ((en->changed + ifa->cf->garbage_time) <= now))
+ ((en->changed + ifa->cf->garbage_time) <= now_))
goto next_entry;
/* Triggered updates */
@@ -460,28 +464,28 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa)
/* Not enough space for current entry */
if (pos > max)
{
- FIB_ITERATE_PUT(&ifa->tx_fit, z);
+ FIB_ITERATE_PUT(&ifa->tx_fit);
goto break_loop;
}
struct rip_block rte = {
- .prefix = en->n.prefix,
- .pxlen = en->n.pxlen,
.metric = en->metric,
.tag = en->tag
};
+ net_copy(&rte.net, en->n.addr);
+
if (en->iface == ifa->iface)
rte.next_hop = en->next_hop;
if (rip_is_v2(p) && (ifa->cf->version == RIP_V1))
{
/* Skipping subnets (i.e. not hosts, classful networks or default route) */
- if (ip4_masklen(ip4_class_mask(ipa_to_ip4(en->n.prefix))) != en->n.pxlen)
+ if (ip4_masklen(ip4_class_mask(net4_prefix(&rte.net))) != rte.net.pxlen)
goto next_entry;
rte.tag = 0;
- rte.pxlen = 0;
+ rte.net.pxlen = 0;
rte.next_hop = IPA_NONE;
}
@@ -497,7 +501,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa)
goto next_entry;
}
- // TRACE(D_PACKETS, " %I/%d -> %I metric %d", rte.prefix, rte.pxlen, rte.next_hop, rte.metric);
+ // TRACE(D_PACKETS, " %N -> %I metric %d", &rte.net, rte.next_hop, rte.metric);
/* RIPng next hop entry */
if (rip_is_ng(p) && !ipa_equal(rte.next_hop, last_next_hop))
@@ -513,7 +517,7 @@ rip_send_response(struct rip_proto *p, struct rip_iface *ifa)
next_entry: ;
}
- FIB_ITERATE_END(z);
+ FIB_ITERATE_END;
ifa->tx_active = 0;
/* Do not send empty packet */
@@ -540,9 +544,9 @@ break_loop:
* activating the new one.
*/
void
-rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, bird_clock_t changed)
+rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, btime changed)
{
- DBG("RIP: Opening TX session to %I on %s\n", dst, ifa->iface->name);
+ DBG("RIP: Opening TX session to %I on %s\n", addr, ifa->iface->name);
rip_reset_tx_session(p, ifa);
@@ -591,6 +595,7 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack
byte *pos = (byte *) pkt + sizeof(struct rip_packet);
byte *end = (byte *) pkt + plen;
+ btime now_ = current_time();
for (; pos < end; pos += RIP_BLOCK_LENGTH)
{
@@ -598,23 +603,25 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack
if (!rip_get_block(p, pos, &rte))
continue;
- int c = ipa_classify_net(rte.prefix);
- if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
- SKIP("invalid prefix");
-
if (rip_is_v2(p) && (pkt->version == RIP_V1))
{
- if (ifa->cf->check_zero && (rte.tag || rte.pxlen || ipa_nonzero(rte.next_hop)))
+ if (ifa->cf->check_zero && (rte.tag || rte.net.pxlen || ipa_nonzero(rte.next_hop)))
SKIP("RIPv1 reserved field is nonzero");
rte.tag = 0;
- rte.pxlen = ip4_masklen(ip4_class_mask(ipa_to_ip4(rte.prefix)));
+ rte.net.pxlen = ip4_masklen(ip4_class_mask(net4_prefix(&rte.net)));
rte.next_hop = IPA_NONE;
}
- if ((rte.pxlen < 0) || (rte.pxlen > MAX_PREFIX_LENGTH))
+ if (rte.net.pxlen == 255)
SKIP("invalid prefix length");
+ net_normalize(&rte.net);
+
+ int c = net_classify(&rte.net);
+ if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
+ SKIP("invalid prefix");
+
if (rte.metric > p->infinity)
SKIP("invalid metric");
@@ -625,7 +632,7 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack
rte.next_hop = IPA_NONE;
}
- // TRACE(D_PACKETS, " %I/%d -> %I metric %d", rte.prefix, rte.pxlen, rte.next_hop, rte.metric);
+ // TRACE(D_PACKETS, " %N -> %I metric %d", &rte.net, rte.next_hop, rte.metric);
rte.metric += ifa->cf->metric;
@@ -636,19 +643,19 @@ rip_receive_response(struct rip_proto *p, struct rip_iface *ifa, struct rip_pack
.next_hop = ipa_nonzero(rte.next_hop) ? rte.next_hop : from->nbr->addr,
.metric = rte.metric,
.tag = rte.tag,
- .expires = now + ifa->cf->timeout_time
+ .expires = now_ + ifa->cf->timeout_time
};
- rip_update_rte(p, &rte.prefix, rte.pxlen, &new);
+ rip_update_rte(p, &rte.net, &new);
}
else
- rip_withdraw_rte(p, &rte.prefix, rte.pxlen, from);
+ rip_withdraw_rte(p, &rte.net, from);
continue;
skip:
- LOG_RTE("Ignoring route %I/%d received from %I - %s",
- rte.prefix, rte.pxlen, from->nbr->addr, err_dsc);
+ LOG_RTE("Ignoring route %N received from %I - %s",
+ &rte.net, from->nbr->addr, err_dsc);
}
}
@@ -667,8 +674,7 @@ rip_rx_hook(sock *sk, uint len)
sk->iface->name, sk->faddr, sk->laddr);
/* Silently ignore my own packets */
- /* FIXME: Better local address check */
- if (ipa_equal(ifa->iface->addr->ip, sk->faddr))
+ if (ipa_equal(sk->faddr, sk->saddr))
return 1;
if (rip_is_ng(p) && !ipa_is_link_local(sk->faddr))
@@ -704,7 +710,7 @@ rip_rx_hook(sock *sk, uint len)
if ((plen - sizeof(struct rip_packet)) % RIP_BLOCK_LENGTH)
DROP("invalid length", plen);
- n->last_seen = now;
+ n->last_seen = current_time();
rip_update_bfd(p, n);
switch (pkt->command)
@@ -736,19 +742,13 @@ rip_open_socket(struct rip_iface *ifa)
sock *sk = sk_new(p->p.pool);
sk->type = SK_UDP;
+ sk->subtype = rip_is_v2(p) ? SK_IPV4 : SK_IPV6;
sk->sport = ifa->cf->port;
sk->dport = ifa->cf->port;
sk->iface = ifa->iface;
+ sk->saddr = rip_is_v2(p) ? ifa->iface->addr4->ip : ifa->iface->llv6->ip;
sk->vrf = p->p.vrf;
- /*
- * For RIPv2, we explicitly choose a primary address, mainly to ensure that
- * RIP and BFD uses the same one. For RIPng, we left it to kernel, which
- * should choose some link-local address based on the same scope rule.
- */
- if (rip_is_v2(p))
- sk->saddr = ifa->iface->addr->ip;
-
sk->rx_hook = rip_rx_hook;
sk->tx_hook = rip_tx_hook;
sk->err_hook = rip_err_hook;
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 7b380097..a3eeaf17 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -92,15 +92,6 @@ static void rip_trigger_update(struct rip_proto *p);
* RIP routes
*/
-static void
-rip_init_entry(struct fib_node *fn)
-{
- // struct rip_entry *en = (void) *fn;
-
- const uint offset = OFFSETOF(struct rip_entry, routes);
- memset((byte *)fn + offset, 0, sizeof(struct rip_entry) - offset);
-}
-
static struct rip_rte *
rip_add_rte(struct rip_proto *p, struct rip_rte **rp, struct rip_rte *src)
{
@@ -152,27 +143,20 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en)
if (rt)
{
/* Update */
- net *n = net_get(p->p.table, en->n.prefix, en->n.pxlen);
-
rta a0 = {
.src = p->p.main_source,
.source = RTS_RIP,
.scope = SCOPE_UNIVERSE,
- .cast = RTC_UNICAST
+ .dest = RTD_UNICAST,
};
u8 rt_metric = rt->metric;
u16 rt_tag = rt->tag;
- struct rip_rte *rt2 = rt->next;
- /* Find second valid rte */
- while (rt2 && !rip_valid_rte(rt2))
- rt2 = rt2->next;
-
- if (p->ecmp && rt2)
+ if (p->ecmp)
{
/* ECMP route */
- struct mpnh *nhs = NULL;
+ struct nexthop *nhs = NULL;
int num = 0;
for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next)
@@ -180,54 +164,51 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en)
if (!rip_valid_rte(rt))
continue;
- struct mpnh *nh = alloca(sizeof(struct mpnh));
+ struct nexthop *nh = allocz(sizeof(struct nexthop));
+
nh->gw = rt->next_hop;
nh->iface = rt->from->nbr->iface;
nh->weight = rt->from->ifa->cf->ecmp_weight;
- mpnh_insert(&nhs, nh);
+
+ nexthop_insert(&nhs, nh);
num++;
if (rt->tag != rt_tag)
rt_tag = 0;
}
- a0.dest = RTD_MULTIPATH;
- a0.nexthops = nhs;
+ a0.nh = *nhs;
}
else
{
/* Unipath route */
- a0.dest = RTD_ROUTER;
- a0.gw = rt->next_hop;
- a0.iface = rt->from->nbr->iface;
a0.from = rt->from->nbr->addr;
+ a0.nh.gw = rt->next_hop;
+ a0.nh.iface = rt->from->nbr->iface;
}
rta *a = rta_lookup(&a0);
rte *e = rte_get_temp(a);
- e->u.rip.from = a0.iface;
+ e->u.rip.from = a0.nh.iface;
e->u.rip.metric = rt_metric;
e->u.rip.tag = rt_tag;
- e->net = n;
e->pflags = 0;
- rte_update(&p->p, n, e);
+ rte_update(&p->p, en->n.addr, e);
}
else
{
/* Withdraw */
- net *n = net_find(p->p.table, en->n.prefix, en->n.pxlen);
- rte_update(&p->p, n, NULL);
+ rte_update(&p->p, en->n.addr, NULL);
}
}
/**
* rip_update_rte - enter a route update to RIP routing table
* @p: RIP instance
- * @prefix: network prefix
- * @pxlen: network prefix length
+ * @n: network address
* @new: a &rip_rte representing the new route
*
* The function is called by the RIP packet processing code whenever it receives
@@ -237,9 +218,9 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en)
* rip_withdraw_rte() should be called instead of rip_update_rte().
*/
void
-rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *new)
+rip_update_rte(struct rip_proto *p, net_addr *n, struct rip_rte *new)
{
- struct rip_entry *en = fib_get(&p->rtable, prefix, pxlen);
+ struct rip_entry *en = fib_get(&p->rtable, n);
struct rip_rte *rt, **rp;
int changed = 0;
@@ -279,8 +260,7 @@ rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *
/**
* rip_withdraw_rte - enter a route withdraw to RIP routing table
* @p: RIP instance
- * @prefix: network prefix
- * @pxlen: network prefix length
+ * @n: network address
* @from: a &rip_neighbor propagating the withdraw
*
* The function is called by the RIP packet processing code whenever it receives
@@ -288,9 +268,9 @@ rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *
* removed. Eventually, the change is also propagated by rip_announce_rte().
*/
void
-rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_neighbor *from)
+rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from)
{
- struct rip_entry *en = fib_find(&p->rtable, prefix, pxlen);
+ struct rip_entry *en = fib_find(&p->rtable, n);
struct rip_rte *rt, **rp;
if (!en)
@@ -317,7 +297,7 @@ rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_nei
* it into our data structures.
*/
static void
-rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, struct rte *new,
+rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, struct rte *new,
struct rte *old UNUSED, struct ea_list *attrs)
{
struct rip_proto *p = (struct rip_proto *) P;
@@ -332,15 +312,15 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net,
if (rt_metric > p->infinity)
{
- log(L_WARN "%s: Invalid rip_metric value %u for route %I/%d",
- p->p.name, rt_metric, net->n.prefix, net->n.pxlen);
+ log(L_WARN "%s: Invalid rip_metric value %u for route %N",
+ p->p.name, rt_metric, net->n.addr);
rt_metric = p->infinity;
}
if (rt_tag > 0xffff)
{
- log(L_WARN "%s: Invalid rip_tag value %u for route %I/%d",
- p->p.name, rt_tag, net->n.prefix, net->n.pxlen);
+ log(L_WARN "%s: Invalid rip_tag value %u for route %N",
+ p->p.name, rt_tag, net->n.addr);
rt_metric = p->infinity;
rt_tag = 0;
}
@@ -352,7 +332,7 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net,
* collection.
*/
- en = fib_get(&p->rtable, &net->n.prefix, net->n.pxlen);
+ en = fib_get(&p->rtable, net->n.addr);
old_metric = en->valid ? en->metric : -1;
@@ -360,13 +340,13 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net,
en->metric = rt_metric;
en->tag = rt_tag;
en->from = (new->attrs->src->proto == P) ? new->u.rip.from : NULL;
- en->iface = new->attrs->iface;
- en->next_hop = new->attrs->gw;
+ en->iface = new->attrs->nh.iface;
+ en->next_hop = new->attrs->nh.gw;
}
else
{
/* Withdraw */
- en = fib_find(&p->rtable, &net->n.prefix, net->n.pxlen);
+ en = fib_find(&p->rtable, net->n.addr);
if (!en || en->valid != RIP_ENTRY_VALID)
return;
@@ -384,7 +364,7 @@ rip_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net,
/* Activate triggered updates */
if (en->metric != old_metric)
{
- en->changed = now;
+ en->changed = current_time();
rip_trigger_update(p);
}
}
@@ -526,10 +506,10 @@ rip_iface_start(struct rip_iface *ifa)
TRACE(D_EVENTS, "Starting interface %s", ifa->iface->name);
- ifa->next_regular = now + (random() % ifa->cf->update_time) + 1;
- ifa->next_triggered = now; /* Available immediately */
- ifa->want_triggered = 1; /* All routes in triggered update */
- tm_start(ifa->timer, 1); /* Or 100 ms */
+ ifa->next_regular = current_time() + (random() % ifa->cf->update_time) + 100 MS;
+ ifa->next_triggered = current_time(); /* Available immediately */
+ ifa->want_triggered = 1; /* All routes in triggered update */
+ tm_start(ifa->timer, 100 MS);
ifa->up = 1;
if (!ifa->cf->passive)
@@ -650,13 +630,19 @@ rip_add_iface(struct rip_proto *p, struct iface *iface, struct rip_iface_config
else if (ic->mode == RIP_IM_MULTICAST)
ifa->addr = rip_is_v2(p) ? IP4_RIP_ROUTERS : IP6_RIP_ROUTERS;
else /* Broadcast */
- ifa->addr = iface->addr->brd;
+ ifa->addr = iface->addr4->brd;
+ /*
+ * The above is just a workaround for BSD as it can't send broadcasts
+ * to 255.255.255.255. BSD systems need the network broadcast address instead.
+ *
+ * TODO: move this to sysdep code
+ */
init_list(&ifa->neigh_list);
add_tail(&p->iface_list, NODE ifa);
- ifa->timer = tm_new_set(p->p.pool, rip_iface_timer, ifa, 0, 0);
+ ifa->timer = tm_new_init(p->p.pool, rip_iface_timer, ifa, 0, 0);
struct object_lock *lock = olock_new(p->p.pool);
lock->type = OBJLOCK_UDP;
@@ -704,8 +690,8 @@ rip_reconfigure_iface(struct rip_proto *p, struct rip_iface *ifa, struct rip_ifa
rip_iface_update_buffers(ifa);
- if (ifa->next_regular > (now + new->update_time))
- ifa->next_regular = now + (random() % new->update_time) + 1;
+ if (ifa->next_regular > (current_time() + new->update_time))
+ ifa->next_regular = current_time() + (random() % new->update_time) + 100 MS;
if (new->check_link != old->check_link)
rip_iface_update_state(ifa);
@@ -726,7 +712,11 @@ rip_reconfigure_ifaces(struct rip_proto *p, struct rip_config *cf)
WALK_LIST(iface, iface_list)
{
- if (! (iface->flags & IF_UP))
+ if (!(iface->flags & IF_UP))
+ continue;
+
+ /* Ignore ifaces without appropriate address */
+ if (rip_is_v2(p) ? !iface->addr4 : !iface->llv6)
continue;
struct rip_iface *ifa = rip_find_iface(p, iface);
@@ -764,6 +754,10 @@ rip_if_notify(struct proto *P, unsigned flags, struct iface *iface)
{
struct rip_iface_config *ic = (void *) iface_patt_find(&cf->patt_list, iface, NULL);
+ /* Ignore ifaces without appropriate address */
+ if (rip_is_v2(p) ? !iface->addr4 : !iface->llv6)
+ return;
+
if (ic)
rip_add_iface(p, iface, ic);
@@ -822,24 +816,24 @@ rip_timer(timer *t)
struct rip_iface *ifa;
struct rip_neighbor *n, *nn;
struct fib_iterator fit;
- bird_clock_t next = now + MIN(cf->min_timeout_time, cf->max_garbage_time);
- bird_clock_t expires = 0;
+ btime now_ = current_time();
+ btime next = now_ + MIN(cf->min_timeout_time, cf->max_garbage_time);
+ btime expires = 0;
TRACE(D_EVENTS, "Main timer fired");
FIB_ITERATE_INIT(&fit, &p->rtable);
loop:
- FIB_ITERATE_START(&p->rtable, &fit, node)
+ FIB_ITERATE_START(&p->rtable, &fit, struct rip_entry, en)
{
- struct rip_entry *en = (struct rip_entry *) node;
struct rip_rte *rt, **rp;
int changed = 0;
/* Checking received routes for timeout and for dead neighbors */
for (rp = &en->routes; rt = *rp; /* rp = &rt->next */)
{
- if (!rip_valid_rte(rt) || (rt->expires <= now))
+ if (!rip_valid_rte(rt) || (rt->expires <= now_))
{
rip_remove_rte(p, rp);
changed = 1;
@@ -859,7 +853,7 @@ rip_timer(timer *t)
* rip_rt_notify() -> p->rtable change, invalidating hidden variables.
*/
- FIB_ITERATE_PUT_NEXT(&fit, &p->rtable, node);
+ FIB_ITERATE_PUT_NEXT(&fit, &p->rtable);
rip_announce_rte(p, en);
goto loop;
}
@@ -869,9 +863,9 @@ rip_timer(timer *t)
{
expires = en->changed + cf->max_garbage_time;
- if (expires <= now)
+ if (expires <= now_)
{
- // TRACE(D_EVENTS, "entry is too old: %I/%d", en->n.prefix, en->n.pxlen);
+ // TRACE(D_EVENTS, "entry is too old: %N", en->n.addr);
en->valid = 0;
}
else
@@ -881,12 +875,12 @@ rip_timer(timer *t)
/* Remove empty nodes */
if (!en->valid && !en->routes)
{
- FIB_ITERATE_PUT(&fit, node);
- fib_delete(&p->rtable, node);
+ FIB_ITERATE_PUT(&fit);
+ fib_delete(&p->rtable, en);
goto loop;
}
}
- FIB_ITERATE_END(node);
+ FIB_ITERATE_END;
p->rt_reload = 0;
@@ -897,20 +891,20 @@ rip_timer(timer *t)
{
expires = n->last_seen + n->ifa->cf->timeout_time;
- if (expires <= now)
+ if (expires <= now_)
rip_remove_neighbor(p, n);
else
next = MIN(next, expires);
}
- tm_start(p->timer, MAX(next - now, 1));
+ tm_start(p->timer, MAX(next - now_, 100 MS));
}
static inline void
rip_kick_timer(struct rip_proto *p)
{
- if (p->timer->expires > (now + 1))
- tm_start(p->timer, 1); /* Or 100 ms */
+ if (p->timer->expires > (current_time() + 100 MS))
+ tm_start(p->timer, 100 MS);
}
/**
@@ -928,7 +922,8 @@ rip_iface_timer(timer *t)
{
struct rip_iface *ifa = t->data;
struct rip_proto *p = ifa->rip;
- bird_clock_t period = ifa->cf->update_time;
+ btime now_ = current_time();
+ btime period = ifa->cf->update_time;
if (ifa->cf->passive)
return;
@@ -937,40 +932,40 @@ rip_iface_timer(timer *t)
if (ifa->tx_active)
{
- if (now < (ifa->next_regular + period))
- { tm_start(ifa->timer, 1); return; }
+ if (now_ < (ifa->next_regular + period))
+ { tm_start(ifa->timer, 100 MS); return; }
/* We are too late, reset is done by rip_send_table() */
log(L_WARN "%s: Too slow update on %s, resetting", p->p.name, ifa->iface->name);
}
- if (now >= ifa->next_regular)
+ if (now_ >= ifa->next_regular)
{
/* Send regular update, set timer for next period (or following one if necessay) */
TRACE(D_EVENTS, "Sending regular updates for %s", ifa->iface->name);
rip_send_table(p, ifa, ifa->addr, 0);
- ifa->next_regular += period * (1 + ((now - ifa->next_regular) / period));
+ ifa->next_regular += period * (1 + ((now_ - ifa->next_regular) / period));
ifa->want_triggered = 0;
p->triggered = 0;
}
- else if (ifa->want_triggered && (now >= ifa->next_triggered))
+ else if (ifa->want_triggered && (now_ >= ifa->next_triggered))
{
/* Send triggered update, enforce interval between triggered updates */
TRACE(D_EVENTS, "Sending triggered updates for %s", ifa->iface->name);
rip_send_table(p, ifa, ifa->addr, ifa->want_triggered);
- ifa->next_triggered = now + MIN(5, period / 2 + 1);
+ ifa->next_triggered = now_ + MIN(5 S, period / 2);
ifa->want_triggered = 0;
p->triggered = 0;
}
- tm_start(ifa->timer, ifa->want_triggered ? 1 : (ifa->next_regular - now));
+ tm_start(ifa->timer, ifa->want_triggered ? (1 S) : (ifa->next_regular - now_));
}
static inline void
rip_iface_kick_timer(struct rip_iface *ifa)
{
- if (ifa->timer->expires > (now + 1))
- tm_start(ifa->timer, 1); /* Or 100 ms */
+ if (ifa->timer->expires > (current_time() + 100 MS))
+ tm_start(ifa->timer, 100 MS);
}
static void
@@ -991,7 +986,7 @@ rip_trigger_update(struct rip_proto *p)
continue;
TRACE(D_EVENTS, "Scheduling triggered updates for %s", ifa->iface->name);
- ifa->want_triggered = now;
+ ifa->want_triggered = current_time();
rip_iface_kick_timer(ifa);
}
@@ -1035,19 +1030,17 @@ rip_import_control(struct proto *P UNUSED, struct rte **rt, struct ea_list **att
return 0;
}
-static int
-rip_reload_routes(struct proto *P)
+static void
+rip_reload_routes(struct channel *C)
{
- struct rip_proto *p = (struct rip_proto *) P;
+ struct rip_proto *p = (struct rip_proto *) C->proto;
if (p->rt_reload)
- return 1;
+ return;
TRACE(D_EVENTS, "Scheduling route reload");
p->rt_reload = 1;
rip_kick_timer(p);
-
- return 1;
}
static struct ea_list *
@@ -1078,12 +1071,23 @@ rip_rte_same(struct rte *new, struct rte *old)
}
+static void
+rip_postconfig(struct proto_config *CF)
+{
+ // struct rip_config *cf = (void *) CF;
+
+ /* Define default channel */
+ if (EMPTY_LIST(CF->channels))
+ channel_config_new(NULL, CF->net_type, CF);
+}
+
static struct proto *
-rip_init(struct proto_config *cfg)
+rip_init(struct proto_config *CF)
{
- struct proto *P = proto_new(cfg, sizeof(struct rip_proto));
+ struct proto *P = proto_new(CF);
+
+ P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- P->accept_ra_types = RA_OPTIMAL;
P->if_notify = rip_if_notify;
P->rt_notify = rip_rt_notify;
P->neigh_notify = rip_neigh_notify;
@@ -1104,10 +1108,12 @@ rip_start(struct proto *P)
struct rip_config *cf = (void *) (P->cf);
init_list(&p->iface_list);
- fib_init(&p->rtable, P->pool, sizeof(struct rip_entry), 0, rip_init_entry);
+ fib_init(&p->rtable, P->pool, cf->rip2 ? NET_IP4 : NET_IP6,
+ sizeof(struct rip_entry), OFFSETOF(struct rip_entry, n), 0, NULL);
p->rte_slab = sl_new(P->pool, sizeof(struct rip_rte));
- p->timer = tm_new_set(P->pool, rip_timer, p, 0, 0);
+ p->timer = tm_new_init(P->pool, rip_timer, p, 0, 0);
+ p->rip2 = cf->rip2;
p->ecmp = cf->ecmp;
p->infinity = cf->infinity;
p->triggered = 0;
@@ -1121,18 +1127,24 @@ rip_start(struct proto *P)
}
static int
-rip_reconfigure(struct proto *P, struct proto_config *c)
+rip_reconfigure(struct proto *P, struct proto_config *CF)
{
struct rip_proto *p = (void *) P;
- struct rip_config *new = (void *) c;
+ struct rip_config *new = (void *) CF;
// struct rip_config *old = (void *) (P->cf);
+ if (new->rip2 != p->rip2)
+ return 0;
+
if (new->infinity != p->infinity)
return 0;
+ if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF)))
+ return 0;
+
TRACE(D_EVENTS, "Reconfiguring");
- p->p.cf = c;
+ p->p.cf = CF;
p->ecmp = new->ecmp;
rip_reconfigure_ifaces(p, new);
@@ -1184,7 +1196,7 @@ rip_show_interfaces(struct proto *P, char *iff)
}
cli_msg(-1021, "%s:", p->p.name);
- cli_msg(-1021, "%-10s %-6s %6s %6s %6s",
+ cli_msg(-1021, "%-10s %-6s %6s %6s %7s",
"Interface", "State", "Metric", "Nbrs", "Timer");
WALK_LIST(ifa, p->iface_list)
@@ -1197,8 +1209,9 @@ rip_show_interfaces(struct proto *P, char *iff)
if (n->last_seen)
nbrs++;
- int timer = MAX(ifa->next_regular - now, 0);
- cli_msg(-1021, "%-10s %-6s %6u %6u %6u",
+ btime now_ = current_time();
+ btime timer = (ifa->next_regular > now_) ? (ifa->next_regular - now_) : 0;
+ cli_msg(-1021, "%-10s %-6s %6u %6u %7t",
ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->metric, nbrs, timer);
}
@@ -1220,7 +1233,7 @@ rip_show_neighbors(struct proto *P, char *iff)
}
cli_msg(-1022, "%s:", p->p.name);
- cli_msg(-1022, "%-25s %-10s %6s %6s %6s",
+ cli_msg(-1022, "%-25s %-10s %6s %6s %7s",
"IP address", "Interface", "Metric", "Routes", "Seen");
WALK_LIST(ifa, p->iface_list)
@@ -1233,8 +1246,8 @@ rip_show_neighbors(struct proto *P, char *iff)
if (!n->last_seen)
continue;
- int timer = now - n->last_seen;
- cli_msg(-1022, "%-25I %-10s %6u %6u %6u",
+ btime timer = current_time() - n->last_seen;
+ cli_msg(-1022, "%-25I %-10s %6u %6u %7t",
n->nbr->addr, ifa->iface->name, ifa->cf->metric, n->uc, timer);
}
}
@@ -1250,12 +1263,11 @@ rip_dump(struct proto *P)
int i;
i = 0;
- FIB_WALK(&p->rtable, e)
+ FIB_WALK(&p->rtable, struct rip_entry, en)
{
- struct rip_entry *en = (struct rip_entry *) e;
- debug("RIP: entry #%d: %I/%d via %I dev %s valid %d metric %d age %d s\n",
- i++, en->n.prefix, en->n.pxlen, en->next_hop, en->iface->name,
- en->valid, en->metric, now - en->changed);
+ debug("RIP: entry #%d: %N via %I dev %s valid %d metric %d age %t\n",
+ i++, en->n.addr, en->next_hop, en->iface->name,
+ en->valid, en->metric, current_time() - en->changed);
}
FIB_WALK_END;
@@ -1274,7 +1286,10 @@ struct protocol proto_rip = {
.template = "rip%d",
.attr_class = EAP_RIP,
.preference = DEF_PREF_RIP,
+ .channel_mask = NB_IP,
+ .proto_size = sizeof(struct rip_proto),
.config_size = sizeof(struct rip_config),
+ .postconfig = rip_postconfig,
.init = rip_init,
.dump = rip_dump,
.start = rip_start,
diff --git a/proto/rip/rip.h b/proto/rip/rip.h
index b24d9536..55696333 100644
--- a/proto/rip/rip.h
+++ b/proto/rip/rip.h
@@ -27,12 +27,6 @@
#include "lib/timer.h"
-#ifdef IPV6
-#define RIP_IS_V2 0
-#else
-#define RIP_IS_V2 1
-#endif
-
#define RIP_V1 1
#define RIP_V2 2
@@ -44,9 +38,9 @@
#define RIP_DEFAULT_ECMP_LIMIT 16
#define RIP_DEFAULT_INFINITY 16
-#define RIP_DEFAULT_UPDATE_TIME 30
-#define RIP_DEFAULT_TIMEOUT_TIME 180
-#define RIP_DEFAULT_GARBAGE_TIME 120
+#define RIP_DEFAULT_UPDATE_TIME (30 S_)
+#define RIP_DEFAULT_TIMEOUT_TIME (180 S_)
+#define RIP_DEFAULT_GARBAGE_TIME (120 S_)
struct rip_config
@@ -58,8 +52,8 @@ struct rip_config
u8 ecmp; /* Maximum number of nexthops in ECMP route, or 0 */
u8 infinity; /* Maximum metric value, representing infinity */
- u32 min_timeout_time; /* Minimum of interface timeout_time */
- u32 max_garbage_time; /* Maximum of interface garbage_time */
+ btime min_timeout_time; /* Minimum of interface timeout_time */
+ btime max_garbage_time; /* Maximum of interface garbage_time */
};
struct rip_iface_config
@@ -84,9 +78,9 @@ struct rip_iface_config
u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */
int tx_tos;
int tx_priority;
- u32 update_time; /* Periodic update interval */
- u32 timeout_time; /* Route expiration timeout */
- u32 garbage_time; /* Unreachable entry GC timeout */
+ btime update_time; /* Periodic update interval */
+ btime timeout_time; /* Route expiration timeout */
+ btime garbage_time; /* Unreachable entry GC timeout */
list *passwords; /* Passwords for authentication */
};
@@ -98,6 +92,7 @@ struct rip_proto
slab *rte_slab; /* Slab for internal routes (struct rip_rte) */
timer *timer; /* Main protocol timer */
+ u8 rip2; /* RIPv2 (IPv4) or RIPng (IPv6) */
u8 ecmp; /* Maximum number of nexthops in ECMP route, or 0 */
u8 infinity; /* Maximum metric value, representing infinity */
u8 triggered; /* Logical AND of interface want_triggered values */
@@ -125,14 +120,14 @@ struct rip_iface
list neigh_list; /* List of iface neighbors (struct rip_neighbor) */
/* Update scheduling */
- bird_clock_t next_regular; /* Next time when regular update should be called */
- bird_clock_t next_triggered; /* Next time when triggerd update may be called */
- bird_clock_t want_triggered; /* Nonzero if triggered update is scheduled */
+ btime next_regular; /* Next time when regular update should be called */
+ btime next_triggered; /* Next time when triggerd update may be called */
+ btime want_triggered; /* Nonzero if triggered update is scheduled */
/* Active update */
int tx_active; /* Update session is active */
ip_addr tx_addr; /* Update session destination address */
- bird_clock_t tx_changed; /* Minimal changed time for triggered update */
+ btime tx_changed; /* Minimal changed time for triggered update */
struct fib_iterator tx_fit; /* FIB iterator in RIP routing table (p.rtable) */
};
@@ -142,14 +137,13 @@ struct rip_neighbor
struct rip_iface *ifa; /* Associated interface, may be NULL if stale */
struct neighbor *nbr; /* Associaded core neighbor, may be NULL if stale */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
- bird_clock_t last_seen; /* Time of last received and accepted message */
+ btime last_seen; /* Time of last received and accepted message */
u32 uc; /* Use count, number of routes linking the neighbor */
u32 csn; /* Last received crypto sequence number */
};
struct rip_entry
{
- struct fib_node n;
struct rip_rte *routes; /* List of incoming routes */
u8 valid; /* Entry validity state (RIP_ENTRY_*) */
@@ -159,7 +153,9 @@ struct rip_entry
struct iface *iface; /* Outgoing route iface (for next hop) */
ip_addr next_hop; /* Outgoing route next hop */
- bird_clock_t changed; /* Last time when the outgoing route metric changed */
+ btime changed; /* Last time when the outgoing route metric changed */
+
+ struct fib_node n;
};
struct rip_rte
@@ -171,7 +167,7 @@ struct rip_rte
u16 metric; /* Route metric (after increase) */
u16 tag; /* Route tag */
- bird_clock_t expires; /* Time of route expiration */
+ btime expires; /* Time of route expiration */
};
@@ -189,16 +185,11 @@ struct rip_rte
#define EA_RIP_METRIC EA_CODE(EAP_RIP, 0)
#define EA_RIP_TAG EA_CODE(EAP_RIP, 1)
-#define rip_is_v2(X) RIP_IS_V2
-#define rip_is_ng(X) (!RIP_IS_V2)
-
-/*
static inline int rip_is_v2(struct rip_proto *p)
{ return p->rip2; }
static inline int rip_is_ng(struct rip_proto *p)
{ return ! p->rip2; }
-*/
static inline void
rip_reset_tx_session(struct rip_proto *p, struct rip_iface *ifa)
@@ -211,8 +202,8 @@ rip_reset_tx_session(struct rip_proto *p, struct rip_iface *ifa)
}
/* rip.c */
-void rip_update_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_rte *new);
-void rip_withdraw_rte(struct rip_proto *p, ip_addr *prefix, int pxlen, struct rip_neighbor *from);
+void rip_update_rte(struct rip_proto *p, net_addr *n, struct rip_rte *new);
+void rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from);
struct rip_neighbor * rip_get_neighbor(struct rip_proto *p, ip_addr *a, struct rip_iface *ifa);
void rip_update_bfd(struct rip_proto *p, struct rip_neighbor *n);
void rip_show_interfaces(struct proto *P, char *iff);
@@ -220,7 +211,7 @@ void rip_show_neighbors(struct proto *P, char *iff);
/* packets.c */
void rip_send_request(struct rip_proto *p, struct rip_iface *ifa);
-void rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, bird_clock_t changed);
+void rip_send_table(struct rip_proto *p, struct rip_iface *ifa, ip_addr addr, btime changed);
int rip_open_socket(struct rip_iface *ifa);
diff --git a/proto/rpki/Doc b/proto/rpki/Doc
new file mode 100644
index 00000000..d1d1bf55
--- /dev/null
+++ b/proto/rpki/Doc
@@ -0,0 +1,5 @@
+S rpki.c
+S packets.c
+S transport.c
+S tcp_transport.c
+S ssh_transport.c
diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile
new file mode 100644
index 00000000..eb09b7df
--- /dev/null
+++ b/proto/rpki/Makefile
@@ -0,0 +1,6 @@
+src := rpki.c packets.c tcp_transport.c ssh_transport.c transport.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
+
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/rpki/config.Y b/proto/rpki/config.Y
new file mode 100644
index 00000000..39fdfd01
--- /dev/null
+++ b/proto/rpki/config.Y
@@ -0,0 +1,144 @@
+/*
+ * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol
+ *
+ * (c) 2015 CZ.NIC
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+CF_HDR
+
+#include "proto/rpki/rpki.h"
+
+CF_DEFINES
+
+#define RPKI_CFG ((struct rpki_config *) this_proto)
+#define RPKI_TR_SSH_CFG ((struct rpki_tr_ssh_config *) RPKI_CFG->tr_config.spec)
+
+static void
+rpki_check_unused_hostname(void)
+{
+ if (RPKI_CFG->hostname != NULL)
+ cf_error("Only one cache server per protocol allowed");
+}
+
+static void
+rpki_check_unused_transport(void)
+{
+ if (RPKI_CFG->tr_config.spec != NULL)
+ cf_error("At the most one transport per protocol allowed");
+}
+
+CF_DECLS
+
+CF_KEYWORDS(RPKI, REMOTE, BIRD, PRIVATE, PUBLIC, KEY, TCP, SSH, TRANSPORT, USER,
+ RETRY, REFRESH, EXPIRE, KEEP)
+
+%type <i> rpki_keep_interval
+
+CF_GRAMMAR
+
+CF_ADDTO(proto, rpki_proto)
+
+rpki_proto_start: proto_start RPKI {
+ this_proto = proto_config_new(&proto_rpki, $1);
+ RPKI_CFG->retry_interval = RPKI_RETRY_INTERVAL;
+ RPKI_CFG->refresh_interval = RPKI_REFRESH_INTERVAL;
+ RPKI_CFG->expire_interval = RPKI_EXPIRE_INTERVAL;
+};
+
+rpki_proto: rpki_proto_start proto_name '{' rpki_proto_opts '}' { rpki_check_config(RPKI_CFG); };
+
+rpki_proto_opts:
+ /* empty */
+ | rpki_proto_opts rpki_proto_item ';'
+ ;
+
+rpki_proto_item:
+ proto_item
+ | proto_channel
+ | REMOTE rpki_cache_addr
+ | REMOTE rpki_cache_addr rpki_proto_item_port
+ | rpki_proto_item_port
+ | TRANSPORT rpki_transport
+ | REFRESH rpki_keep_interval expr {
+ if (rpki_check_refresh_interval($3))
+ cf_error(rpki_check_refresh_interval($3));
+ RPKI_CFG->refresh_interval = $3;
+ RPKI_CFG->keep_refresh_interval = $2;
+ }
+ | RETRY rpki_keep_interval expr {
+ if (rpki_check_retry_interval($3))
+ cf_error(rpki_check_retry_interval($3));
+ RPKI_CFG->retry_interval = $3;
+ RPKI_CFG->keep_retry_interval = $2;
+ }
+ | EXPIRE rpki_keep_interval expr {
+ if (rpki_check_expire_interval($3))
+ cf_error(rpki_check_expire_interval($3));
+ RPKI_CFG->expire_interval = $3;
+ RPKI_CFG->keep_expire_interval = $2;
+ }
+ ;
+
+rpki_keep_interval:
+ /* empty */ { $$ = 0; }
+ | KEEP { $$ = 1; }
+ ;
+
+rpki_proto_item_port: PORT expr { check_u16($2); RPKI_CFG->port = $2; };
+
+rpki_cache_addr:
+   text {
+     rpki_check_unused_hostname();
+     RPKI_CFG->hostname = $1;
+   }
+ | ipa {
+     rpki_check_unused_hostname();
+     RPKI_CFG->ip = $1;
+     /* Ensure hostname is filled; the buffer size must match the bsnprintf() limit below */
+     char *hostname = cfg_allocz(INET6_ADDRSTRLEN + 1);
+     bsnprintf(hostname, INET6_ADDRSTRLEN+1, "%I", RPKI_CFG->ip);
+     RPKI_CFG->hostname = hostname;
+   }
+ ;
+
+rpki_transport:
+ TCP rpki_transport_tcp_init
+ | SSH rpki_transport_ssh_init '{' rpki_transport_ssh_opts '}' rpki_transport_ssh_check
+ ;
+
+rpki_transport_tcp_init:
+{
+ rpki_check_unused_transport();
+ RPKI_CFG->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_tcp_config));
+ RPKI_CFG->tr_config.type = RPKI_TR_TCP;
+};
+
+rpki_transport_ssh_init:
+{
+ rpki_check_unused_transport();
+ RPKI_CFG->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_ssh_config));
+ RPKI_CFG->tr_config.type = RPKI_TR_SSH;
+};
+
+rpki_transport_ssh_opts:
+ /* empty */
+ | rpki_transport_ssh_opts rpki_transport_ssh_item ';'
+ ;
+
+rpki_transport_ssh_item:
+ BIRD PRIVATE KEY text { RPKI_TR_SSH_CFG->bird_private_key = $4; }
+ | REMOTE PUBLIC KEY text { RPKI_TR_SSH_CFG->cache_public_key = $4; }
+ | USER text { RPKI_TR_SSH_CFG->user = $2; }
+ ;
+
+rpki_transport_ssh_check:
+{
+ if (RPKI_TR_SSH_CFG->user == NULL)
+ cf_error("User must be set");
+};
+
+CF_CODE
+
+CF_END
diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c
new file mode 100644
index 00000000..59a5efaf
--- /dev/null
+++ b/proto/rpki/packets.c
@@ -0,0 +1,1073 @@
+/*
+ * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * This file was a part of RTRlib: http://rpki.realmv6.org/
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#undef LOCAL_DEBUG
+
+#include "rpki.h"
+#include "transport.h"
+#include "packets.h"
+
+#define RPKI_ADD_FLAG 0b00000001
+
+enum rpki_transmit_type {
+ RPKI_RECV = 0,
+ RPKI_SEND = 1,
+};
+
+enum pdu_error_type {
+ CORRUPT_DATA = 0,
+ INTERNAL_ERROR = 1,
+ NO_DATA_AVAIL = 2,
+ INVALID_REQUEST = 3,
+ UNSUPPORTED_PROTOCOL_VER = 4,
+ UNSUPPORTED_PDU_TYPE = 5,
+ WITHDRAWAL_OF_UNKNOWN_RECORD = 6,
+ DUPLICATE_ANNOUNCEMENT = 7,
+ PDU_TOO_BIG = 32
+};
+
+static const char *str_pdu_error_type[] = {
+ [CORRUPT_DATA] = "Corrupt-Data",
+ [INTERNAL_ERROR] = "Internal-Error",
+ [NO_DATA_AVAIL] = "No-Data-Available",
+ [INVALID_REQUEST] = "Invalid-Request",
+ [UNSUPPORTED_PROTOCOL_VER] = "Unsupported-Protocol-Version",
+ [UNSUPPORTED_PDU_TYPE] = "Unsupported-PDU-Type",
+ [WITHDRAWAL_OF_UNKNOWN_RECORD]= "Withdrawal-Of-Unknown-Record",
+ [DUPLICATE_ANNOUNCEMENT] = "Duplicate-Announcement",
+ [PDU_TOO_BIG] = "PDU-Too-Big",
+};
+
+enum pdu_type {
+ SERIAL_NOTIFY = 0,
+ SERIAL_QUERY = 1,
+ RESET_QUERY = 2,
+ CACHE_RESPONSE = 3,
+ IPV4_PREFIX = 4,
+ RESERVED = 5,
+ IPV6_PREFIX = 6,
+ END_OF_DATA = 7,
+ CACHE_RESET = 8,
+ ROUTER_KEY = 9,
+ ERROR = 10,
+ PDU_TYPE_MAX
+};
+
+static const char *str_pdu_type_[] = {
+ [SERIAL_NOTIFY] = "Serial Notify",
+ [SERIAL_QUERY] = "Serial Query",
+ [RESET_QUERY] = "Reset Query",
+ [CACHE_RESPONSE] = "Cache Response",
+ [IPV4_PREFIX] = "IPv4 Prefix",
+ [RESERVED] = "Reserved",
+ [IPV6_PREFIX] = "IPv6 Prefix",
+ [END_OF_DATA] = "End of Data",
+ [CACHE_RESET] = "Cache Reset",
+ [ROUTER_KEY] = "Router Key",
+ [ERROR] = "Error"
+};
+
+static const char *str_pdu_type(uint type) {
+ if (type < PDU_TYPE_MAX)
+ return str_pdu_type_[type];
+ else
+ return "Undefined packet type";
+}
+
+/*
+ * 0 8 16 24 31
+ * .-------------------------------------------.
+ * | Protocol | PDU | |
+ * | Version | Type | reserved = zero |
+ * | 0 or 1 | 0 - 10 | |
+ * +-------------------------------------------+
+ * | |
+ * | Length >= 8 |
+ * | |
+ * `-------------------------------------------' */
+struct pdu_header {
+ u8 ver;
+ u8 type;
+ u16 reserved;
+ u32 len;
+} PACKED;
+
+struct pdu_cache_response {
+ u8 ver;
+ u8 type;
+ u16 session_id;
+ u32 len;
+} PACKED;
+
+struct pdu_serial_notify {
+ u8 ver;
+ u8 type;
+ u16 session_id;
+ u32 len;
+ u32 serial_num;
+} PACKED;
+
+struct pdu_serial_query {
+ u8 ver;
+ u8 type;
+ u16 session_id;
+ u32 len;
+ u32 serial_num;
+} PACKED;
+
+struct pdu_ipv4 {
+ u8 ver;
+ u8 type;
+ u16 reserved;
+ u32 len;
+ u8 flags;
+ u8 prefix_len;
+ u8 max_prefix_len;
+ u8 zero;
+ ip4_addr prefix;
+ u32 asn;
+} PACKED;
+
+struct pdu_ipv6 {
+ u8 ver;
+ u8 type;
+ u16 reserved;
+ u32 len;
+ u8 flags;
+ u8 prefix_len;
+ u8 max_prefix_len;
+ u8 zero;
+ ip6_addr prefix;
+ u32 asn;
+} PACKED;
+
+/*
+ * 0 8 16 24 31
+ * .-------------------------------------------.
+ * | Protocol | PDU | |
+ * | Version | Type | Error Code |
+ * | 1 | 10 | |
+ * +-------------------------------------------+
+ * | |
+ * | Length |
+ * | |
+ * +-------------------------------------------+
+ * | |
+ * | Length of Encapsulated PDU |
+ * | |
+ * +-------------------------------------------+
+ * | |
+ * ~ Copy of Erroneous PDU ~
+ * | |
+ * +-------------------------------------------+
+ * | |
+ * | Length of Error Text |
+ * | |
+ * +-------------------------------------------+
+ * | |
+ * | Arbitrary Text |
+ * | of |
+ * ~ Error Diagnostic Message ~
+ * | |
+ * `-------------------------------------------' */
+struct pdu_error {
+ u8 ver;
+ u8 type;
+ u16 error_code;
+ u32 len;
+ u32 len_enc_pdu; /* Length of Encapsulated PDU */
+ byte rest[]; /* Copy of Erroneous PDU
+ * Length of Error Text
+ * Error Diagnostic Message */
+} PACKED;
+
+struct pdu_reset_query {
+ u8 ver;
+ u8 type;
+ u16 flags;
+ u32 len;
+} PACKED;
+
+struct pdu_end_of_data_v0 {
+ u8 ver;
+ u8 type;
+ u16 session_id;
+ u32 len;
+ u32 serial_num;
+} PACKED;
+
+struct pdu_end_of_data_v1 {
+ u8 ver;
+ u8 type;
+ u16 session_id;
+ u32 len;
+ u32 serial_num;
+ u32 refresh_interval;
+ u32 retry_interval;
+ u32 expire_interval;
+} PACKED;
+
+static const size_t min_pdu_size[] = {
+ [SERIAL_NOTIFY] = sizeof(struct pdu_serial_notify),
+ [SERIAL_QUERY] = sizeof(struct pdu_serial_query),
+ [RESET_QUERY] = sizeof(struct pdu_reset_query),
+ [CACHE_RESPONSE] = sizeof(struct pdu_cache_response),
+ [IPV4_PREFIX] = sizeof(struct pdu_ipv4),
+ [RESERVED] = sizeof(struct pdu_header),
+ [IPV6_PREFIX] = sizeof(struct pdu_ipv6),
+ [END_OF_DATA] = sizeof(struct pdu_end_of_data_v0),
+ [CACHE_RESET] = sizeof(struct pdu_cache_response),
+ [ROUTER_KEY] = sizeof(struct pdu_header), /* FIXME */
+ [ERROR] = 16,
+};
+
+static int rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...);
+
+static void
+rpki_pdu_to_network_byte_order(struct pdu_header *pdu)
+{
+ pdu->reserved = htons(pdu->reserved);
+ pdu->len = htonl(pdu->len);
+
+ switch (pdu->type)
+ {
+ case SERIAL_QUERY:
+ {
+ /* Note that a session_id is converted using converting header->reserved */
+ struct pdu_serial_query *sq_pdu = (void *) pdu;
+ sq_pdu->serial_num = htonl(sq_pdu->serial_num);
+ break;
+ }
+
+ case ERROR:
+ {
+ struct pdu_error *err = (void *) pdu;
+ u32 *err_text_len = (u32 *)(err->rest + err->len_enc_pdu);
+ *err_text_len = htonl(*err_text_len);
+ err->len_enc_pdu = htonl(err->len_enc_pdu);
+ break;
+ }
+
+ case RESET_QUERY:
+ break;
+
+ default:
+ bug("PDU type %s should not be sent by us", str_pdu_type(pdu->type));
+ }
+}
+
+/*
+ * Convert the multi-byte fields of a PDU in place from network to host byte
+ * order. The header fields are converted first, then the type-specific ones.
+ */
+static void
+rpki_pdu_to_host_byte_order(struct pdu_header *pdu)
+{
+  /* The Router Key PDU has two one-byte fields instead of one two-bytes field. */
+  if (pdu->type != ROUTER_KEY)
+    pdu->reserved = ntohs(pdu->reserved);
+
+  pdu->len = ntohl(pdu->len);
+
+  switch (pdu->type)
+  {
+  case SERIAL_NOTIFY:
+  {
+    /* Note that a session_id is converted using converting header->reserved */
+    struct pdu_serial_notify *sn_pdu = (void *) pdu;
+    sn_pdu->serial_num = ntohl(sn_pdu->serial_num);
+    break;
+  }
+
+  case END_OF_DATA:
+  {
+    /* Note that a session_id is converted using converting header->reserved */
+    struct pdu_end_of_data_v0 *eod0 = (void *) pdu;
+    eod0->serial_num = ntohl(eod0->serial_num); /* Same for version 1 */
+
+    if (pdu->ver == RPKI_VERSION_1)
+    {
+      struct pdu_end_of_data_v1 *eod1 = (void *) pdu;
+      eod1->expire_interval = ntohl(eod1->expire_interval);
+      eod1->refresh_interval = ntohl(eod1->refresh_interval);
+      eod1->retry_interval = ntohl(eod1->retry_interval);
+    }
+    break;
+  }
+
+  case IPV4_PREFIX:
+  {
+    struct pdu_ipv4 *ipv4 = (void *) pdu;
+    ipv4->prefix = ip4_ntoh(ipv4->prefix);
+    ipv4->asn = ntohl(ipv4->asn);
+    break;
+  }
+
+  case IPV6_PREFIX:
+  {
+    struct pdu_ipv6 *ipv6 = (void *) pdu;
+    ipv6->prefix = ip6_ntoh(ipv6->prefix);
+    ipv6->asn = ntohl(ipv6->asn);
+    break;
+  }
+
+  case ERROR:
+  {
+    /* Note that a error_code is converted using converting header->reserved */
+    struct pdu_error *err = (void *) pdu;
+    err->len_enc_pdu = ntohl(err->len_enc_pdu);
+
+    /*
+     * The error text length is stored right after the encapsulated PDU.
+     * Convert it only when that field lies inside the PDU as declared by its
+     * own length; 16 is the fixed part of an Error PDU (8 bytes of header,
+     * 4 bytes of encapsulated PDU length, 4 bytes of error text length).
+     * Otherwise a bogus len_enc_pdu would make us write outside of the PDU.
+     * NOTE(review): assumes pdu->len was already validated against the
+     * receive buffer by the caller -- confirm in rpki_rx_hook().
+     */
+    if ((pdu->len >= 16) && (err->len_enc_pdu <= pdu->len - 16))
+    {
+      u32 *err_text_len = (u32 *)(err->rest + err->len_enc_pdu);
+      *err_text_len = ntohl(*err_text_len);
+    }
+    break;
+  }
+
+  case ROUTER_KEY:
+    /* Router Key PDU is not supported yet */
+
+  case SERIAL_QUERY:
+  case RESET_QUERY:
+    /* Serial/Reset Query are sent only in direction router to cache.
+     * We don't care here. */
+
+  case CACHE_RESPONSE:
+  case CACHE_RESET:
+    /* Converted with pdu->reserved */
+    break;
+  }
+}
+
+/**
+ * rpki_pdu_back_to_network_byte_order - convert host-byte order PDU back to network-byte order
+ * @out: allocated memory for writing a converted PDU of size @in->len
+ * @in: host-byte order PDU
+ *
+ * Assumed: |A == ntoh(ntoh(A))|
+ */
+static struct pdu_header *
+rpki_pdu_back_to_network_byte_order(struct pdu_header *out, const struct pdu_header *in)
+{
+ memcpy(out, in, in->len);
+ rpki_pdu_to_host_byte_order(out);
+ return out;
+}
+
+/*
+ * Write a one-line trace of a PDU when packet debugging (D_PACKETS) is enabled.
+ * PDU fields are read as they are stored; rpki_send_pdu() calls this before
+ * converting the PDU to network byte order, so host byte order is expected.
+ * A short type-specific summary is assembled in a fixed-size buffer; when it
+ * does not fit, the summary is cut and marked as too long.
+ */
+static void
+rpki_log_packet(struct rpki_cache *cache, const struct pdu_header *pdu, const enum rpki_transmit_type action)
+{
+  if (!(cache->p->p.debug & D_PACKETS))
+    return;
+
+  const char *str_type = str_pdu_type(pdu->type);
+  char detail[256];
+
+  /* Run one bsnprintf() step; on overflow mark the tail of detail[] and stop */
+#define SAVE(fn) \
+  do { \
+    if (fn < 0) \
+    { \
+      bsnprintf(detail + sizeof(detail) - 16, 16, "... <too long>)"); \
+      goto detail_finished; \
+    } \
+  } while(0)
+
+  switch (pdu->type)
+  {
+  case SERIAL_NOTIFY:
+  case SERIAL_QUERY:
+    SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u)", pdu->reserved, ((struct pdu_serial_notify *) pdu)->serial_num));
+    break;
+
+  case END_OF_DATA:
+  {
+    const struct pdu_end_of_data_v1 *eod = (void *) pdu;
+    if (eod->ver == RPKI_VERSION_1)
+      SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u, refresh: %us, retry: %us, expire: %us)", eod->session_id, eod->serial_num, eod->refresh_interval, eod->retry_interval, eod->expire_interval));
+    else
+      SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u, serial number: %u)", eod->session_id, eod->serial_num));
+    break;
+  }
+
+  case CACHE_RESPONSE:
+    SAVE(bsnprintf(detail, sizeof(detail), "(session id: %u)", pdu->reserved));
+    break;
+
+  case IPV4_PREFIX:
+  {
+    const struct pdu_ipv4 *ipv4 = (void *) pdu;
+    SAVE(bsnprintf(detail, sizeof(detail), "(%I4/%u-%u AS%u)", ipv4->prefix, ipv4->prefix_len, ipv4->max_prefix_len, ipv4->asn));
+    break;
+  }
+
+  case IPV6_PREFIX:
+  {
+    const struct pdu_ipv6 *ipv6 = (void *) pdu;
+    SAVE(bsnprintf(detail, sizeof(detail), "(%I6/%u-%u AS%u)", ipv6->prefix, ipv6->prefix_len, ipv6->max_prefix_len, ipv6->asn));
+    break;
+  }
+
+  case ROUTER_KEY:
+    /* We don't support saving Router Key PDUs yet */
+    SAVE(bsnprintf(detail, sizeof(detail), "(ignored)"));
+    break;
+
+  case ERROR:
+  {
+    const struct pdu_error *err = (void *) pdu;
+
+    /* The error code is a 16-bit value from the PDU; never index the name table out of bounds */
+    const uint err_names = sizeof(str_pdu_error_type) / sizeof(str_pdu_error_type[0]);
+    const char *err_name = (err->error_code < err_names) ? str_pdu_error_type[err->error_code] : NULL;
+    SAVE(bsnprintf(detail, sizeof(detail), "(%s", err_name ? err_name : "Unknown-Error"));
+
+    /*
+     * 16 is the fixed part of an Error PDU (8 bytes of header, 4 bytes of
+     * encapsulated PDU length, 4 bytes of error text length). Everything
+     * below reads at offsets derived from len_enc_pdu, so check it first.
+     */
+    if ((err->len < 16) || (err->len_enc_pdu > err->len - 16))
+    {
+      SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ", malformed size)"));
+      break;
+    }
+
+    /* Optional description of error; the length field may be unaligned */
+    u32 len_err_txt;
+    memcpy(&len_err_txt, err->rest + err->len_enc_pdu, sizeof(len_err_txt));
+    if (len_err_txt > 0)
+    {
+      /* Compared by subtraction, so a huge text length cannot wrap around */
+      if (len_err_txt == err->len - 16 - err->len_enc_pdu)
+      {
+        /*
+         * The text in the PDU is not null-terminated, so copy it out.
+         * Anything longer than detail[] would not fit into the message
+         * anyway and ends up reported as too long by SAVE().
+         */
+        char txt[sizeof(detail)];
+        size_t txt_len = (len_err_txt < sizeof(txt) - 1) ? len_err_txt : sizeof(txt) - 1;
+        memcpy(txt, err->rest + err->len_enc_pdu + 4, txt_len);
+        txt[txt_len] = '\0';
+        SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ": '%s'", txt));
+      }
+      else
+      {
+        SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ", malformed size"));
+      }
+    }
+
+    /* Optional encapsulated erroneous packet; its extent was validated above */
+    if (err->len_enc_pdu)
+    {
+      SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ", %s packet:", str_pdu_type(((const struct pdu_header *) err->rest)->type)));
+      for (const byte *c = err->rest; c != err->rest + err->len_enc_pdu; c++)
+        SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), " %02X", *c));
+    }
+
+    SAVE(bsnprintf(detail + strlen(detail), sizeof(detail) - strlen(detail), ")"));
+    break;
+  }
+
+  default:
+    *detail = '\0';
+  }
+#undef SAVE
+
+ detail_finished:
+
+  if (action == RPKI_RECV)
+  {
+    CACHE_TRACE(D_PACKETS, cache, "Received %s packet %s", str_type, detail);
+  }
+  else
+  {
+    CACHE_TRACE(D_PACKETS, cache, "Sending %s packet %s", str_type, detail);
+  }
+
+#if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG)
+  /* Hex dump of the whole PDU, four bytes per line; iterate over bytes, not headers */
+  int seq = 0;
+  const byte *raw = (const byte *) pdu;
+  for (const byte *c = raw; c != raw + pdu->len; c++)
+  {
+    if ((seq % 4) == 0)
+      DBG("%2d: ", seq);
+
+    DBG(" 0x%02X %-3u", *c, *c);
+
+    if ((++seq % 4) == 0)
+      DBG("\n");
+  }
+  if ((seq % 4) != 0)
+    DBG("\n");
+#endif
+}
+
+static int
+rpki_send_pdu(struct rpki_cache *cache, const void *pdu, const uint len)
+{
+ struct rpki_proto *p = cache->p;
+ sock *sk = cache->tr_sock->sk;
+
+ rpki_log_packet(cache, pdu, RPKI_SEND);
+
+ if (sk->tbuf != sk->tpos)
+ {
+ RPKI_WARN(p, "Old packet overwritten in TX buffer");
+ }
+
+ if (len > sk->tbsize)
+ {
+ RPKI_WARN(p, "%u bytes is too much for send", len);
+ ASSERT(0);
+ return RPKI_ERROR;
+ }
+
+ memcpy(sk->tbuf, pdu, len);
+ rpki_pdu_to_network_byte_order((void *) sk->tbuf);
+
+ if (!sk_send(sk, len))
+ {
+ DBG("Cannot send just the whole data. It will be sent using a call of tx_hook()");
+ }
+
+ return RPKI_SUCCESS;
+}
+
+/**
+ * rpki_check_receive_packet - make a basic validation of received RPKI PDU header
+ * @cache: cache connection instance
+ * @pdu: RPKI PDU in network byte order
+ *
+ * This function checks the protocol version, the PDU type and the PDU size.
+ * If everything is all right, it returns |RPKI_SUCCESS|; otherwise it sends
+ * an Error PDU and returns |RPKI_ERROR|.
+ */
+static int
+rpki_check_receive_packet(struct rpki_cache *cache, const struct pdu_header *pdu)
+{
+  u32 pdu_len = ntohl(pdu->len);
+
+  /*
+   * Minimal and maximal allowed PDU size is treated in rpki_rx_hook() function.
+   * @header.len corresponds to number of bytes of @pdu and
+   * it is in range from RPKI_PDU_HEADER_LEN to RPKI_PDU_MAX_LEN bytes.
+   */
+
+  /* Do not handle error PDUs here, leave this task to rpki_handle_error_pdu() */
+  if (pdu->ver != cache->version && pdu->type != ERROR)
+  {
+    /* If this is the first PDU we have received */
+    if (cache->request_session_id)
+    {
+      if (pdu->type == SERIAL_NOTIFY)
+      {
+        /*
+         * The router MUST ignore any Serial Notify PDUs it might receive from
+         * the cache during this initial start-up period, regardless of the
+         * Protocol Version field in the Serial Notify PDU.
+         * (https://tools.ietf.org/html/draft-ietf-sidr-rpki-rtr-rfc6810-bis-07#section-7)
+         */
+      }
+      else if (!cache->last_update &&
+               (pdu->ver <= RPKI_MAX_VERSION) &&
+               (pdu->ver < cache->version))
+      {
+        CACHE_TRACE(D_EVENTS, cache, "Downgrade session to %s from %u to %u version", rpki_get_cache_ident(cache), cache->version, pdu->ver);
+        cache->version = pdu->ver;
+      }
+      else
+      {
+        /* If this is not the first PDU we have received, something is wrong with
+         * the server implementation -> Error */
+        rpki_send_error_pdu(cache, UNSUPPORTED_PROTOCOL_VER, pdu_len, pdu, "PDU with unsupported Protocol version received");
+        return RPKI_ERROR;
+      }
+    }
+  }
+
+  if ((pdu->type >= PDU_TYPE_MAX) || (pdu->ver == RPKI_VERSION_0 && pdu->type == ROUTER_KEY))
+  {
+    rpki_send_error_pdu(cache, UNSUPPORTED_PDU_TYPE, pdu_len, pdu, "Unsupported PDU type %u received", pdu->type);
+    return RPKI_ERROR;
+  }
+
+  /* The type was range-checked above, so it is a valid index to min_pdu_size[] */
+  if (pdu_len < min_pdu_size[pdu->type])
+  {
+    /* Both lengths are unsigned and min_pdu_size[] is size_t; pass them as uint to match %u */
+    rpki_send_error_pdu(cache, CORRUPT_DATA, pdu_len, pdu, "Received %s packet with %u bytes, but expected at least %u bytes", str_pdu_type(pdu->type), (uint) pdu_len, (uint) min_pdu_size[pdu->type]);
+    return RPKI_ERROR;
+  }
+
+  return RPKI_SUCCESS;
+}
+
+/*
+ * React to an Error PDU received from the cache server.
+ *
+ * The error code selects the next cache state. An Unsupported Protocol Version
+ * error may instead downgrade cache->version to the version carried in the
+ * PDU and request a fast reconnect. Always returns RPKI_SUCCESS.
+ */
+static int
+rpki_handle_error_pdu(struct rpki_cache *cache, const struct pdu_error *pdu)
+{
+  /* Fatal unless a case below picks another state */
+  enum rpki_cache_state next_state = RPKI_CS_ERROR_FATAL;
+
+  switch (pdu->error_code)
+  {
+  case CORRUPT_DATA:
+  case INTERNAL_ERROR:
+  case INVALID_REQUEST:
+  case UNSUPPORTED_PDU_TYPE:
+    break;
+
+  case NO_DATA_AVAIL:
+    next_state = RPKI_CS_ERROR_NO_DATA_AVAIL;
+    break;
+
+  case UNSUPPORTED_PROTOCOL_VER:
+    CACHE_TRACE(D_PACKETS, cache, "Client uses unsupported protocol version");
+
+    /* A downgrade is possible only to a known and strictly lower version */
+    if ((pdu->ver > RPKI_MAX_VERSION) || (pdu->ver >= cache->version))
+    {
+      CACHE_TRACE(D_PACKETS, cache, "Got UNSUPPORTED_PROTOCOL_VER error PDU with invalid values, "
+		  "current version: %d, PDU version: %d", cache->version, pdu->ver);
+      break;
+    }
+
+    CACHE_TRACE(D_EVENTS, cache, "Downgrading from protocol version %d to version %d", cache->version, pdu->ver);
+    cache->version = pdu->ver;
+    next_state = RPKI_CS_FAST_RECONNECT;
+    break;
+
+  default:
+    CACHE_TRACE(D_PACKETS, cache, "Error unknown, server sent unsupported error code %u", pdu->error_code);
+    break;
+  }
+
+  rpki_cache_change_state(cache, next_state);
+  return RPKI_SUCCESS;
+}
+
+/*
+ * Handle a Serial Notify PDU: start a new synchronization when the announced
+ * serial number differs from the one we hold.
+ */
+static void
+rpki_handle_serial_notify_pdu(struct rpki_cache *cache, const struct pdu_serial_notify *pdu)
+{
+  if (cache->request_session_id)
+  {
+    /*
+     * The router MUST ignore any Serial Notify PDUs it might receive from
+     * the cache during this initial start-up period, regardless of the
+     * Protocol Version field in the Serial Notify PDU.
+     * (https://tools.ietf.org/html/draft-ietf-sidr-rpki-rtr-rfc6810-bis-07#section-7)
+     */
+    CACHE_TRACE(D_PACKETS, cache, "Ignore a Serial Notify packet during initial start-up period");
+    return;
+  }
+
+  /* XXX Serial number should be compared using method RFC 1982 (3.2) */
+  if (cache->serial_num == pdu->serial_num)
+    return;
+
+  rpki_cache_change_state(cache, RPKI_CS_SYNC_START);
+}
+
+/*
+ * Handle a Cache Response PDU.
+ *
+ * While a session ID is still being requested, the one from the PDU is adopted
+ * (and a table refresh cycle is opened if we already hold records). Otherwise
+ * the PDU must carry our current session ID; a mismatch is reported to the
+ * cache and is fatal. On success the cache moves to the Sync-Running state.
+ */
+static int
+rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_response *pdu)
+{
+  struct rpki_proto *p = cache->p;
+
+  if (!cache->request_session_id)
+  {
+    if (cache->session_id != pdu->session_id)
+    {
+      /* The Error PDU has to quote the offending PDU in network byte order */
+      byte tmp[pdu->len];
+      const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu);
+      rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Wrong session_id %u in Cache Response PDU", pdu->session_id);
+      rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL);
+      return RPKI_ERROR;
+    }
+  }
+  else
+  {
+    if (cache->last_update)
+    {
+      /*
+       * Not the first sync, records were received before. We are past the
+       * Reset Query and about to import a fresh set of records, so open a
+       * refresh cycle: new records get loaded, missing ones get kicked out.
+       */
+      if (p->roa4_channel)
+	rt_refresh_begin(p->roa4_channel->table, p->roa4_channel);
+      if (p->roa6_channel)
+	rt_refresh_begin(p->roa6_channel->table, p->roa6_channel);
+
+      p->refresh_channels = 1;
+    }
+
+    cache->session_id = pdu->session_id;
+    cache->request_session_id = 0;
+  }
+
+  rpki_cache_change_state(cache, RPKI_CS_SYNC_RUNNING);
+  return RPKI_SUCCESS;
+}
+
+/**
+ * rpki_prefix_pdu_2_net_addr - convert IPv4/IPv6 Prefix PDU into net_addr_union
+ * @pdu: host byte order IPv4/IPv6 Prefix PDU
+ * @n: allocated net_addr_union where the ROA is stored
+ *
+ * The ROA fields of the Prefix PDU are copied into the roa4 member of @n for
+ * an IPv4 Prefix PDU and into the roa6 member for any other PDU type.
+ * Returns @n.
+ */
+static net_addr_union *
+rpki_prefix_pdu_2_net_addr(const struct pdu_header *pdu, net_addr_union *n)
+{
+  /*
+   * Note that sizeof(net_addr_roa6) > sizeof(net_addr)
+   * and thence we must use net_addr_union and not only net_addr
+   */
+
+  if (pdu->type != IPV4_PREFIX)
+  {
+    const struct pdu_ipv6 *src = (void *) pdu;
+
+    n->roa6.type = NET_ROA6;
+    n->roa6.length = sizeof(net_addr_roa6);
+    n->roa6.pxlen = src->prefix_len;
+    n->roa6.max_pxlen = src->max_prefix_len;
+    n->roa6.prefix = src->prefix;
+    n->roa6.asn = src->asn;
+    return n;
+  }
+
+  const struct pdu_ipv4 *src = (void *) pdu;
+
+  n->roa4.type = NET_ROA4;
+  n->roa4.length = sizeof(net_addr_roa4);
+  n->roa4.pxlen = src->prefix_len;
+  n->roa4.max_pxlen = src->max_prefix_len;
+  n->roa4.prefix = src->prefix;
+  n->roa4.asn = src->asn;
+  return n;
+}
+
+/*
+ * Handle an IPv4/IPv6 Prefix PDU (host byte order): announce or withdraw the
+ * ROA it carries on the matching ROA channel.
+ *
+ * Returns RPKI_ERROR when the protocol has no channel for the address family
+ * of the PDU (the record is skipped), RPKI_SUCCESS otherwise.
+ */
+static int
+rpki_handle_prefix_pdu(struct rpki_cache *cache, const struct pdu_header *pdu)
+{
+  const enum pdu_type type = pdu->type;
+  ASSERT(type == IPV4_PREFIX || type == IPV6_PREFIX);
+
+  net_addr_union addr = {};
+  rpki_prefix_pdu_2_net_addr(pdu, &addr);
+
+  struct channel *channel = NULL;
+
+  if (type == IPV4_PREFIX)
+    channel = cache->p->roa4_channel;
+  if (type == IPV6_PREFIX)
+    channel = cache->p->roa6_channel;
+
+  if (!channel)
+  {
+    /* Exactly one argument per format directive: %N takes the address pointer */
+    CACHE_TRACE(D_ROUTES, cache, "Skip %N, missing %s channel", &addr, (type == IPV4_PREFIX ? "roa4" : "roa6"));
+    return RPKI_ERROR;
+  }
+
+  /* Remembered so that the timer hooks can tell that a sync is still making progress */
+  cache->last_rx_prefix = current_time();
+
+  /* A place for 'flags' is same for both data structures pdu_ipv4 or pdu_ipv6 */
+  const struct pdu_ipv4 *pfx = (const void *) pdu;
+  if (pfx->flags & RPKI_ADD_FLAG)
+    rpki_table_add_roa(cache, channel, &addr);
+  else
+    rpki_table_remove_roa(cache, channel, &addr);
+
+  return RPKI_SUCCESS;
+}
+
+/*
+ * Validate an interval received from the cache server.
+ *
+ * @check_fn returns NULL for an acceptable value or an error message
+ * otherwise. The message is logged as a warning together with the received
+ * value. Returns 1 when @interval is acceptable, 0 when it is not.
+ */
+static uint
+rpki_check_interval(struct rpki_cache *cache, const char *(check_fn)(uint), uint interval)
+{
+  /* Run the checker once and reuse its verdict for both the test and the log */
+  const char *err = check_fn(interval);
+
+  if (err)
+  {
+    RPKI_WARN(cache->p, "%s, received %u seconds", err, interval);
+    return 0;
+  }
+  return 1;
+}
+
+/*
+ * Handle an End of Data PDU, which finishes a synchronization.
+ *
+ * A PDU with a foreign session ID is reported to the cache and is fatal.
+ * Otherwise the intervals offered by a version 1 PDU are taken over (unless
+ * the configuration keeps the local ones or the value fails its check), an
+ * open refresh cycle is closed, the serial number and update time are
+ * recorded and the cache becomes Established.
+ */
+static void
+rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_data_v1 *pdu)
+{
+  struct rpki_proto *p = cache->p;
+  const struct rpki_config *cf = (void *) p->p.cf;
+
+  if (pdu->session_id != cache->session_id)
+  {
+    /* The Error PDU has to quote the offending PDU in network byte order */
+    byte tmp[pdu->len];
+    const struct pdu_header *hton_pdu = rpki_pdu_back_to_network_byte_order((void *) tmp, (const void *) pdu);
+    rpki_send_error_pdu(cache, CORRUPT_DATA, pdu->len, hton_pdu, "Received Session ID %u, but expected %u", pdu->session_id, cache->session_id);
+    rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL);
+    return;
+  }
+
+  if (pdu->ver == RPKI_VERSION_1)
+  {
+    if (!cf->keep_refresh_interval)
+    {
+      if (rpki_check_interval(cache, rpki_check_refresh_interval, pdu->refresh_interval))
+	cache->refresh_interval = pdu->refresh_interval;
+    }
+
+    if (!cf->keep_retry_interval)
+    {
+      if (rpki_check_interval(cache, rpki_check_retry_interval, pdu->retry_interval))
+	cache->retry_interval = pdu->retry_interval;
+    }
+
+    if (!cf->keep_expire_interval)
+    {
+      if (rpki_check_interval(cache, rpki_check_expire_interval, pdu->expire_interval))
+	cache->expire_interval = pdu->expire_interval;
+    }
+
+    CACHE_TRACE(D_EVENTS, cache, "New interval values: "
+		"refresh: %s%us, "
+		"retry: %s%us, "
+		"expire: %s%us",
+		(cf->keep_refresh_interval ? "keeps " : ""), cache->refresh_interval,
+		(cf->keep_retry_interval ? "keeps " : ""), cache->retry_interval,
+		(cf->keep_expire_interval ? "keeps " : ""), cache->expire_interval);
+  }
+
+  /* Close the refresh cycle opened by the Cache Response handler, if any */
+  if (p->refresh_channels)
+  {
+    p->refresh_channels = 0;
+
+    if (p->roa4_channel)
+      rt_refresh_end(p->roa4_channel->table, p->roa4_channel);
+    if (p->roa6_channel)
+      rt_refresh_end(p->roa6_channel->table, p->roa6_channel);
+  }
+
+  cache->last_update = current_time();
+  cache->serial_num = pdu->serial_num;
+  rpki_cache_change_state(cache, RPKI_CS_ESTABLISHED);
+}
+
+/**
+ * rpki_rx_packet - process a received RPKI PDU
+ * @cache: RPKI connection instance
+ * @pdu: a RPKI PDU in network byte order
+ *
+ * The PDU is validated first; a failed check moves the cache into the fatal
+ * error state. A valid PDU is converted to host byte order, logged and
+ * dispatched to a handler according to its type.
+ */
+static void
+rpki_rx_packet(struct rpki_cache *cache, struct pdu_header *pdu)
+{
+  struct rpki_proto *p = cache->p;
+
+  if (rpki_check_receive_packet(cache, pdu) == RPKI_ERROR)
+  {
+    rpki_cache_change_state(cache, RPKI_CS_ERROR_FATAL);
+    return;
+  }
+
+  rpki_pdu_to_host_byte_order(pdu);
+  rpki_log_packet(cache, pdu, RPKI_RECV);
+
+  switch (pdu->type)
+  {
+  /* PDUs a router expects from a cache server */
+  case SERIAL_NOTIFY:
+    /* This is a signal to synchronize with the cache server just now */
+    rpki_handle_serial_notify_pdu(cache, (void *) pdu);
+    break;
+
+  case CACHE_RESPONSE:
+    rpki_handle_cache_response_pdu(cache, (void *) pdu);
+    break;
+
+  case IPV4_PREFIX:
+  case IPV6_PREFIX:
+    rpki_handle_prefix_pdu(cache, pdu);
+    break;
+
+  case END_OF_DATA:
+    rpki_handle_end_of_data_pdu(cache, (void *) pdu);
+    break;
+
+  case CACHE_RESET:
+    /* Cache cannot provide an incremental update. */
+    rpki_cache_change_state(cache, RPKI_CS_NO_INCR_UPDATE_AVAIL);
+    break;
+
+  case ERROR:
+    rpki_handle_error_pdu(cache, (void *) pdu);
+    break;
+
+  case ROUTER_KEY:
+    /* TODO: Implement Router Key PDU handling */
+    break;
+
+  /* Queries travel in the opposite direction; only complain about them */
+  case RESET_QUERY:
+  case SERIAL_QUERY:
+    RPKI_WARN(p, "Received a %s packet that is destined for cache server", str_pdu_type(pdu->type));
+    break;
+
+  default:
+    CACHE_TRACE(D_PACKETS, cache, "Received unsupported type (%u)", pdu->type);
+  }
+}
+
+/*
+ * Socket receive hook: split the receive buffer into complete PDUs and
+ * process them one by one.
+ *
+ * @size is the number of bytes available from sk->rbuf. An incomplete PDU at
+ * the end of the buffer is moved to the buffer start and kept for the next
+ * call. Returns 1 to make the caller purge the receive buffer (invalid PDU
+ * length seen), 0 otherwise.
+ */
+int
+rpki_rx_hook(struct birdsock *sk, uint size)
+{
+  struct rpki_cache *cache = sk->data;
+  struct rpki_proto *p = cache->p;
+
+  byte *pkt_start = sk->rbuf;
+  byte *end = pkt_start + size;
+
+  DBG("rx hook got %u bytes \n", size);
+
+  while (end >= pkt_start + RPKI_PDU_HEADER_LEN)
+  {
+    struct pdu_header *pdu = (void *) pkt_start;
+    u32 pdu_size = ntohl(pdu->len);
+
+    if (pdu_size < RPKI_PDU_HEADER_LEN || pdu_size > RPKI_PDU_MAX_LEN)
+    {
+      RPKI_WARN(p, "Received invalid packet length %u, purge the whole receiving buffer", pdu_size);
+      return 1; /* Purge recv buffer */
+    }
+
+    /* The rest of this PDU has not arrived yet */
+    if (end < pkt_start + pdu_size)
+      break;
+
+    rpki_rx_packet(cache, pdu);
+
+    /* It is possible that bird socket was freed/closed */
+    if (p->p.proto_state == PS_DOWN || sk != cache->tr_sock->sk)
+      return 0;
+
+    pkt_start += pdu_size;
+  }
+
+  if (pkt_start != sk->rbuf)
+  {
+    /* Unprocessed tail; kept as uint so that it matches the %u directive below */
+    const uint rest = end - pkt_start;
+
+    CACHE_DBG(cache, "Move %u bytes of a memory at the start of buffer", rest);
+    memmove(sk->rbuf, pkt_start, rest);
+    sk->rpos = sk->rbuf + rest;
+  }
+
+  return 0; /* Not purge sk->rbuf */
+}
+
+/*
+ * Socket error hook: log why the connection ended and move the cache into the
+ * transport error state. A zero @error_num is logged as the peer closing the
+ * connection.
+ */
+void
+rpki_err_hook(struct birdsock *sk, int error_num)
+{
+  struct rpki_cache *cache = sk->data;
+
+  if (!error_num)
+  {
+    CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection");
+  }
+  else if (sk->err)
+  {
+    /* sk->err may contains a SSH error description */
+    CACHE_TRACE(D_EVENTS, cache, "Lost connection: %s", sk->err);
+  }
+  else
+  {
+    CACHE_TRACE(D_EVENTS, cache, "Lost connection: %M", error_num);
+  }
+
+  rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+}
+
+/*
+ * Hand everything between sk->tbuf and sk->tpos of the transport socket over
+ * to sk_send() and return its result.
+ */
+static int
+rpki_fire_tx(struct rpki_cache *cache)
+{
+  sock *sk = cache->tr_sock->sk;
+  uint pending = sk->tpos - sk->tbuf;
+
+  DBG("Sending %u bytes", pending);
+  return sk_send(sk, pending);
+}
+
+/*
+ * Socket transmit hook: keep sending for as long as rpki_fire_tx() reports
+ * a positive result.
+ */
+void
+rpki_tx_hook(sock *sk)
+{
+  struct rpki_cache *cache = sk->data;
+
+  for (;;)
+  {
+    if (rpki_fire_tx(cache) <= 0)
+      break;
+  }
+}
+
+/*
+ * Socket hook called once the transport connection is established: reports
+ * the protocol as up, installs the RX/TX hooks on the socket and starts the
+ * first synchronization.
+ */
+void
+rpki_connected_hook(sock *sk)
+{
+ struct rpki_cache *cache = sk->data;
+
+ CACHE_TRACE(D_EVENTS, cache, "Connected");
+ proto_notify_state(&cache->p->p, PS_UP);
+
+ /* From now on the socket carries RPKI-RTR PDUs */
+ sk->rx_hook = rpki_rx_hook;
+ sk->tx_hook = rpki_tx_hook;
+
+ rpki_cache_change_state(cache, RPKI_CS_SYNC_START);
+}
+
+/**
+ * rpki_send_error_pdu - send RPKI Error PDU
+ * @cache: RPKI connection instance
+ * @error_code: PDU Error type
+ * @err_pdu_len: length of @erroneous_pdu
+ * @erroneous_pdu: optional network byte-order PDU that invokes Error by us or NULL
+ * @fmt: optional description text of error or NULL
+ * @args: optional arguments for @fmt
+ *
+ * This function prepares Error PDU and sends it to a cache server. Nothing is
+ * sent (and RPKI_SUCCESS is returned) when the erroneous PDU is itself an
+ * Error PDU. Otherwise the result of rpki_send_pdu() is returned.
+ */
+static int
+rpki_send_error_pdu(struct rpki_cache *cache, const enum pdu_error_type error_code, const u32 err_pdu_len, const struct pdu_header *erroneous_pdu, const char *fmt, ...)
+{
+  va_list args;
+  char msg[128];
+
+  /* Size including the terminating null byte ('\0') */
+  int msg_len = 0;
+
+  /* Don't send errors for erroneous error PDUs */
+  if (err_pdu_len >= 2)
+  {
+    if (erroneous_pdu->type == ERROR)
+      return RPKI_SUCCESS;
+  }
+
+  if (fmt)
+  {
+    va_start(args, fmt);
+    msg_len = bvsnprintf(msg, sizeof(msg), fmt, args) + 1;
+    va_end(args);	/* every va_start() needs its va_end() */
+  }
+
+  /* 16 fixed bytes plus the quoted PDU and the text */
+  u32 pdu_size = 16 + err_pdu_len + msg_len;
+  byte pdu[pdu_size];
+  memset(pdu, 0, sizeof(pdu));
+
+  struct pdu_error *e = (void *) pdu;
+  e->ver = cache->version;
+  e->type = ERROR;
+  e->error_code = error_code;
+  e->len = pdu_size;
+
+  e->len_enc_pdu = err_pdu_len;
+  if (err_pdu_len > 0)
+    memcpy(e->rest, erroneous_pdu, err_pdu_len);
+
+  /*
+   * The text length field follows the quoted PDU, so its offset depends on
+   * err_pdu_len and need not be aligned for a u32 store. Copy the value
+   * bytewise instead of writing through a casted pointer.
+   */
+  const u32 text_len = msg_len;
+  memcpy(e->rest + err_pdu_len, &text_len, sizeof(text_len));
+  if (msg_len > 0)
+    memcpy(e->rest + err_pdu_len + 4, msg, msg_len);
+
+  return rpki_send_pdu(cache, pdu, pdu_size);
+}
+
+/*
+ * Send a Serial Query PDU carrying the current session ID and serial number.
+ * A failed send moves the cache into the transport error state.
+ * Returns RPKI_SUCCESS or RPKI_ERROR.
+ */
+int
+rpki_send_serial_query(struct rpki_cache *cache)
+{
+  struct pdu_serial_query pdu = {
+    .ver = cache->version,
+    .type = SERIAL_QUERY,
+    .session_id = cache->session_id,
+    .len = sizeof(pdu),
+    .serial_num = cache->serial_num
+  };
+
+  if (rpki_send_pdu(cache, &pdu, sizeof(pdu)) == RPKI_SUCCESS)
+    return RPKI_SUCCESS;
+
+  rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+  return RPKI_ERROR;
+}
+
+/*
+ * Send a Reset Query PDU. A failed send moves the cache into the transport
+ * error state. Returns RPKI_SUCCESS or RPKI_ERROR.
+ */
+int
+rpki_send_reset_query(struct rpki_cache *cache)
+{
+  struct pdu_reset_query pdu = {
+    .ver = cache->version,
+    .type = RESET_QUERY,
+    .len = sizeof(pdu),
+  };
+
+  if (rpki_send_pdu(cache, &pdu, sizeof(pdu)) == RPKI_SUCCESS)
+    return RPKI_SUCCESS;
+
+  rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+  return RPKI_ERROR;
+}
diff --git a/proto/rpki/packets.h b/proto/rpki/packets.h
new file mode 100644
index 00000000..d6f8a249
--- /dev/null
+++ b/proto/rpki/packets.h
@@ -0,0 +1,45 @@
+/*
+ * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * This file was a part of RTRlib: http://rpki.realmv6.org/
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_RPKI_PACKETS_H_
+#define _BIRD_RPKI_PACKETS_H_
+
+#include <arpa/inet.h>
+
+#define RPKI_PDU_HEADER_LEN 8
+
+/* An Error PDU is the biggest PDU (it has another PDU encapsulated inside):
+ * +8 bytes (Header size)
+ * +4 bytes (Length of Encapsulated PDU)
+ * +32 bytes (Encapsulated PDU IPv6 32)
+ * +4 bytes (Length of inserted text)
+ * +800 bytes (UTF-8 text 400*2 bytes)
+ * ------------
+ * = 848 bytes (Maximal expected PDU size) */
+#define RPKI_PDU_MAX_LEN 848
+
+/* RX buffer size has a great impact to scheduler granularity */
+#define RPKI_RX_BUFFER_SIZE 4096
+#define RPKI_TX_BUFFER_SIZE RPKI_PDU_MAX_LEN
+
+/* Return values of the packet-level functions */
+enum rpki_rtvals {
+ RPKI_SUCCESS = 0, /* Operation succeeded */
+ RPKI_ERROR = -1 /* Operation failed */
+};
+
+/* Queries sent to the cache server; both return RPKI_SUCCESS or RPKI_ERROR */
+int rpki_send_serial_query(struct rpki_cache *cache);
+int rpki_send_reset_query(struct rpki_cache *cache);
+
+/* Socket hooks of the transport connection */
+int rpki_rx_hook(sock *sk, uint size);
+void rpki_connected_hook(sock *sk);
+void rpki_err_hook(sock *sk, int error_num);
+
+#endif
diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c
new file mode 100644
index 00000000..3145399b
--- /dev/null
+++ b/proto/rpki/rpki.c
@@ -0,0 +1,928 @@
+/*
+ * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * Using RTRlib: http://rpki.realmv6.org/
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/**
+ * DOC: RPKI To Router (RPKI-RTR)
+ *
+ * The RPKI-RTR protocol is implemented in several files: |rpki.c| containing
+ * the routes handling, protocol logic, timer events, cache connection,
+ * reconfiguration, configuration and protocol glue with BIRD core, |packets.c|
+ * containing the RPKI packets handling and finally all transports files:
+ * |transport.c|, |tcp_transport.c| and |ssh_transport.c|.
+ *
+ * The |transport.c| is a middle layer and interface for each specific
+ * transport. Transport is a way how to wrap a communication with a cache
+ * server. Both an unprotected TCP transport and an encrypted SSHv2 transport
+ * are supported. The SSH transport requires the LibSSH library, which is
+ * loaded dynamically using the |dlopen()| function. SSH support is integrated in
+ * |sysdep/unix/io.c|. Each transport must implement an initialization
+ * function, an open function and a socket identification function. That's all.
+ *
+ * This implementation is based on the RTRlib (http://rpki.realmv6.org/). The
+ * BIRD takes over files |packets.c|, |rtr.c| (inside |rpki.c|), |transport.c|,
+ * |tcp_transport.c| and |ssh_transport.c| from RTRlib.
+ *
+ * A RPKI-RTR connection is described by a structure &rpki_cache. The main
+ * logic is located in |rpki_cache_change_state()| function. There is a state
+ * machine. The standard starting state flow looks like |Down| ~> |Connecting|
+ * ~> |Sync-Start| ~> |Sync-Running| ~> |Established| and then the last three
+ * states are periodically repeated.
+ *
+ * |Connecting| state establishes the transport connection. The state from a
+ * call |rpki_cache_change_state(CONNECTING)| to a call |rpki_connected_hook()|
+ *
+ * |Sync-Start| state starts with sending |Reset Query| or |Serial Query| and
+ * then waits for |Cache Response|. The state from |rpki_connected_hook()| to
+ * |rpki_handle_cache_response_pdu()|
+ *
+ * During |Sync-Running| BIRD receives data with IPv4/IPv6 Prefixes from cache
+ * server. The state starts from |rpki_handle_cache_response_pdu()| and ends
+ * in |rpki_handle_end_of_data_pdu()|.
+ *
+ * |Established| state means that BIRD has synced all data with cache server.
+ * Schedules a refresh timer event that invokes |Sync-Start|. Schedules Expire
+ * timer event and stops a Retry timer event.
+ *
+ * |Transport Error| state means that we have some trouble with the network
+ * connection. We cannot connect to a cache server or we have waited too long
+ * for an expected PDU - |Cache Response| or |End of Data|. It closes the
+ * current connection and schedules a Retry timer event.
+ *
+ * |Fatal Protocol Error| occurs e.g. when a bad Session ID is received. We
+ * restart the protocol, so all ROAs are flushed immediately.
+ *
+ * The RPKI-RTR protocol (RFC 6810 bis) defines configurable refresh, retry and
+ * expire intervals. For maintaining a connection are used timer events that
+ * are scheduled by |rpki_schedule_next_refresh()|,
+ * |rpki_schedule_next_retry()| and |rpki_schedule_next_expire()| functions.
+ *
+ * A Refresh timer event performs a sync of |Established| connection. So it
+ * shifts state to |Sync-Start|. If at the beginning of second call of a
+ * refresh event is connection in |Sync-Start| state then we didn't receive a
+ * |Cache Response| from a cache server and we invoke |Transport Error| state.
+ *
+ * A Retry timer event attempts to connect cache server. It is activated after
+ * |Transport Error| state and terminated by reaching |Established| state.
+ * If cache connection is still connecting to the cache server at the beginning
+ * of an event call then the Retry timer event invokes |Transport Error| state.
+ *
+ * An Expire timer event checks expiration of ROAs. If the last successful sync
+ * happened longer ago than the expire interval then the Expire timer event
+ * invokes a protocol restart, thereby removing all ROAs learned from that
+ * cache server, and continues trying to connect to the cache server. The
+ * Expire event is activated by the initial successful loading of ROAs, i.e.
+ * by receiving an End of Data PDU.
+ *
+ * A reconfiguration of cache connection works well without restarting when we
+ * change only intervals values.
+ *
+ * Supported standards:
+ * - RFC 6810 - main RPKI-RTR standard
+ * - RFC 6810 bis - an explicit timing parameters and protocol version number negotiation
+ */
+
+#include <stdlib.h>
+#include <netdb.h>
+
+#undef LOCAL_DEBUG
+
+#include "rpki.h"
+#include "lib/string.h"
+#include "nest/cli.h"
+
+/* Return values for reconfiguration functions */
+#define NEED_RESTART 0
+#define SUCCESSFUL_RECONF 1
+
+static int rpki_open_connection(struct rpki_cache *cache);
+static void rpki_close_connection(struct rpki_cache *cache);
+static void rpki_schedule_next_refresh(struct rpki_cache *cache);
+static void rpki_schedule_next_retry(struct rpki_cache *cache);
+static void rpki_schedule_next_expire_check(struct rpki_cache *cache);
+static void rpki_stop_refresh_timer_event(struct rpki_cache *cache);
+static void rpki_stop_retry_timer_event(struct rpki_cache *cache);
+static void rpki_stop_expire_timer_event(struct rpki_cache *cache);
+
+
+/*
+ * Routes handling
+ */
+
+/*
+ * Announce the ROA @pfxr on @channel as a route with RPKI source attributes.
+ */
+void
+rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr)
+{
+  struct rpki_proto *p = cache->p;
+
+  /* Template attributes; the route itself uses the copy from rta_lookup() */
+  rta attrs = {
+    .src = p->p.main_source,
+    .source = RTS_RPKI,
+    .scope = SCOPE_UNIVERSE,
+    .dest = RTD_NONE,
+  };
+
+  rte *route = rte_get_temp(rta_lookup(&attrs));
+  route->pflags = 0;
+
+  rte_update2(channel, &pfxr->n, route, attrs.src);
+}
+
+/*
+ * Withdraw the ROA @pfxr from @channel: an update without a route is issued
+ * for the protocol's main source.
+ */
+void
+rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr)
+{
+  rte_update2(channel, &pfxr->n, NULL, cache->p->p.main_source);
+}
+
+
+/*
+ * RPKI Protocol Logic
+ */
+
+/* Human-readable cache state names, indexed by the RPKI_CS_* state values */
+static const char *str_cache_states[] = {
+ [RPKI_CS_CONNECTING] = "Connecting",
+ [RPKI_CS_ESTABLISHED] = "Established",
+ [RPKI_CS_RESET] = "Reseting",
+ [RPKI_CS_SYNC_START] = "Sync-Start",
+ [RPKI_CS_SYNC_RUNNING] = "Sync-Running",
+ [RPKI_CS_FAST_RECONNECT] = "Fast-Reconnect",
+ [RPKI_CS_NO_INCR_UPDATE_AVAIL]= "No-Increment-Update-Available",
+ [RPKI_CS_ERROR_NO_DATA_AVAIL] = "Cache-Error-No-Data-Available",
+ [RPKI_CS_ERROR_FATAL] = "Fatal-Protocol-Error",
+ [RPKI_CS_ERROR_TRANSPORT] = "Transport-Error",
+ [RPKI_CS_SHUTDOWN] = "Down"
+};
+
+/**
+ * rpki_cache_state_to_str - give a text representation of cache state
+ * @state: A cache state
+ *
+ * The function converts a logical cache state into its name. The lookup is
+ * a plain table index without a range check, so @state has to be one of the
+ * states listed in the name table.
+ */
+const char *
+rpki_cache_state_to_str(enum rpki_cache_state state)
+{
+ return str_cache_states[state];
+}
+
+/**
+ * rpki_start_cache - connect to a cache server
+ * @cache: RPKI connection instance
+ *
+ * This function is a high level method to kick up a connection to a cache
+ * server. It only requests a transition into the Connecting state.
+ */
+static void
+rpki_start_cache(struct rpki_cache *cache)
+{
+ rpki_cache_change_state(cache, RPKI_CS_CONNECTING);
+}
+
+/**
+ * rpki_force_restart_proto - force shutdown and start protocol again
+ * @p: RPKI protocol instance
+ *
+ * This function forgets the cache connection object and reports the protocol
+ * as down. Nothing is released explicitly here.
+ * NOTE(review): the resources are presumably freed with the protocol pool
+ * once the core handles the PS_DOWN state - confirm. After calling this
+ * function there should be no operations with protocol data, they could be
+ * freed already.
+ */
+static void
+rpki_force_restart_proto(struct rpki_proto *p)
+{
+ if (p->cache)
+ {
+ CACHE_DBG(p->cache, "Connection object destroying");
+ }
+
+ /* Sign as freed */
+ p->cache = NULL;
+
+ proto_notify_state(&p->p, PS_DOWN);
+}
+
+/**
+ * rpki_cache_change_state - check and change cache state
+ * @cache: RPKI cache instance
+ * @new_state: suggested new state
+ *
+ * This function makes transitions between internal states.
+ * It represents the core of logic management of RPKI protocol.
+ * A request for the state the cache is already in is silently ignored.
+ *
+ * The new state is stored first and then its entry action is run. Several
+ * entry actions call this function recursively to move on to a follow-up
+ * state, so cache->state may differ from @new_state when the call returns.
+ */
+void
+rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state new_state)
+{
+ const enum rpki_cache_state old_state = cache->state;
+
+ if (old_state == new_state)
+ return;
+
+ cache->state = new_state;
+ CACHE_TRACE(D_EVENTS, cache, "Changing from %s to %s state", rpki_cache_state_to_str(old_state), rpki_cache_state_to_str(new_state));
+
+ switch (new_state)
+ {
+ case RPKI_CS_CONNECTING:
+ {
+ sock *sk = cache->tr_sock->sk;
+
+ /* Open a connection only if there is no usable socket; otherwise go straight to syncing */
+ if (sk == NULL || sk->fd < 0)
+ rpki_open_connection(cache);
+ else
+ rpki_cache_change_state(cache, RPKI_CS_SYNC_START);
+
+ rpki_schedule_next_retry(cache);
+ break;
+ }
+
+ case RPKI_CS_ESTABLISHED:
+ /* Synced: arm the refresh and expire timers, the retry timer is not needed */
+ rpki_schedule_next_refresh(cache);
+ rpki_schedule_next_expire_check(cache);
+ rpki_stop_retry_timer_event(cache);
+ break;
+
+ case RPKI_CS_RESET:
+ /* Resetting cache connection. */
+ cache->request_session_id = 1;
+ cache->serial_num = 0;
+ rpki_cache_change_state(cache, RPKI_CS_SYNC_START);
+ break;
+
+ case RPKI_CS_SYNC_START:
+ /* Requesting for receive ROAs from a cache server. */
+ if (cache->request_session_id)
+ {
+ /* Send request for Session ID */
+ if (rpki_send_reset_query(cache) != RPKI_SUCCESS)
+ rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+ }
+ else
+ {
+ /* We have already a session_id. So send a Serial Query and start an incremental sync */
+ if (rpki_send_serial_query(cache) != RPKI_SUCCESS)
+ rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+ }
+ break;
+
+ case RPKI_CS_SYNC_RUNNING:
+ /* The state between Cache Response and End of Data. Only waiting for
+ * receiving all IP Prefix PDUs and finally a End of Data PDU. */
+ break;
+
+ case RPKI_CS_NO_INCR_UPDATE_AVAIL:
+ /* Server was unable to answer the last Serial Query and sent Cache Reset. */
+ rpki_cache_change_state(cache, RPKI_CS_RESET);
+ break;
+
+ case RPKI_CS_ERROR_NO_DATA_AVAIL:
+ /* No validation records are available on the cache server. */
+ rpki_cache_change_state(cache, RPKI_CS_RESET);
+ break;
+
+ case RPKI_CS_ERROR_FATAL:
+ /* Fatal protocol error occurred. */
+ rpki_force_restart_proto(cache->p);
+ break;
+
+ case RPKI_CS_ERROR_TRANSPORT:
+ /* Error on the transport socket occurred. */
+ rpki_close_connection(cache);
+ rpki_schedule_next_retry(cache);
+ rpki_stop_refresh_timer_event(cache);
+ break;
+
+ case RPKI_CS_FAST_RECONNECT:
+ /* Reconnect without any waiting period */
+ rpki_close_connection(cache);
+ rpki_cache_change_state(cache, RPKI_CS_CONNECTING);
+ break;
+
+ case RPKI_CS_SHUTDOWN:
+ /* No caller is expected to request this state, so reaching it is reported as a bug */
+ bug("This isn't never really called.");
+ break;
+ };
+}
+
+
+/*
+ * RPKI Timer Events
+ */
+
+/* Arm the refresh timer to fire after the current refresh interval */
+static void
+rpki_schedule_next_refresh(struct rpki_cache *cache)
+{
+  const btime delay = cache->refresh_interval S;
+
+  CACHE_DBG(cache, "after %t s", delay);
+  tm_start(cache->refresh_timer, delay);
+}
+
+/* Arm the retry timer to fire after the current retry interval */
+static void
+rpki_schedule_next_retry(struct rpki_cache *cache)
+{
+  const btime delay = cache->retry_interval S;
+
+  CACHE_DBG(cache, "after %t s", delay);
+  tm_start(cache->retry_timer, delay);
+}
+
+/*
+ * Arm the expire timer for the moment the records from the last update
+ * expire, but never sooner than in one second.
+ */
+static void
+rpki_schedule_next_expire_check(struct rpki_cache *cache)
+{
+  btime delay = cache->last_update + cache->expire_interval S - current_time();
+
+  /* A minimum time to wait is 1 second */
+  if (delay < 1 S)
+    delay = 1 S;
+
+  CACHE_DBG(cache, "after %t s", delay);
+  tm_start(cache->expire_timer, delay);
+}
+
+/* Stop the refresh timer of the cache connection */
+static void
+rpki_stop_refresh_timer_event(struct rpki_cache *cache)
+{
+ CACHE_DBG(cache, "Stop");
+ tm_stop(cache->refresh_timer);
+}
+
+/* Stop the retry timer of the cache connection */
+static void
+rpki_stop_retry_timer_event(struct rpki_cache *cache)
+{
+ CACHE_DBG(cache, "Stop");
+ tm_stop(cache->retry_timer);
+}
+
+/* Stop the expire timer of the cache connection (marked UNUSED) */
+static void UNUSED
+rpki_stop_expire_timer_event(struct rpki_cache *cache)
+{
+ CACHE_DBG(cache, "Stop");
+ tm_stop(cache->expire_timer);
+}
+
+/*
+ * Tell whether a Prefix PDU arrived within the last two seconds.
+ * Returns 0 when none has been received yet.
+ */
+static int
+rpki_do_we_recv_prefix_pdu_in_last_seconds(struct rpki_cache *cache)
+{
+  return cache->last_rx_prefix && ((current_time() - cache->last_rx_prefix) <= 2 S);
+}
+
+/**
+ * rpki_refresh_hook - control a scheduling of downloading data from cache server
+ * @tm: refresh timer with cache connection instance in data
+ *
+ * An Established connection is moved to the Sync-Start state. A connection
+ * that is still syncing and has not received any Prefix PDU recently is
+ * considered stuck and moved to the transport error state. The timer is
+ * re-armed unless the cache ended up shut down or in transport error.
+ */
+static void
+rpki_refresh_hook(timer *tm)
+{
+  struct rpki_cache *cache = tm->data;
+  const enum rpki_cache_state state = cache->state;
+
+  CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state));
+
+  if (state == RPKI_CS_ESTABLISHED)
+  {
+    rpki_cache_change_state(cache, RPKI_CS_SYNC_START);
+  }
+  else if ((state == RPKI_CS_SYNC_START) || (state == RPKI_CS_SYNC_RUNNING))
+  {
+    /*
+     * Sync-Start: we sent Serial/Reset Query in the last refresh hook call
+     * and didn't receive Cache Response yet, probably a network trouble.
+     * Sync-Running: we got Cache Response but no End-Of-Data yet; it could
+     * be a network trouble or only a too long synchronization.
+     */
+    if (!rpki_do_we_recv_prefix_pdu_in_last_seconds(cache))
+    {
+      CACHE_TRACE(D_EVENTS, cache, "Sync takes more time than refresh interval %us, resetting connection", cache->refresh_interval);
+      rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+    }
+  }
+
+  /* The state may have changed above, so look at it again */
+  if ((cache->state == RPKI_CS_SHUTDOWN) || (cache->state == RPKI_CS_ERROR_TRANSPORT))
+    rpki_stop_refresh_timer_event(cache);
+  else
+    rpki_schedule_next_refresh(cache);
+}
+
+/**
+ * rpki_retry_hook - control a scheduling of retrying connection to cache server
+ * @tm: retry timer with cache connection instance in data
+ *
+ * Nothing is done for an Established or shut down cache. A cache that is
+ * still connecting or syncing without any recent Prefix PDU is moved to the
+ * transport error state. In any other state a new connection attempt is
+ * started. The timer is re-armed unless the cache is Established.
+ */
+static void
+rpki_retry_hook(timer *tm)
+{
+  struct rpki_cache *cache = tm->data;
+  const enum rpki_cache_state state = cache->state;
+
+  CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state));
+
+  if ((state == RPKI_CS_ESTABLISHED) || (state == RPKI_CS_SHUTDOWN))
+  {
+    /* Nothing to retry */
+  }
+  else if ((state == RPKI_CS_CONNECTING) ||
+	   (state == RPKI_CS_SYNC_START) ||
+	   (state == RPKI_CS_SYNC_RUNNING))
+  {
+    if (!rpki_do_we_recv_prefix_pdu_in_last_seconds(cache))
+    {
+      /* We tried to establish a connection in last retry hook call and haven't done
+       * yet. It looks like troubles with network. We are aggressive here. */
+      CACHE_TRACE(D_EVENTS, cache, "Sync takes more time than retry interval %us, resetting connection.", cache->retry_interval);
+      rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+    }
+  }
+  else
+  {
+    rpki_cache_change_state(cache, RPKI_CS_CONNECTING);
+  }
+
+  /* The state may have changed above, so look at it again */
+  if (cache->state == RPKI_CS_ESTABLISHED)
+    rpki_stop_retry_timer_event(cache);
+  else
+    rpki_schedule_next_retry(cache);
+}
+
+/**
+ * rpki_expire_hook - control a expiration of ROA entries
+ * @tm: expire timer with cache connection instance in data
+ *
+ * Does nothing while no update has been recorded yet. Otherwise the time
+ * left until the records of the last update expire is computed; if none is
+ * left the protocol is restarted, else the next check is scheduled.
+ */
+static void
+rpki_expire_hook(timer *tm)
+{
+  struct rpki_cache *cache = tm->data;
+
+  if (!cache->last_update)
+    return;
+
+  CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state));
+
+  const btime remains = cache->last_update + cache->expire_interval S - current_time();
+
+  if (remains > 0)
+  {
+    CACHE_DBG(cache, "Remains %t seconds to become ROAs obsolete", remains);
+    rpki_schedule_next_expire_check(cache);
+    return;
+  }
+
+  CACHE_TRACE(D_EVENTS, cache, "All ROAs expired");
+  rpki_force_restart_proto(cache->p);
+}
+
+/**
+ * rpki_check_refresh_interval - check validity of refresh interval value
+ * @seconds: suggested value
+ *
+ * Returns |NULL| for a value between 1 and 86400 seconds (inclusive),
+ * an error message otherwise.
+ */
+const char *
+rpki_check_refresh_interval(uint seconds)
+{
+  const char *err = NULL;
+
+  if (seconds > 86400)
+    err = "Maximum allowed refresh interval is 86400 seconds";
+  else if (seconds < 1)
+    err = "Minimum allowed refresh interval is 1 second";
+
+  return err;
+}
+
+/**
+ * rpki_check_retry_interval - check validity of retry interval value
+ * @seconds: suggested value
+ *
+ * Returns |NULL| for a value between 1 and 7200 seconds (inclusive),
+ * an error message otherwise.
+ */
+const char *
+rpki_check_retry_interval(uint seconds)
+{
+  const char *err = NULL;
+
+  if (seconds > 7200)
+    err = "Maximum allowed retry interval is 7200 seconds";
+  else if (seconds < 1)
+    err = "Minimum allowed retry interval is 1 second";
+
+  return err;
+}
+
+/**
+ * rpki_check_expire_interval - check validity of expire interval value
+ * @seconds: suggested value
+ *
+ * Returns |NULL| for a value between 600 and 172800 seconds (inclusive),
+ * an error message otherwise.
+ */
+const char *
+rpki_check_expire_interval(uint seconds)
+{
+  const char *err = NULL;
+
+  if (seconds > 172800)
+    err = "Maximum allowed expire interval is 172800 seconds";
+  else if (seconds < 600)
+    err = "Minimum allowed expire interval is 600 seconds";
+
+  return err;
+}
+
+
+/*
+ * RPKI Cache
+ */
+
+/*
+ * Create a cache connection object for protocol @p from configuration @cf.
+ *
+ * The object lives in its own resource pool (a child of the protocol pool),
+ * together with its three timers and the transport socket wrapper. It starts
+ * in the Down state, asking for a session ID and using the highest supported
+ * protocol version.
+ */
+static struct rpki_cache *
+rpki_init_cache(struct rpki_proto *p, struct rpki_config *cf)
+{
+  pool *cache_pool = rp_new(p->p.pool, cf->hostname);
+  struct rpki_cache *cache = mb_allocz(cache_pool, sizeof(struct rpki_cache));
+
+  cache->pool = cache_pool;
+  cache->p = p;
+
+  cache->state = RPKI_CS_SHUTDOWN;
+  cache->request_session_id = 1;
+  cache->version = RPKI_MAX_VERSION;
+
+  /* The intervals start at the configured values */
+  cache->refresh_interval = cf->refresh_interval;
+  cache->retry_interval = cf->retry_interval;
+  cache->expire_interval = cf->expire_interval;
+
+  cache->refresh_timer = tm_new_init(cache_pool, &rpki_refresh_hook, cache, 0, 0);
+  cache->retry_timer = tm_new_init(cache_pool, &rpki_retry_hook, cache, 0, 0);
+  cache->expire_timer = tm_new_init(cache_pool, &rpki_expire_hook, cache, 0, 0);
+
+  cache->tr_sock = mb_allocz(cache_pool, sizeof(struct rpki_tr_sock));
+  cache->tr_sock->cache = cache;
+
+  /* Bind the transport implementation selected in the configuration */
+  if (cf->tr_config.type == RPKI_TR_TCP)
+    rpki_tr_tcp_init(cache->tr_sock);
+  else if (cf->tr_config.type == RPKI_TR_SSH)
+    rpki_tr_ssh_init(cache->tr_sock);
+
+  CACHE_DBG(cache, "Connection object created");
+
+  return cache;
+}
+
+/**
+ * rpki_get_cache_ident - textual identification of a cache server
+ * @cache: RPKI connection instance
+ *
+ * Returns the identifier string that rpki_tr_ident() produces for the
+ * transport socket of @cache.
+ */
+const char *
+rpki_get_cache_ident(struct rpki_cache *cache)
+{
+  struct rpki_tr_sock *tr = cache->tr_sock;
+
+  return rpki_tr_ident(tr);
+}
+
+/*
+ * Open the transport connection of @cache.
+ *
+ * Returns RPKI_TR_SUCCESS when rpki_tr_open() did not report an error.
+ * Otherwise the cache is moved to RPKI_CS_ERROR_TRANSPORT and RPKI_TR_ERROR
+ * is returned.
+ */
+static int
+rpki_open_connection(struct rpki_cache *cache)
+{
+  CACHE_TRACE(D_EVENTS, cache, "Opening a connection");
+
+  if (rpki_tr_open(cache->tr_sock) != RPKI_TR_ERROR)
+    return RPKI_TR_SUCCESS;
+
+  rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT);
+  return RPKI_TR_ERROR;
+}
+
+/*
+ * Close the transport of @cache and notify the core that the owning
+ * protocol is in the PS_START state.
+ */
+static void
+rpki_close_connection(struct rpki_cache *cache)
+{
+  CACHE_TRACE(D_EVENTS, cache, "Closing a connection");
+
+  rpki_tr_close(cache->tr_sock);
+
+  struct rpki_proto *owner = cache->p;
+  proto_notify_state(&owner->p, PS_START);
+}
+
+/*
+ * Protocol shutdown hook: hands the instance to rpki_force_restart_proto()
+ * and reports PS_DOWN.
+ */
+static int
+rpki_shutdown(struct proto *P)
+{
+  rpki_force_restart_proto((struct rpki_proto *) P);
+
+  /* Nothing to free by hand, the protocol memory pool is released automatically */
+  return PS_DOWN;
+}
+
+
+/*
+ * RPKI Reconfiguration
+ */
+
+/*
+ * Attempt to apply a configuration change by reconnecting.
+ *
+ * Only an established cache can be switched to RPKI_CS_FAST_RECONNECT;
+ * in any other state the caller is told that a restart is needed.
+ */
+static int
+rpki_try_fast_reconnect(struct rpki_cache *cache)
+{
+  if (cache->state != RPKI_CS_ESTABLISHED)
+    return NEED_RESTART;
+
+  rpki_cache_change_state(cache, RPKI_CS_FAST_RECONNECT);
+  return SUCCESSFUL_RECONF;
+}
+
+/**
+ * rpki_reconfigure_cache - a cache reconfiguration
+ * @p: RPKI protocol instance
+ * @cache: a cache connection
+ * @new: new RPKI configuration
+ * @old: old RPKI configuration
+ *
+ * This function reconfigures an existing single cache server connection with
+ * a new configuration. A change of hostname, port or transport type always
+ * requires a restart of the protocol. A change of the SSH transport settings,
+ * of a time interval or of a keep-interval flag is handled by a fast
+ * reconnect, which is possible only while the cache is in the
+ * |RPKI_CS_ESTABLISHED| state; otherwise a restart is requested as well.
+ * Changed interval values are stored into @cache before the fast reconnect
+ * is attempted. Returns |NEED_RESTART| or |SUCCESSFUL_RECONF|.
+ */
+static int
+rpki_reconfigure_cache(struct rpki_proto *p UNUSED, struct rpki_cache *cache, struct rpki_config *new, struct rpki_config *old)
+{
+ u8 try_fast_reconnect = 0; /* set by every change that can be applied by reconnecting */
+
+ if (strcmp(old->hostname, new->hostname) != 0)
+ {
+ CACHE_TRACE(D_EVENTS, cache, "Cache server address changed to %s", new->hostname);
+ return NEED_RESTART;
+ }
+
+ if (old->port != new->port)
+ {
+ CACHE_TRACE(D_EVENTS, cache, "Cache server port changed to %u", new->port);
+ return NEED_RESTART;
+ }
+
+ if (old->tr_config.type != new->tr_config.type)
+ {
+ CACHE_TRACE(D_EVENTS, cache, "Transport type changed");
+ return NEED_RESTART;
+ }
+ else if (new->tr_config.type == RPKI_TR_SSH) /* same transport on both sides, so both specs are SSH configs */
+ {
+ struct rpki_tr_ssh_config *ssh_old = (void *) old->tr_config.spec;
+ struct rpki_tr_ssh_config *ssh_new = (void *) new->tr_config.spec;
+ if ((strcmp(ssh_old->bird_private_key, ssh_new->bird_private_key) != 0) ||
+ (strcmp(ssh_old->cache_public_key, ssh_new->cache_public_key) != 0) ||
+ (strcmp(ssh_old->user, ssh_new->user) != 0))
+ {
+ CACHE_TRACE(D_EVENTS, cache, "Settings of SSH transport configuration changed");
+ try_fast_reconnect = 1;
+ }
+ }
+
+#define TEST_INTERVAL(name, Name) \
+ if (cache->name##_interval != new->name##_interval || \
+ old->keep_##name##_interval != new->keep_##name##_interval) \
+ { \
+ cache->name##_interval = new->name##_interval; \
+ CACHE_TRACE(D_EVENTS, cache, #Name " interval changed to %u seconds %s", cache->name##_interval, (new->keep_##name##_interval ? "and keep it" : "")); \
+ try_fast_reconnect = 1; \
+ }
+ TEST_INTERVAL(refresh, Refresh);
+ TEST_INTERVAL(retry, Retry);
+ TEST_INTERVAL(expire, Expire);
+#undef TEST_INTERVAL
+
+ if (try_fast_reconnect)
+ return rpki_try_fast_reconnect(cache);
+
+ return SUCCESSFUL_RECONF; /* nothing relevant changed */
+}
+
+/**
+ * rpki_reconfigure - a protocol reconfiguration hook
+ * @P: a protocol instance
+ * @CF: a new protocol configuration
+ *
+ * Reconfigures the ROA4 channel, then the ROA6 channel and finally the cache
+ * connection. The first step that fails stops the procedure.
+ * Returns |NEED_RESTART| or |SUCCESSFUL_RECONF|.
+ */
+static int
+rpki_reconfigure(struct proto *P, struct proto_config *CF)
+{
+  struct rpki_proto *p = (void *) P;
+  struct rpki_config *new = (void *) CF;
+  struct rpki_config *old = (void *) p->p.cf;
+  struct rpki_cache *cache = p->cache;
+
+  if (!proto_configure_channel(&p->p, &p->roa4_channel, proto_cf_find_channel(CF, NET_ROA4)))
+    return NEED_RESTART;
+
+  if (!proto_configure_channel(&p->p, &p->roa6_channel, proto_cf_find_channel(CF, NET_ROA6)))
+    return NEED_RESTART;
+
+  int result = rpki_reconfigure_cache(p, cache, new, old);
+
+  return (result == SUCCESSFUL_RECONF) ? SUCCESSFUL_RECONF : NEED_RESTART;
+}
+
+
+/*
+ * RPKI Protocol Glue
+ */
+
+/*
+ * Protocol init hook: creates the protocol instance for @CF and configures
+ * its ROA4 and ROA6 channels from the channel configs found in @CF.
+ */
+static struct proto *
+rpki_init(struct proto_config *CF)
+{
+  struct rpki_proto *p = (void *) proto_new(CF);
+  struct proto *P = &p->p;	/* generic part is the first member of struct rpki_proto */
+
+  proto_configure_channel(P, &p->roa4_channel, proto_cf_find_channel(CF, NET_ROA4));
+  proto_configure_channel(P, &p->roa6_channel, proto_cf_find_channel(CF, NET_ROA6));
+
+  return P;
+}
+
+/*
+ * Protocol start hook: creates the cache connection object from the
+ * protocol configuration, starts it and reports PS_START.
+ */
+static int
+rpki_start(struct proto *P)
+{
+  struct rpki_proto *p = (void *) P;
+  struct rpki_cache *cache = rpki_init_cache(p, (struct rpki_config *) P->cf);
+
+  p->cache = cache;
+  rpki_start_cache(cache);
+
+  return PS_START;
+}
+
+/*
+ * Write a short status text into @buf: an empty string for a protocol that
+ * is down, the cache state name when a cache exists, and a fixed notice
+ * otherwise.
+ */
+static void
+rpki_get_status(struct proto *P, byte *buf)
+{
+  struct rpki_proto *p = (struct rpki_proto *) P;
+
+  if (P->proto_state == PS_DOWN)
+    buf[0] = 0;
+  else if (!p->cache)
+    bsprintf(buf, "No cache server configured");
+  else
+    bsprintf(buf, "%s", rpki_cache_state_to_str(p->cache->state));
+}
+
+/*
+ * Print one CLI line for timer @t labelled @name: the remaining time
+ * together with @num when the timer is active, dashes otherwise.
+ */
+static void
+rpki_show_proto_info_timer(const char *name, uint num, timer *t)
+{
+  if (!tm_active(t))
+  {
+    cli_msg(-1006, " %-16s: ---", name);
+    return;
+  }
+
+  cli_msg(-1006, " %-16s: %t/%u", name, tm_remains(t), num);
+}
+
+/*
+ * CLI hook printing detailed information about the protocol: cache server
+ * identity and state, transport, protocol version, session ID, serial number,
+ * time since the last update, the three timers and both ROA channels.
+ * Nothing is printed for a protocol that is down or has no cache.
+ */
+static void
+rpki_show_proto_info(struct proto *P)
+{
+  struct rpki_proto *p = (struct rpki_proto *) P;
+  struct rpki_cache *cache = p->cache;
+
+  if ((P->proto_state == PS_DOWN) || !cache)
+    return;
+
+  struct rpki_config *cf = (void *) p->p.cf;
+  const char *transport_name;
+
+  if (cf->tr_config.type == RPKI_TR_SSH)
+    transport_name = "SSHv2";
+  else if (cf->tr_config.type == RPKI_TR_TCP)
+    transport_name = "Unprotected over TCP";
+  else
+    transport_name = "---";
+
+  cli_msg(-1006, " Cache server: %s", rpki_get_cache_ident(cache));
+  cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state));
+  cli_msg(-1006, " Transport: %s", transport_name);
+  cli_msg(-1006, " Protocol version: %u", cache->version);
+
+  /* The session ID is meaningful only after it has been received */
+  if (!cache->request_session_id)
+    cli_msg(-1006, " Session ID: %u", cache->session_id);
+  else
+    cli_msg(-1006, " Session ID: ---");
+
+  if (!cache->last_update)
+  {
+    cli_msg(-1006, " Serial number: ---");
+    cli_msg(-1006, " Last update: ---");
+  }
+  else
+  {
+    cli_msg(-1006, " Serial number: %u", cache->serial_num);
+    cli_msg(-1006, " Last update: before %t s", current_time() - cache->last_update);
+  }
+
+  rpki_show_proto_info_timer("Refresh timer", cache->refresh_interval, cache->refresh_timer);
+  rpki_show_proto_info_timer("Retry timer", cache->retry_interval, cache->retry_timer);
+  rpki_show_proto_info_timer("Expire timer", cache->expire_interval, cache->expire_timer);
+
+  if (!p->roa4_channel)
+    cli_msg(-1006, " No roa4 channel");
+  else
+    channel_show_info(p->roa4_channel);
+
+  if (!p->roa6_channel)
+    cli_msg(-1006, " No roa6 channel");
+  else
+    channel_show_info(p->roa6_channel);
+}
+
+
+/*
+ * RPKI Protocol Configuration
+ */
+
+/**
+ * rpki_check_config - check and complete configuration of RPKI protocol
+ * @cf: RPKI configuration
+ *
+ * This function is called at the end of parsing RPKI protocol configuration.
+ * Templates are skipped. For other configurations it requires an IP address
+ * or a hostname of the cache server, selects the TCP transport when none was
+ * configured and fills in the default port of the selected transport when
+ * the port is zero.
+ */
+void
+rpki_check_config(struct rpki_config *cf)
+{
+  /* Templates are not validated */
+  if (cf->c.class == SYM_TEMPLATE)
+    return;
+
+  if (ipa_zero(cf->ip) && cf->hostname == NULL)
+    cf_error("IP address or hostname of cache server must be set");
+
+  /* Fall back to the TCP transport */
+  if (cf->tr_config.spec == NULL)
+  {
+    cf->tr_config.spec = cfg_allocz(sizeof(struct rpki_tr_tcp_config));
+    cf->tr_config.type = RPKI_TR_TCP;
+  }
+
+  /* An explicitly configured port is kept as it is */
+  if (cf->port != 0)
+    return;
+
+  /* Default port depends on the transport */
+  cf->port = (cf->tr_config.type == RPKI_TR_SSH) ? RPKI_SSH_PORT : RPKI_TCP_PORT;
+}
+
+/*
+ * Postconfig hook: a configuration without any channel gets a default one
+ * of the protocol's net type.
+ */
+static void
+rpki_postconfig(struct proto_config *CF)
+{
+  if (!EMPTY_LIST(CF->channels))
+    return;
+
+  channel_config_new(NULL, CF->net_type, CF);
+}
+
+static void
+rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
+{
+ /* FIXME: Should copy transport.
+  * Currently a no-op: neither @dest nor @src is touched here. */
+}
+
+/* Protocol descriptor of RPKI; connects the hooks defined in this file */
+struct protocol proto_rpki = {
+  /* Identification and sizing */
+  .name = "RPKI",
+  .template = "rpki%d",
+  .preference = DEF_PREF_RPKI,
+  .channel_mask = (NB_ROA4 | NB_ROA6),
+  .proto_size = sizeof(struct rpki_proto),
+  .config_size = sizeof(struct rpki_config),
+
+  /* Configuration hooks */
+  .postconfig = rpki_postconfig,
+  .copy_config = rpki_copy_config,
+
+  /* Life cycle hooks */
+  .init = rpki_init,
+  .start = rpki_start,
+  .shutdown = rpki_shutdown,
+  .reconfigure = rpki_reconfigure,
+
+  /* Status reporting hooks */
+  .get_status = rpki_get_status,
+  .show_proto_info = rpki_show_proto_info,
+};
diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h
new file mode 100644
index 00000000..8972b33a
--- /dev/null
+++ b/proto/rpki/rpki.h
@@ -0,0 +1,165 @@
+/*
+ * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * Using RTRlib: http://rpki.realmv6.org/
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _BIRD_RPKI_H_
+#define _BIRD_RPKI_H_
+
+#include "nest/bird.h"
+#include "nest/route.h"
+#include "nest/protocol.h"
+#include "lib/socket.h"
+#include "lib/ip.h"
+
+#include "transport.h"
+#include "packets.h"
+
+#define RPKI_TCP_PORT 323
+#define RPKI_SSH_PORT 22
+#define RPKI_RETRY_INTERVAL 600
+#define RPKI_REFRESH_INTERVAL 3600
+#define RPKI_EXPIRE_INTERVAL 7200
+
+#define RPKI_VERSION_0 0
+#define RPKI_VERSION_1 1
+#define RPKI_MAX_VERSION RPKI_VERSION_1
+
+
+/*
+ * RPKI Cache
+ */
+
+enum rpki_cache_state {
+ RPKI_CS_CONNECTING, /* Socket is establishing the transport connection. */
+ RPKI_CS_ESTABLISHED, /* Connection is established, socket is waiting for a Serial Notify or expiration of the refresh_interval timer */
+ RPKI_CS_RESET, /* Resetting RTR connection. */
+ RPKI_CS_SYNC_START, /* Sending a Serial/Reset Query PDU and expecting a Cache Response PDU */
+ RPKI_CS_SYNC_RUNNING, /* Receiving validation records from the RTR server. A state between Cache Response PDU and End of Data PDU */
+ RPKI_CS_FAST_RECONNECT, /* Reconnect without any waiting period */
+ RPKI_CS_NO_INCR_UPDATE_AVAIL, /* Server is unable to answer the last Serial Query and sent Cache Reset. */
+ RPKI_CS_ERROR_NO_DATA_AVAIL, /* Server is unable to answer either a Serial Query or a Reset Query because it has no useful data available at this time. */
+ RPKI_CS_ERROR_FATAL, /* Fatal protocol error occurred. */
+ RPKI_CS_ERROR_TRANSPORT, /* Error on the transport socket occurred. */
+ RPKI_CS_SHUTDOWN, /* RTR Socket is stopped. */
+};
+
+struct rpki_cache {
+ pool *pool; /* Pool containing cache objects */
+ struct rpki_proto *p; /* Protocol instance this cache was created for */
+
+ struct rpki_tr_sock *tr_sock; /* Transport specific socket */
+ enum rpki_cache_state state; /* RPKI_CS_* */
+ u32 session_id; /* Session ID; shown by the CLI only while request_session_id is 0 */
+ u8 request_session_id; /* 1: have to request new session id; 0: we have already received session id */
+ u32 serial_num; /* Serial number denotes the logical version of data from cache server */
+ u8 version; /* Protocol version */
+ btime last_update; /* Last successful synchronization with cache server */
+ btime last_rx_prefix; /* Last received prefix PDU */
+
+ /* Intervals can be changed by cache server on the fly */
+ u32 refresh_interval; /* Actual refresh interval (in seconds) */
+ u32 retry_interval; /* Actual retry interval (in seconds) */
+ u32 expire_interval; /* Actual expire interval (in seconds) */
+ timer *retry_timer; /* Retry timer event */
+ timer *refresh_timer; /* Refresh timer event */
+ timer *expire_timer; /* Expire timer event */
+};
+
+const char *rpki_get_cache_ident(struct rpki_cache *cache);
+const char *rpki_cache_state_to_str(enum rpki_cache_state state);
+
+
+/*
+ * Routes handling
+ */
+
+void rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr);
+void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr);
+
+
+/*
+ * RPKI Protocol Logic
+ */
+
+void rpki_cache_change_state(struct rpki_cache *cache, const enum rpki_cache_state new_state);
+
+
+/*
+ * RPKI Timer Events
+ */
+
+const char *rpki_check_refresh_interval(uint seconds);
+const char *rpki_check_retry_interval(uint seconds);
+const char *rpki_check_expire_interval(uint seconds);
+
+
+/*
+ * RPKI Protocol Configuration
+ */
+
+struct rpki_proto {
+ struct proto p; /* Generic protocol part; first member, so struct proto * is cast to struct rpki_proto * */
+ struct rpki_cache *cache; /* Cache connection, created in rpki_start() */
+
+ struct channel *roa4_channel; /* Channel configured for NET_ROA4; may be NULL */
+ struct channel *roa6_channel; /* Channel configured for NET_ROA6; may be NULL */
+ u8 refresh_channels; /* For non-incremental updates using rt_refresh_begin(), rt_refresh_end() */
+};
+
+struct rpki_config {
+ struct proto_config c;
+ const char *hostname; /* Full domain name or stringified IP address of cache server */
+ ip_addr ip; /* IP address of cache server or IPA_NONE */
+ u16 port; /* Port number of cache server */
+ struct rpki_tr_config tr_config; /* Specific transport configuration structure */
+ u32 refresh_interval; /* Time interval (in seconds) for periodical downloading data from cache server */
+ u32 retry_interval; /* Time interval (in seconds) for an unreachable server */
+ u32 expire_interval; /* Maximal lifetime (in seconds) of ROAs without any successful refreshment */
+ u8 keep_refresh_interval:1; /* Do not overwrite refresh interval by cache server update */
+ u8 keep_retry_interval:1; /* Do not overwrite retry interval by cache server update */
+ u8 keep_expire_interval:1; /* Do not overwrite expire interval by cache server update */
+};
+
+void rpki_check_config(struct rpki_config *cf);
+
+
+/*
+ * Logger
+ */
+
+#define RPKI_LOG(log_level, rpki, msg, args...) \
+ do { \
+ log(log_level "%s: " msg, (rpki)->p.name , ## args); \
+ } while(0)
+
+#if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG)
+#define CACHE_DBG(cache,msg,args...) \
+ do { \
+ RPKI_LOG(L_DEBUG, (cache)->p, "%s [%s] %s " msg, rpki_get_cache_ident(cache), rpki_cache_state_to_str((cache)->state), __func__, ## args); \
+ } while(0)
+#else
+#define CACHE_DBG(cache,msg,args...) do { } while(0)
+#endif
+
+/*
+ * Log a trace message for protocol @rpki when any debug flag of @level is
+ * enabled. @level is parenthesized so that a combined mask such as
+ * (A | B) is tested as a whole instead of being split by operator precedence.
+ */
+#define RPKI_TRACE(level,rpki,msg,args...) \
+  do { \
+    if ((rpki)->p.debug & (level)) \
+      RPKI_LOG(L_TRACE, rpki, msg, ## args); \
+  } while(0)
+
+/*
+ * Log a trace message on behalf of the protocol owning @cache when any debug
+ * flag of @level is enabled. @level is parenthesized so that a combined mask
+ * such as (A | B) is tested as a whole instead of being split by operator
+ * precedence.
+ */
+#define CACHE_TRACE(level,cache,msg,args...) \
+  do { \
+    if ((cache)->p->p.debug & (level)) \
+      RPKI_LOG(L_TRACE, (cache)->p, msg, ## args); \
+  } while(0)
+
+/*
+ * Log a warning for protocol @p. The expansion carries no trailing semicolon;
+ * the caller's own `;' terminates the statement, which keeps the macro usable
+ * in an unbraced if/else.
+ */
+#define RPKI_WARN(p, msg, args...) RPKI_LOG(L_WARN, p, msg, ## args)
+
+#endif /* _BIRD_RPKI_H_ */
diff --git a/proto/rpki/ssh_transport.c b/proto/rpki/ssh_transport.c
new file mode 100644
index 00000000..cd49ab90
--- /dev/null
+++ b/proto/rpki/ssh_transport.c
@@ -0,0 +1,75 @@
+/*
+ * BIRD -- An implementation of the SSH protocol for the RPKI transport
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * This file was a part of RTRlib: http://rpki.realmv6.org/
+ * This transport implementation uses libssh (http://www.libssh.org/)
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "rpki.h"
+
+/*
+ * open_fp() hook of the SSH transport: marks the socket as SK_SSH_ACTIVE,
+ * attaches an SSH descriptor filled from the SSH transport configuration
+ * and opens the socket. Returns RPKI_TR_SUCCESS or RPKI_TR_ERROR.
+ */
+static int
+rpki_tr_ssh_open(struct rpki_tr_sock *tr)
+{
+  sock *sk = tr->sk;
+  struct rpki_config *cf = (void *) tr->cache->p->p.cf;
+  struct rpki_tr_ssh_config *ssh_cf = (void *) cf->tr_config.spec;
+
+  sk->type = SK_SSH_ACTIVE;
+  sk->ssh = mb_allocz(sk->pool, sizeof(struct ssh_sock));
+
+  sk->ssh->state = SK_SSH_CONNECT;
+  sk->ssh->subsystem = "rpki-rtr";
+  sk->ssh->username = ssh_cf->user;
+  sk->ssh->client_privkey_path = ssh_cf->bird_private_key;
+  sk->ssh->server_hostkey_path = ssh_cf->cache_public_key;
+
+  return (sk_open(sk) == 0) ? RPKI_TR_SUCCESS : RPKI_TR_ERROR;
+}
+
+/*
+ * ident_fp() hook of the SSH transport: returns "<user>@<host>:<port>".
+ * The string is built on first use, allocated from the cache pool and kept
+ * in tr->ident for later calls.
+ */
+static const char *
+rpki_tr_ssh_ident(struct rpki_tr_sock *tr)
+{
+  ASSERT(tr != NULL);
+
+  struct rpki_cache *cache = tr->cache;
+  struct rpki_config *cf = (void *) cache->p->p.cf;
+  struct rpki_tr_ssh_config *ssh_cf = (void *) cf->tr_config.spec;
+
+  if (tr->ident == NULL)
+  {
+    /* '@' + ':' + at most 5 port digits + '\0' make 8 extra bytes */
+    size_t size = strlen(ssh_cf->user) + strlen(cf->hostname) + 8;
+    char *buf = mb_alloc(cache->pool, size);
+
+    bsnprintf(buf, size, "%s@%s:%u", ssh_cf->user, cf->hostname, cf->port);
+    tr->ident = buf;
+  }
+
+  return tr->ident;
+}
+
+/**
+ * rpki_tr_ssh_init - set up the RPKI transport structure for a SSH connection
+ * @tr: allocated RPKI transport structure
+ *
+ * Installs the SSH implementations of the open and ident hooks into @tr.
+ */
+void
+rpki_tr_ssh_init(struct rpki_tr_sock *tr)
+{
+  tr->ident_fp = rpki_tr_ssh_ident;
+  tr->open_fp = rpki_tr_ssh_open;
+}
diff --git a/proto/rpki/tcp_transport.c b/proto/rpki/tcp_transport.c
new file mode 100644
index 00000000..6c05964a
--- /dev/null
+++ b/proto/rpki/tcp_transport.c
@@ -0,0 +1,78 @@
+/*
+ * BIRD -- An implementation of the TCP protocol for the RPKI protocol transport
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * This file was a part of RTRlib: http://rpki.realmv6.org/
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "rpki.h"
+#include "sysdep/unix/unix.h"
+
+/*
+ * open_fp() hook of the TCP transport: marks the socket as SK_TCP_ACTIVE
+ * and opens it. Returns RPKI_TR_SUCCESS or RPKI_TR_ERROR.
+ */
+static int
+rpki_tr_tcp_open(struct rpki_tr_sock *tr)
+{
+  sock *sk = tr->sk;
+
+  sk->type = SK_TCP_ACTIVE;
+
+  return (sk_open(sk) == 0) ? RPKI_TR_SUCCESS : RPKI_TR_ERROR;
+}
+
+/*
+ * ident_fp() hook of the TCP transport: returns "<host>:<port>", or
+ * "<ip>:<port>" when no hostname is configured. The string is built on first
+ * use, allocated from the cache pool and kept in tr->ident for later calls.
+ */
+static const char *
+rpki_tr_tcp_ident(struct rpki_tr_sock *tr)
+{
+  ASSERT(tr != NULL);
+
+  struct rpki_cache *cache = tr->cache;
+  struct rpki_config *cf = (void *) cache->p->p.cf;
+
+  if (tr->ident == NULL)
+  {
+    const char *host = cf->hostname;
+
+    /* Address part + ":65535" (6 bytes) + '\0' */
+    size_t size = (host ? strlen(host) : IPA_MAX_TEXT_LENGTH) + 6 + 1;
+    char *buf = mb_alloc(cache->pool, size);
+
+    if (!host)
+      bsnprintf(buf, size, "%I:%u", cf->ip, cf->port);
+    else
+      bsnprintf(buf, size, "%s:%u", host, cf->port);
+
+    tr->ident = buf;
+  }
+
+  return tr->ident;
+}
+
+/**
+ * rpki_tr_tcp_init - set up the RPKI transport structure for a TCP connection
+ * @tr: allocated RPKI transport structure
+ *
+ * Installs the TCP implementations of the open and ident hooks into @tr.
+ */
+void
+rpki_tr_tcp_init(struct rpki_tr_sock *tr)
+{
+  tr->ident_fp = rpki_tr_tcp_ident;
+  tr->open_fp = rpki_tr_tcp_open;
+}
diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c
new file mode 100644
index 00000000..182667be
--- /dev/null
+++ b/proto/rpki/transport.c
@@ -0,0 +1,135 @@
+/*
+ * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * This file was a part of RTRlib: http://rpki.realmv6.org/
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <sys/socket.h>
+#include <netdb.h>
+
+#include "rpki.h"
+#include "transport.h"
+#include "sysdep/unix/unix.h"
+
+/**
+ * rpki_hostname_autoresolv - auto-resolve an IP address from a hostname
+ * @host: domain name of host, e.g. "rpki-validator.realmv6.org"
+ *
+ * This function resolves @host with getaddrinfo() and converts the first
+ * returned address with sockaddr_read(). A failed lookup is logged at debug
+ * level. Returns the resolved &ip_addr, or |IPA_NONE| when @host is |NULL| or
+ * the lookup fails.
+ */
+static ip_addr
+rpki_hostname_autoresolv(const char *host)
+{
+  ip_addr addr = {};
+  struct addrinfo *res;
+  struct addrinfo hints = {
+    .ai_family = AF_UNSPEC,
+    .ai_socktype = SOCK_STREAM,
+    .ai_flags = AI_ADDRCONFIG,
+  };
+
+  if (!host)
+    return IPA_NONE;
+
+  int err_code = getaddrinfo(host, NULL, &hints, &res);
+  if (err_code != 0)
+  {
+    log(L_DEBUG "getaddrinfo failed: %s", gai_strerror(err_code));
+    return IPA_NONE;
+  }
+
+  /*
+   * Read the address straight from the resolver's buffer. Copying
+   * *res->ai_addr by value would transfer only sizeof(struct sockaddr) bytes
+   * and cut off the tail of a struct sockaddr_in6, corrupting IPv6 results.
+   */
+  uint unused;
+  sockaddr_read((sockaddr *) res->ai_addr, res->ai_family, &addr, NULL, &unused);
+
+  freeaddrinfo(res);
+  return addr;
+}
+
+/**
+ * rpki_tr_open - prepare and open a socket connection
+ * @tr: initialized transport socket
+ *
+ * Creates a new socket for @tr, which must have been initialized before and
+ * must not hold a socket yet, fills it from the protocol configuration and
+ * resolves the hostname when no usable IP address is configured. The actual
+ * opening is left to the transport specific open_fp() hook.
+ * Returns RPKI_TR_SUCCESS or RPKI_TR_ERROR.
+ */
+int
+rpki_tr_open(struct rpki_tr_sock *tr)
+{
+  struct rpki_cache *cache = tr->cache;
+  struct rpki_config *cf = (void *) cache->p->p.cf;
+
+  ASSERT(tr->sk == NULL);
+
+  sock *sk = sk_new(cache->pool);
+  tr->sk = sk;
+
+  /* Deliberately invalid; the open_fp() hook of the transport MUST set the real type */
+  sk->type = -1;
+
+  /* Hooks and their context */
+  sk->data = cache;
+  sk->tx_hook = rpki_connected_hook;
+  sk->err_hook = rpki_err_hook;
+
+  /* Remote endpoint */
+  sk->host = cf->hostname;
+  sk->daddr = cf->ip;
+  sk->dport = cf->port;
+
+  /* Buffers and TOS */
+  sk->rbsize = RPKI_RX_BUFFER_SIZE;
+  sk->tbsize = RPKI_TX_BUFFER_SIZE;
+  sk->tos = IP_PREC_INTERNET_CONTROL;
+
+  /* Without a configured address the hostname has to be resolved first */
+  if (sk->host && ipa_zero2(sk->daddr))
+  {
+    sk->daddr = rpki_hostname_autoresolv(sk->host);
+
+    if (ipa_zero(sk->daddr))
+    {
+      CACHE_TRACE(D_EVENTS, cache, "Cannot resolve the hostname '%s'", sk->host);
+      return RPKI_TR_ERROR;
+    }
+  }
+
+  return tr->open_fp(tr);
+}
+
+/**
+ * rpki_tr_close - close socket and prepare it for possible next open
+ * @tr: transport socket
+ *
+ * Frees the cached identifier string and the socket, if present, and resets
+ * both pointers to |NULL|.
+ */
+void
+rpki_tr_close(struct rpki_tr_sock *tr)
+{
+  char *ident = (char *) tr->ident;
+  if (ident != NULL)
+  {
+    mb_free(ident);
+    tr->ident = NULL;
+  }
+
+  sock *sk = tr->sk;
+  if (sk != NULL)
+  {
+    rfree(sk);
+    tr->sk = NULL;
+  }
+}
+
+/**
+ * rpki_tr_ident - Returns a string identifier for the rpki transport socket
+ * @tr: initialized transport socket
+ *
+ * Returns a \0 terminated string identifier for the socket endpoint, e.g. "<host>:<port>".
+ * The call is forwarded to the ident_fp() hook of @tr; the TCP and SSH hooks
+ * build the string on first use and keep it in the @tr structure.
+ */
+inline const char *
+rpki_tr_ident(struct rpki_tr_sock *tr)
+{
+ return tr->ident_fp(tr);
+}
diff --git a/proto/rpki/transport.h b/proto/rpki/transport.h
new file mode 100644
index 00000000..f90b7e42
--- /dev/null
+++ b/proto/rpki/transport.h
@@ -0,0 +1,79 @@
+/*
+ * BIRD -- The Resource Public Key Infrastructure (RPKI) to Router Protocol
+ *
+ * (c) 2015 CZ.NIC
+ * (c) 2015 Pavel Tvrdik <pawel.tvrdik@gmail.com>
+ *
+ * This file was a part of RTRlib: http://rpki.realmv6.org/
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/*
+ * The RPKI transport sockets implement the communication channel
+ * (e.g., SSH, TCP, TCP-AO) between an RPKI server and client.
+ *
+ * Before using the transport socket, a tr_socket must be
+ * initialized based on a protocol-dependent init function (e.g.,
+ * rpki_tr_tcp_init()).
+ *
+ * The rpki_tr_* functions call the corresponding function pointers, which are
+ * passed in the rpki_tr_sock structure, and forward the remaining arguments.
+ */
+
+#ifndef _BIRD_RPKI_TRANSPORT_H_
+#define _BIRD_RPKI_TRANSPORT_H_
+
+#include <time.h>
+
+/* The return values for rpki_tr_ functions */
+enum rpki_tr_rtvals {
+ RPKI_TR_SUCCESS = 0, /* Operation was successful */
+ RPKI_TR_ERROR = -1, /* Error occurred */
+ RPKI_TR_WOULDBLOCK = -2, /* No data is available on the socket */
+ RPKI_TR_INTR = -3, /* Call was interrupted from a signal */
+ RPKI_TR_CLOSED = -4 /* Connection closed */
+};
+
+/* A transport socket structure */
+struct rpki_tr_sock {
+ sock *sk; /* Standard BIRD socket */
+ struct rpki_cache *cache; /* Cache server */
+ int (*open_fp)(struct rpki_tr_sock *); /* Function that establishes the socket connection */
+ const char *(*ident_fp)(struct rpki_tr_sock *); /* Function that returns an identifier for the socket endpoint */
+ const char *ident; /* Internal. Use ident_fp() hook instead of this pointer */
+};
+
+int rpki_tr_open(struct rpki_tr_sock *tr);
+void rpki_tr_close(struct rpki_tr_sock *tr);
+const char *rpki_tr_ident(struct rpki_tr_sock *tr);
+
+/* Types of supported transports */
+enum rpki_tr_type {
+ RPKI_TR_TCP, /* Unprotected transport over TCP */
+ RPKI_TR_SSH, /* Protected transport by SSHv2 connection */
+};
+
+/* Common configure structure for transports */
+struct rpki_tr_config {
+ enum rpki_tr_type type; /* RPKI_TR_TCP or RPKI_TR_SSH */
+ const void *spec; /* Specific transport configuration, i.e. rpki_tr_tcp_config or rpki_tr_ssh_config */
+};
+
+struct rpki_tr_tcp_config {
+ /* No internal configuration data */
+};
+
+struct rpki_tr_ssh_config {
+ const char *bird_private_key; /* Filepath to the BIRD server private key */
+ const char *cache_public_key; /* Filepath to the public key of cache server, can be file known_hosts */
+ const char *user; /* Username for SSH connection */
+};
+
+/* ssh_transport.c */
+void rpki_tr_ssh_init(struct rpki_tr_sock *tr);
+
+/* tcp_transport.c */
+void rpki_tr_tcp_init(struct rpki_tr_sock *tr);
+
+#endif /* _BIRD_RPKI_TRANSPORT_H_ */
diff --git a/proto/static/Makefile b/proto/static/Makefile
index 61fadbea..e38f9b74 100644
--- a/proto/static/Makefile
+++ b/proto/static/Makefile
@@ -1,6 +1,6 @@
-source=static.c
-root-rel=../../
-dir-name=proto/static
-
-include ../../Rules
+src := static.c
+obj := $(src-o-files)
+$(all-daemon)
+$(cf-local)
+tests_objs := $(tests_objs) $(src-o-files) \ No newline at end of file
diff --git a/proto/static/config.Y b/proto/static/config.Y
index 182721b3..66e5ea4c 100644
--- a/proto/static/config.Y
+++ b/proto/static/config.Y
@@ -13,98 +13,119 @@ CF_HDR
CF_DEFINES
#define STATIC_CFG ((struct static_config *) this_proto)
-static struct static_route *this_srt, *this_srt_nh, *last_srt_nh;
+static struct static_route *this_srt, *this_snh;
static struct f_inst **this_srt_last_cmd;
+static struct static_route *
+static_nexthop_new(void)
+{
+ struct static_route *nh = this_srt;
+
+ if (this_snh)
+ {
+ /* Additional next hop */
+ nh = cfg_allocz(sizeof(struct static_route));
+ nh->net = this_srt->net;
+ this_snh->mp_next = nh;
+ }
+
+ nh->dest = RTD_UNICAST;
+ nh->mp_head = this_srt;
+ return nh;
+};
+
static void
static_route_finish(void)
{
- struct static_route *r;
-
- /* Update undefined use_bfd entries in multipath nexthops */
- if (this_srt->dest == RTD_MULTIPATH)
- for (r = this_srt->mp_next; r; r = r->mp_next)
- if (r->use_bfd < 0)
- r->use_bfd = this_srt->use_bfd;
+ if (net_type_match(this_srt->net, NB_DEST) == !this_srt->dest)
+ cf_error("Unexpected or missing nexthop/type");
}
CF_DECLS
CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK)
-CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD)
+CF_KEYWORDS(ONLINK, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE, BFD, MPLS)
CF_GRAMMAR
CF_ADDTO(proto, static_proto '}')
-static_proto_start: proto_start STATIC {
- this_proto = proto_config_new(&proto_static, $1);
- static_init_config((struct static_config *) this_proto);
- }
- ;
+static_proto_start: proto_start STATIC
+{
+ this_proto = proto_config_new(&proto_static, $1);
+ init_list(&STATIC_CFG->routes);
+};
static_proto:
static_proto_start proto_name '{'
| static_proto proto_item ';'
+ | static_proto proto_channel ';' { this_proto->net_type = $2->net_type; }
| static_proto CHECK LINK bool ';' { STATIC_CFG->check_link = $4; }
- | static_proto IGP TABLE rtable ';' { STATIC_CFG->igp_table = $4; }
+ | static_proto IGP TABLE rtable ';' {
+ if ($4->addr_type == NET_IP4)
+ STATIC_CFG->igp_table_ip4 = $4;
+ else if ($4->addr_type == NET_IP6)
+ STATIC_CFG->igp_table_ip6 = $4;
+ else
+ cf_error("Incompatible IGP table type");
+ }
| static_proto stat_route stat_route_opt_list ';' { static_route_finish(); }
;
-stat_route0: ROUTE prefix {
- this_srt = cfg_allocz(sizeof(struct static_route));
- add_tail(&STATIC_CFG->other_routes, &this_srt->n);
- this_srt->net = $2.addr;
- this_srt->masklen = $2.len;
- this_srt_last_cmd = &(this_srt->cmds);
+stat_nexthop:
+ VIA ipa ipa_scope {
+ this_snh = static_nexthop_new();
+ this_snh->via = $2;
+ this_snh->iface = $3;
+ }
+ | VIA TEXT {
+ this_snh = static_nexthop_new();
+ this_snh->via = IPA_NONE;
+ this_snh->iface = if_get_by_name($2);
+ }
+ | stat_nexthop MPLS label_stack {
+ this_snh->mls = $3;
}
- ;
+ | stat_nexthop ONLINK bool {
+ this_snh->onlink = $3;
+ }
+ | stat_nexthop WEIGHT expr {
+ this_snh->weight = $3 - 1;
+ if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256");
+ }
+ | stat_nexthop BFD bool {
+ this_snh->use_bfd = $3; cf_check_bfd($3);
+ }
+;
-stat_multipath1:
- VIA ipa ipa_scope {
- last_srt_nh = this_srt_nh;
- this_srt_nh = cfg_allocz(sizeof(struct static_route));
- this_srt_nh->dest = RTD_NONE;
- this_srt_nh->via = $2;
- this_srt_nh->via_if = $3;
- this_srt_nh->if_name = (void *) this_srt; /* really */
- this_srt_nh->use_bfd = -1; /* undefined */
- }
- | stat_multipath1 WEIGHT expr {
- this_srt_nh->masklen = $3 - 1; /* really */
- if (($3<1) || ($3>256)) cf_error("Weight must be in range 1-256");
- }
- | stat_multipath1 BFD bool {
- this_srt_nh->use_bfd = $3; cf_check_bfd($3);
- }
- ;
+stat_nexthops:
+ stat_nexthop
+ | stat_nexthops stat_nexthop
+;
-stat_multipath:
- stat_multipath1 { this_srt->mp_next = this_srt_nh; }
- | stat_multipath stat_multipath1 { last_srt_nh->mp_next = this_srt_nh; }
+stat_route0: ROUTE net_any {
+ this_srt = cfg_allocz(sizeof(struct static_route));
+ add_tail(&STATIC_CFG->routes, &this_srt->n);
+ this_srt->net = $2;
+ this_srt_last_cmd = &(this_srt->cmds);
+ this_srt->mp_next = NULL;
+ this_snh = NULL;
+ }
;
stat_route:
- stat_route0 VIA ipa ipa_scope {
- this_srt->dest = RTD_ROUTER;
+ stat_route0 stat_nexthops
+ | stat_route0 RECURSIVE ipa {
+ this_srt->dest = RTDX_RECURSIVE;
this_srt->via = $3;
- this_srt->via_if = $4;
- }
- | stat_route0 VIA TEXT {
- this_srt->dest = RTD_DEVICE;
- this_srt->if_name = $3;
- rem_node(&this_srt->n);
- add_tail(&STATIC_CFG->iface_routes, &this_srt->n);
}
- | stat_route0 MULTIPATH stat_multipath {
- this_srt->dest = RTD_MULTIPATH;
- }
- | stat_route0 RECURSIVE ipa {
+ | stat_route0 RECURSIVE ipa MPLS label_stack {
this_srt->dest = RTDX_RECURSIVE;
this_srt->via = $3;
+ this_srt->mls = $5;
}
-
+ | stat_route0 { this_srt->dest = RTD_NONE; }
| stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; }
| stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; }
| stat_route0 BLACKHOLE { this_srt->dest = RTD_BLACKHOLE; }
@@ -114,7 +135,6 @@ stat_route:
stat_route_item:
cmd { *this_srt_last_cmd = $1; this_srt_last_cmd = &($1->next); }
- | BFD bool ';' { this_srt->use_bfd = $2; cf_check_bfd($2); }
;
stat_route_opts:
diff --git a/proto/static/static.c b/proto/static/static.c
index 849067b9..ede4c734 100644
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@ -9,33 +9,32 @@
/**
* DOC: Static
*
- * The Static protocol is implemented in a straightforward way. It keeps
- * two lists of static routes: one containing interface routes and one
- * holding the remaining ones. Interface routes are inserted and removed according
- * to interface events received from the core via the if_notify() hook. Routes
- * pointing to a neighboring router use a sticky node in the neighbor cache
- * to be notified about gaining or losing the neighbor. Special
- * routes like black holes or rejects are inserted all the time.
+ * The Static protocol is implemented in a straightforward way. It keeps a list
+ * of static routes. Routes of dest RTD_UNICAST have an associated sticky node
+ * in the neighbor cache to be notified about gaining or losing the neighbor and
+ * about interface-related events (e.g. link down). They may also have a BFD
+ * request if associated with a BFD session. When a route is notified,
+ * static_decide() is used to see whether the route's activeness has changed. In
+ * that case, the route is marked as dirty and scheduled to be announced or
+ * withdrawn, which is done asynchronously from an event hook. Routes of other
+ * types (e.g. black holes) are announced all the time.
*
- * Multipath routes are tricky. Because these routes depends on
- * several neighbors we need to integrate that to the neighbor
- * notification handling, we use dummy static_route nodes, one for
- * each nexthop. Therefore, a multipath route consists of a master
- * static_route node (of dest RTD_MULTIPATH), which specifies prefix
- * and is used in most circumstances, and a list of dummy static_route
- * nodes (of dest RTD_NONE), which stores info about nexthops and are
- * connected to neighbor entries and neighbor notifications. Dummy
- * nodes are chained using mp_next, they aren't in other_routes list,
- * and abuse some fields (masklen, if_name) for other purposes.
+ * Multipath routes are a bit tricky. To represent additional next hops, dummy
+ * static_route nodes are used, which are chained using the @mp_next field and
+ * linked to the master node by the @mp_head field. Each next hop has a separate
+ * neighbor entry and an activeness state, but the master node is used for most
+ * purposes. Note that most functions DO NOT accept dummy nodes as arguments.
*
* The only other thing worth mentioning is that when asked for reconfiguration,
* Static not only compares the two configurations, but it also calculates
- * difference between the lists of static routes and it just inserts the
- * newly added routes and removes the obsolete ones.
+ * difference between the lists of static routes and it just inserts the newly
+ * added routes, removes the obsolete ones and reannounces changed ones.
*/
#undef LOCAL_DEBUG
+#include <stdlib.h>
+
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
@@ -50,107 +49,117 @@
static linpool *static_lp;
-static inline rtable *
-p_igp_table(struct proto *p)
-{
- struct static_config *cf = (void *) p->cf;
- return cf->igp_table ? cf->igp_table->table : p->table;
-}
-
static void
-static_install(struct proto *p, struct static_route *r, struct iface *ifa)
+static_announce_rte(struct static_proto *p, struct static_route *r)
{
- net *n;
- rta a;
- rte *e;
+ rta *a = allocz(RTA_MAX_SIZE);
+ a->src = p->p.main_source;
+ a->source = RTS_STATIC;
+ a->scope = SCOPE_UNIVERSE;
+ a->dest = r->dest;
- if (r->installed > 0)
- return;
+ if (r->dest == RTD_UNICAST)
+ {
+ struct static_route *r2;
+ struct nexthop *nhs = NULL;
- DBG("Installing static route %I/%d, rtd=%d\n", r->net, r->masklen, r->dest);
- bzero(&a, sizeof(a));
- a.src = p->main_source;
- a.source = (r->dest == RTD_DEVICE) ? RTS_STATIC_DEVICE : RTS_STATIC;
- a.scope = SCOPE_UNIVERSE;
- a.cast = RTC_UNICAST;
- a.dest = r->dest;
- a.gw = r->via;
- a.iface = ifa;
-
- if (r->dest == RTD_MULTIPATH)
+ for (r2 = r; r2; r2 = r2->mp_next)
{
- struct static_route *r2;
- struct mpnh *nhs = NULL;
-
- for (r2 = r->mp_next; r2; r2 = r2->mp_next)
- if (r2->installed)
- {
- struct mpnh *nh = alloca(sizeof(struct mpnh));
- nh->gw = r2->via;
- nh->iface = r2->neigh->iface;
- nh->weight = r2->masklen; /* really */
- mpnh_insert(&nhs, nh);
- }
-
- /* There is at least one nexthop */
- if (!nhs->next)
- {
- /* Fallback to unipath route for exactly one nexthop */
- a.dest = RTD_ROUTER;
- a.gw = nhs->gw;
- a.iface = nhs->iface;
- }
- else
- a.nexthops = nhs;
+ if (!r2->active)
+ continue;
+
+ struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE);
+ nh->gw = r2->via;
+ nh->iface = r2->neigh->iface;
+ nh->flags = r2->onlink ? RNF_ONLINK : 0;
+ nh->weight = r2->weight;
+ if (r2->mls)
+ {
+ nh->labels = r2->mls->len;
+ memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32));
+ }
+
+ nexthop_insert(&nhs, nh);
}
+ if (!nhs)
+ goto withdraw;
+
+ nexthop_link(a, nhs);
+ }
+
if (r->dest == RTDX_RECURSIVE)
- rta_set_recursive_next_hop(p->table, &a, p_igp_table(p), &r->via, &r->via);
+ {
+ rtable *tab = ipa_is_ip4(r->via) ? p->igp_table_ip4 : p->igp_table_ip6;
+ rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls);
+ }
- /* We skip rta_lookup() here */
+ /* Already announced */
+ if (r->state == SRS_CLEAN)
+ return;
- n = net_get(p->table, r->net, r->masklen);
- e = rte_get_temp(&a);
- e->net = n;
+ /* We skip rta_lookup() here */
+ rte *e = rte_get_temp(a);
e->pflags = 0;
if (r->cmds)
f_eval_rte(r->cmds, &e, static_lp);
- rte_update(p, n, e);
- r->installed = 1;
+ rte_update(&p->p, r->net, e);
+ r->state = SRS_CLEAN;
if (r->cmds)
lp_flush(static_lp);
+
+ return;
+
+withdraw:
+ if (r->state == SRS_DOWN)
+ return;
+
+ rte_update(&p->p, r->net, NULL);
+ r->state = SRS_DOWN;
}
static void
-static_remove(struct proto *p, struct static_route *r)
+static_mark_rte(struct static_proto *p, struct static_route *r)
{
- net *n;
-
- if (!r->installed)
+ if (r->state == SRS_DIRTY)
return;
- DBG("Removing static route %I/%d via %I\n", r->net, r->masklen, r->via);
- n = net_find(p->table, r->net, r->masklen);
- rte_update(p, n, NULL);
- r->installed = 0;
+ r->state = SRS_DIRTY;
+ BUFFER_PUSH(p->marked) = r;
+
+ if (!ev_active(p->event))
+ ev_schedule(p->event);
+}
+
+static void
+static_announce_marked(void *P)
+{
+ struct static_proto *p = P;
+
+ BUFFER_WALK(p->marked, r)
+ static_announce_rte(P, r);
+
+ BUFFER_FLUSH(p->marked);
}
static void
static_bfd_notify(struct bfd_request *req);
static void
-static_update_bfd(struct proto *p, struct static_route *r)
+static_update_bfd(struct static_proto *p, struct static_route *r)
{
+ /* The @r is a RTD_UNICAST next hop, may be a dummy node */
+
struct neighbor *nb = r->neigh;
int bfd_up = (nb->scope > 0) && r->use_bfd;
if (bfd_up && !r->bfd_req)
{
// ip_addr local = ipa_nonzero(r->local) ? r->local : nb->ifa->ip;
- r->bfd_req = bfd_request_session(p->pool, r->via, nb->ifa->ip, nb->iface,
+ r->bfd_req = bfd_request_session(p->p.pool, r->via, nb->ifa->ip, nb->iface,
static_bfd_notify, r);
}
@@ -162,212 +171,173 @@ static_update_bfd(struct proto *p, struct static_route *r)
}
static int
-static_decide(struct static_config *cf, struct static_route *r)
+static_decide(struct static_proto *p, struct static_route *r)
{
- /* r->dest != RTD_MULTIPATH, but may be RTD_NONE (part of multipath route)
- the route also have to be valid (r->neigh != NULL) */
+ /* The @r is a RTD_UNICAST next hop, may be a dummy node; r->neigh is dereferenced and must be set. Updates r->active and returns nonzero iff it changed. */
+
+ struct static_config *cf = (void *) p->p.cf;
+ uint old_active = r->active;
if (r->neigh->scope < 0)
- return 0;
+ goto fail;
if (cf->check_link && !(r->neigh->iface->flags & IF_LINK_UP))
- return 0;
+ goto fail;
- if (r->bfd_req && r->bfd_req->state != BFD_STATE_UP)
- return 0;
+ if (r->bfd_req && (r->bfd_req->state != BFD_STATE_UP))
+ goto fail;
- return 1;
-}
+ r->active = 1;
+ return !old_active; /* nonzero iff the next hop was inactive before */
+fail:
+ r->active = 0;
+ return old_active; /* nonzero iff the next hop was active before */
+}
static void
-static_add(struct proto *p, struct static_config *cf, struct static_route *r)
+static_add_rte(struct static_proto *p, struct static_route *r)
{
- DBG("static_add(%I/%d,%d)\n", r->net, r->masklen, r->dest);
- switch (r->dest)
- {
- case RTD_ROUTER:
- {
- struct neighbor *n = neigh_find2(p, &r->via, r->via_if, NEF_STICKY);
- if (n)
- {
- r->chain = n->data;
- n->data = r;
- r->neigh = n;
-
- static_update_bfd(p, r);
- if (static_decide(cf, r))
- static_install(p, r, n->iface);
- else
- static_remove(p, r);
- }
- else
- {
- log(L_ERR "Static route destination %I is invalid. Ignoring.", r->via);
- static_remove(p, r);
- }
- break;
- }
+ if (r->dest == RTD_UNICAST)
+ {
+ struct static_route *r2;
+ struct neighbor *n;
- case RTD_DEVICE:
- break;
+ for (r2 = r; r2; r2 = r2->mp_next)
+ {
+ n = ipa_nonzero(r2->via) ?
+ neigh_find2(&p->p, &r2->via, r2->iface,
+ NEF_STICKY | (r2->onlink ? NEF_ONLINK : 0)) :
+ neigh_find_iface(&p->p, r2->iface);
- case RTD_MULTIPATH:
+ if (!n)
{
- int count = 0;
- struct static_route *r2;
-
- for (r2 = r->mp_next; r2; r2 = r2->mp_next)
- {
- struct neighbor *n = neigh_find2(p, &r2->via, r2->via_if, NEF_STICKY);
- if (n)
- {
- r2->chain = n->data;
- n->data = r2;
- r2->neigh = n;
-
- static_update_bfd(p, r2);
- r2->installed = static_decide(cf, r2);
- count += r2->installed;
- }
- else
- {
- log(L_ERR "Static route destination %I is invalid. Ignoring.", r2->via);
- r2->installed = 0;
- }
- }
-
- if (count)
- static_install(p, r, NULL);
- else
- static_remove(p, r);
- break;
+ log(L_WARN "Invalid next hop %I of static route %N", r2->via, r2->net);
+ continue;
}
- default:
- static_install(p, r, NULL);
+ r2->neigh = n;
+ r2->chain = n->data;
+ n->data = r2;
+
+ static_update_bfd(p, r2);
+ static_decide(p, r2);
}
+ }
+
+ static_announce_rte(p, r);
}
static void
-static_rte_cleanup(struct proto *p UNUSED, struct static_route *r)
+static_reset_rte(struct static_proto *p UNUSED, struct static_route *r)
{
struct static_route *r2;
- if (r->bfd_req)
+ for (r2 = r; r2; r2 = r2->mp_next)
{
- rfree(r->bfd_req);
- r->bfd_req = NULL;
- }
+ r2->neigh = NULL;
+ r2->chain = NULL;
- if (r->dest == RTD_MULTIPATH)
- for (r2 = r->mp_next; r2; r2 = r2->mp_next)
- if (r2->bfd_req)
- {
- rfree(r2->bfd_req);
- r2->bfd_req = NULL;
- }
+ r2->state = 0;
+ r2->active = 0;
+
+ rfree(r2->bfd_req);
+ r2->bfd_req = NULL;
+ }
}
-static int
-static_start(struct proto *p)
+static void
+static_remove_rte(struct static_proto *p, struct static_route *r)
{
- struct static_config *cf = (void *) p->cf;
- struct static_route *r;
+ if (r->state)
+ rte_update(&p->p, r->net, NULL);
- DBG("Static: take off!\n");
+ static_reset_rte(p, r);
+}
- if (!static_lp)
- static_lp = lp_new(&root_pool, 1008);
- if (cf->igp_table)
- rt_lock_table(cf->igp_table->table);
+static inline int
+static_same_dest(struct static_route *x, struct static_route *y)
+{
+ if (x->dest != y->dest)
+ return 0;
+
+ switch (x->dest)
+ {
+ case RTD_UNICAST:
+ for (; x && y; x = x->mp_next, y = y->mp_next)
+ {
+ if (!ipa_equal(x->via, y->via) ||
+ (x->iface != y->iface) ||
+ (x->onlink != y->onlink) ||
+ (x->weight != y->weight) ||
+ (x->use_bfd != y->use_bfd) ||
+ (!x->mls != !y->mls) ||
+ ((x->mls) && (y->mls) && (x->mls->len != y->mls->len)))
+ return 0;
+
+ if (!x->mls)
+ continue;
+
+ for (uint i = 0; i < x->mls->len; i++)
+ if (x->mls->stack[i] != y->mls->stack[i])
+ return 0;
+ }
+ return !x && !y;
- /* We have to go UP before routes could be installed */
- proto_notify_state(p, PS_UP);
+ case RTDX_RECURSIVE:
+ if (!ipa_equal(x->via, y->via) ||
+ (!x->mls != !y->mls) ||
+ ((x->mls) && (y->mls) && (x->mls->len != y->mls->len)))
+ return 0;
- WALK_LIST(r, cf->other_routes)
- static_add(p, cf, r);
- return PS_UP;
-}
+ if (!x->mls)
+ return 1;
-static int
-static_shutdown(struct proto *p)
-{
- struct static_config *cf = (void *) p->cf;
- struct static_route *r;
+ for (uint i = 0; i < x->mls->len; i++)
+ if (x->mls->stack[i] != y->mls->stack[i])
+ return 0;
- /* Just reset the flag, the routes will be flushed by the nest */
- WALK_LIST(r, cf->iface_routes)
- r->installed = 0;
- WALK_LIST(r, cf->other_routes)
- {
- static_rte_cleanup(p, r);
- r->installed = 0;
- }
+ return 1;
- return PS_DOWN;
+ default:
+ return 1;
+ }
}
-static void
-static_cleanup(struct proto *p)
+static inline int
+static_same_rte(struct static_route *or, struct static_route *nr)
{
- struct static_config *cf = (void *) p->cf;
-
- if (cf->igp_table)
- rt_unlock_table(cf->igp_table->table);
+ /* Note that i_same() requires arguments in (new, old) order */
+ return static_same_dest(or, nr) && i_same(nr->cmds, or->cmds);
}
static void
-static_update_rte(struct proto *p, struct static_route *r)
+static_reconfigure_rte(struct static_proto *p, struct static_route *or, struct static_route *nr) /* Replace old route @or by new route @nr, carrying over the announcement state */
{
- switch (r->dest)
- {
- case RTD_ROUTER:
- if (static_decide((struct static_config *) p->cf, r))
- static_install(p, r, r->neigh->iface);
- else
- static_remove(p, r);
- break;
-
- case RTD_NONE: /* a part of multipath route */
- {
- int decision = static_decide((struct static_config *) p->cf, r);
- if (decision == r->installed)
- break; /* no change */
- r->installed = decision;
-
- struct static_route *r1, *r2;
- int count = 0;
- r1 = (void *) r->if_name; /* really */
- for (r2 = r1->mp_next; r2; r2 = r2->mp_next)
- count += r2->installed;
-
- if (count)
- {
- /* Set of nexthops changed - force reinstall */
- r1->installed = 0;
- static_install(p, r1, NULL);
- }
- else
- static_remove(p, r1);
+ if ((or->state == SRS_CLEAN) && !static_same_rte(or, nr)) /* announced, but the definition differs */
+ nr->state = SRS_DIRTY; /* not SRS_CLEAN, so static_announce_rte() will not skip it */
+ else
+ nr->state = or->state; /* otherwise inherit the old announcement state */
- break;
- }
- }
+ static_add_rte(p, nr); /* hooks up neighbors and calls static_announce_rte() */
+ static_reset_rte(p, or); /* clear runtime fields of the old node */
}
+
static void
static_neigh_notify(struct neighbor *n)
{
- struct proto *p = n->proto;
+ struct static_proto *p = (void *) n->proto;
struct static_route *r;
DBG("Static: neighbor notify for %I: iface %p\n", n->addr, n->iface);
- for(r=n->data; r; r=r->chain)
+ for (r = n->data; r; r = r->chain)
{
static_update_bfd(p, r);
- static_update_rte(p, r);
+
+ if (static_decide(p, r))
+ static_mark_rte(p, r->mp_head);
}
}
@@ -375,241 +345,232 @@ static void
static_bfd_notify(struct bfd_request *req)
{
struct static_route *r = req->data;
- struct proto *p = r->neigh->proto;
+ struct static_proto *p = (void *) r->neigh->proto;
// if (req->down) TRACE(D_EVENTS, "BFD session down for nbr %I on %s", XXXX);
- static_update_rte(p, r);
+ if (static_decide(p, r))
+ static_mark_rte(p, r->mp_head);
}
-static void
-static_dump_rt(struct static_route *r)
+static int
+static_rte_mergable(rte *pri UNUSED, rte *sec UNUSED)
{
- debug("%-1I/%2d: ", r->net, r->masklen);
- switch (r->dest)
- {
- case RTD_ROUTER:
- debug("via %I\n", r->via);
- break;
- case RTD_DEVICE:
- debug("dev %s\n", r->if_name);
- break;
- default:
- debug("rtd %d\n", r->dest);
- break;
- }
+ return 1;
}
+
static void
-static_dump(struct proto *p)
+static_postconfig(struct proto_config *CF)
{
- struct static_config *c = (void *) p->cf;
+ struct static_config *cf = (void *) CF;
struct static_route *r;
- debug("Independent static routes:\n");
- WALK_LIST(r, c->other_routes)
- static_dump_rt(r);
- debug("Device static routes:\n");
- WALK_LIST(r, c->iface_routes)
- static_dump_rt(r);
-}
+ if (EMPTY_LIST(CF->channels))
+ cf_error("Channel not specified");
-static void
-static_if_notify(struct proto *p, unsigned flags, struct iface *i)
-{
- struct static_route *r;
- struct static_config *c = (void *) p->cf;
+ struct channel_config *cc = proto_cf_main_channel(CF);
- if (flags & IF_CHANGE_UP)
- {
- WALK_LIST(r, c->iface_routes)
- if (!strcmp(r->if_name, i->name))
- static_install(p, r, i);
- }
- else if (flags & IF_CHANGE_DOWN)
- {
- WALK_LIST(r, c->iface_routes)
- if (!strcmp(r->if_name, i->name))
- static_remove(p, r);
- }
-}
+ if (!cf->igp_table_ip4)
+ cf->igp_table_ip4 = (cc->table->addr_type == NET_IP4) ?
+ cc->table : cf->c.global->def_tables[NET_IP4];
-int
-static_rte_mergable(rte *pri UNUSED, rte *sec UNUSED)
-{
- return 1;
-}
+ if (!cf->igp_table_ip6)
+ cf->igp_table_ip6 = (cc->table->addr_type == NET_IP6) ?
+ cc->table : cf->c.global->def_tables[NET_IP6];
-void
-static_init_config(struct static_config *c)
-{
- init_list(&c->iface_routes);
- init_list(&c->other_routes);
+ WALK_LIST(r, cf->routes)
+ if (r->net && (r->net->type != CF->net_type))
+ cf_error("Route %N incompatible with channel type", r->net);
}
static struct proto *
-static_init(struct proto_config *c)
+static_init(struct proto_config *CF)
{
- struct proto *p = proto_new(c, sizeof(struct proto));
+ struct proto *P = proto_new(CF);
+ struct static_proto *p = (void *) P;
+ struct static_config *cf = (void *) CF;
- p->neigh_notify = static_neigh_notify;
- p->if_notify = static_if_notify;
- p->rte_mergable = static_rte_mergable;
+ P->main_channel = proto_add_channel(P, proto_cf_main_channel(CF));
- return p;
-}
+ P->neigh_notify = static_neigh_notify;
+ P->rte_mergable = static_rte_mergable;
-static inline int
-static_same_net(struct static_route *x, struct static_route *y)
-{
- return ipa_equal(x->net, y->net) && (x->masklen == y->masklen);
+ if (cf->igp_table_ip4)
+ p->igp_table_ip4 = cf->igp_table_ip4->table;
+
+ if (cf->igp_table_ip6)
+ p->igp_table_ip6 = cf->igp_table_ip6->table;
+
+ return P;
}
-static inline int
-static_same_dest(struct static_route *x, struct static_route *y)
+static int
+static_start(struct proto *P)
{
- if (x->dest != y->dest)
- return 0;
+ struct static_proto *p = (void *) P;
+ struct static_config *cf = (void *) P->cf;
+ struct static_route *r;
- switch (x->dest)
- {
- case RTD_ROUTER:
- return ipa_equal(x->via, y->via) && (x->via_if == y->via_if);
+ if (!static_lp)
+ static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024));
- case RTD_DEVICE:
- return !strcmp(x->if_name, y->if_name);
+ if (p->igp_table_ip4)
+ rt_lock_table(p->igp_table_ip4);
- case RTD_MULTIPATH:
- for (x = x->mp_next, y = y->mp_next;
- x && y;
- x = x->mp_next, y = y->mp_next)
- if (!ipa_equal(x->via, y->via) || (x->via_if != y->via_if) || (x->use_bfd != y->use_bfd))
- return 0;
- return !x && !y;
+ if (p->igp_table_ip6)
+ rt_lock_table(p->igp_table_ip6);
- case RTDX_RECURSIVE:
- return ipa_equal(x->via, y->via);
+ p->event = ev_new(p->p.pool);
+ p->event->hook = static_announce_marked;
+ p->event->data = p;
- default:
- return 1;
- }
+ BUFFER_INIT(p->marked, p->p.pool, 4);
+
+ /* We have to go UP before routes could be installed */
+ proto_notify_state(P, PS_UP);
+
+ WALK_LIST(r, cf->routes)
+ static_add_rte(p, r);
+
+ return PS_UP;
}
-static inline int
-static_same_rte(struct static_route *x, struct static_route *y)
+static int
+static_shutdown(struct proto *P)
{
- /* Note that i_same() requires arguments in (new, old) order */
- return static_same_dest(x, y) && i_same(y->cmds, x->cmds);
-}
+ struct static_proto *p = (void *) P;
+ struct static_config *cf = (void *) P->cf;
+ struct static_route *r;
+ /* Just reset the runtime data (neigh, chain, state, active, bfd_req) of each route, the routes will be flushed by the nest */
+ WALK_LIST(r, cf->routes)
+ static_reset_rte(p, r);
+
+ return PS_DOWN;
+}
static void
-static_match(struct proto *p, struct static_route *r, struct static_config *n)
+static_cleanup(struct proto *P)
{
- struct static_route *t;
-
- /*
- * For given old route *r we find whether a route to the same
- * network is also in the new route list. In that case, we keep the
- * route and possibly update the route later if destination changed.
- * Otherwise, we remove the route.
- */
+ struct static_proto *p = (void *) P;
- if (r->neigh)
- r->neigh->data = NULL;
+ if (p->igp_table_ip4)
+ rt_unlock_table(p->igp_table_ip4);
- WALK_LIST(t, n->iface_routes)
- if (static_same_net(r, t))
- goto found;
+ if (p->igp_table_ip6)
+ rt_unlock_table(p->igp_table_ip6);
+}
- WALK_LIST(t, n->other_routes)
- if (static_same_net(r, t))
- goto found;
+static void
+static_dump_rte(struct static_route *r)
+{
+ debug("%-1N: ", r->net);
+ if (r->dest == RTD_UNICAST)
+ if (r->iface && ipa_zero(r->via))
+ debug("dev %s\n", r->iface->name);
+ else
+ debug("via %I%J\n", r->via, r->iface);
+ else
+ debug("rtd %d\n", r->dest);
+}
- static_remove(p, r);
- return;
+static void
+static_dump(struct proto *P)
+{
+ struct static_config *c = (void *) P->cf;
+ struct static_route *r;
- found:
- /* If destination is different, force reinstall */
- if ((r->installed > 0) && !static_same_rte(r, t))
- t->installed = -1;
- else
- t->installed = r->installed;
+ debug("Static routes:\n");
+ WALK_LIST(r, c->routes)
+ static_dump_rte(r);
}
-static inline rtable *
-cf_igp_table(struct static_config *cf)
+#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL ) /* table behind cf->igp_table_<sym>, or NULL when unset */
+
+static inline int
+static_cmp_rte(const void *X, const void *Y) /* qsort() comparator used by static_reconfigure(): orders routes by net */
{
- return cf->igp_table ? cf->igp_table->table : NULL;
+ struct static_route *x = *(void **)X, *y = *(void **)Y; /* sorted arrays hold struct static_route pointers */
+ return net_compare(x->net, y->net);
}
static int
-static_reconfigure(struct proto *p, struct proto_config *new)
+static_reconfigure(struct proto *P, struct proto_config *CF)
{
- struct static_config *o = (void *) p->cf;
- struct static_config *n = (void *) new;
- struct static_route *r;
+ struct static_proto *p = (void *) P;
+ struct static_config *o = (void *) P->cf;
+ struct static_config *n = (void *) CF;
+ struct static_route *r, *r2, *or, *nr;
+
+ /* Check change in IGP tables */
+ if ((IGP_TABLE(o, ip4) != IGP_TABLE(n, ip4)) ||
+ (IGP_TABLE(o, ip6) != IGP_TABLE(n, ip6)))
+ return 0;
- if (cf_igp_table(o) != cf_igp_table(n))
+ if (!proto_configure_channel(P, &P->main_channel, proto_cf_main_channel(CF)))
return 0;
- /* Delete all obsolete routes and reset neighbor entries */
- WALK_LIST(r, o->iface_routes)
- static_match(p, r, n);
- WALK_LIST(r, o->other_routes)
- static_match(p, r, n);
+ p->p.cf = CF;
- /* Now add all new routes, those not changed will be ignored by static_install() */
- WALK_LIST(r, n->iface_routes)
- {
- struct iface *ifa;
- if ((ifa = if_find_by_name(r->if_name)) && (ifa->flags & IF_UP))
- static_install(p, r, ifa);
- }
- WALK_LIST(r, n->other_routes)
- static_add(p, n, r);
+ /* Reset route lists in neighbor entries */
+ WALK_LIST(r, o->routes)
+ for (r2 = r; r2; r2 = r2->mp_next)
+ if (r2->neigh)
+ r2->neigh->data = NULL;
- WALK_LIST(r, o->other_routes)
- static_rte_cleanup(p, r);
+ /* Reconfigure initial matching sequence */
+ for (or = HEAD(o->routes), nr = HEAD(n->routes);
+ NODE_VALID(or) && NODE_VALID(nr) && net_equal(or->net, nr->net);
+ or = NODE_NEXT(or), nr = NODE_NEXT(nr))
+ static_reconfigure_rte(p, or, nr);
- return 1;
-}
+ if (!NODE_VALID(or) && !NODE_VALID(nr))
+ return 1;
-static void
-static_copy_routes(list *dlst, list *slst)
-{
- struct static_route *dr, *sr;
+ /* Reconfigure remaining routes, sort them to find matching pairs */
+ struct static_route *or2, *nr2, **orbuf, **nrbuf;
+ uint ornum = 0, nrnum = 0, orpos = 0, nrpos = 0, i;
- init_list(dlst);
- WALK_LIST(sr, *slst)
- {
- /* copy one route */
- dr = cfg_alloc(sizeof(struct static_route));
- memcpy(dr, sr, sizeof(struct static_route));
-
- /* This fn is supposed to be called on fresh src routes, which have 'live'
- fields (like .chain, .neigh or .installed) zero, so no need to zero them */
-
- /* We need to copy multipath chain, because there are backptrs in 'if_name' */
- if (dr->dest == RTD_MULTIPATH)
- {
- struct static_route *md, *ms, **mp_last;
-
- mp_last = &(dr->mp_next);
- for (ms = sr->mp_next; ms; ms = ms->mp_next)
- {
- md = cfg_alloc(sizeof(struct static_route));
- memcpy(md, ms, sizeof(struct static_route));
- md->if_name = (void *) dr; /* really */
-
- *mp_last = md;
- mp_last = &(md->mp_next);
- }
- *mp_last = NULL;
- }
-
- add_tail(dlst, (node *) dr);
- }
+ for (or2 = or; NODE_VALID(or2); or2 = NODE_NEXT(or2))
+ ornum++;
+
+ for (nr2 = nr; NODE_VALID(nr2); nr2 = NODE_NEXT(nr2))
+ nrnum++;
+
+ orbuf = xmalloc(ornum * sizeof(void *));
+ nrbuf = xmalloc(nrnum * sizeof(void *));
+
+ for (i = 0, or2 = or; i < ornum; i++, or2 = NODE_NEXT(or2))
+ orbuf[i] = or2;
+
+ for (i = 0, nr2 = nr; i < nrnum; i++, nr2 = NODE_NEXT(nr2))
+ nrbuf[i] = nr2;
+
+ qsort(orbuf, ornum, sizeof(struct static_route *), static_cmp_rte);
+ qsort(nrbuf, nrnum, sizeof(struct static_route *), static_cmp_rte);
+
+ while ((orpos < ornum) && (nrpos < nrnum))
+ {
+ int x = net_compare(orbuf[orpos]->net, nrbuf[nrpos]->net);
+ if (x < 0)
+ static_remove_rte(p, orbuf[orpos++]);
+ else if (x > 0)
+ static_add_rte(p, nrbuf[nrpos++]);
+ else
+ static_reconfigure_rte(p, orbuf[orpos++], nrbuf[nrpos++]);
+ }
+
+ while (orpos < ornum)
+ static_remove_rte(p, orbuf[orpos++]);
+
+ while (nrpos < nrnum)
+ static_add_rte(p, nrbuf[nrpos++]);
+
+ xfree(orbuf);
+ xfree(nrbuf);
+
+ return 1;
}
static void
@@ -618,53 +579,66 @@ static_copy_config(struct proto_config *dest, struct proto_config *src)
struct static_config *d = (struct static_config *) dest;
struct static_config *s = (struct static_config *) src;
- /* Shallow copy of everything */
- proto_copy_rest(dest, src, sizeof(struct static_config));
+ struct static_route *srt, *snh;
- /* Copy route lists */
- static_copy_routes(&d->iface_routes, &s->iface_routes);
- static_copy_routes(&d->other_routes, &s->other_routes);
-}
+ /* Copy route list */
+ init_list(&d->routes);
+ WALK_LIST(srt, s->routes)
+ {
+ struct static_route *drt = NULL, *dnh = NULL, **dnp = &drt;
+ for (snh = srt; snh; snh = snh->mp_next)
+ {
+ dnh = cfg_alloc(sizeof(struct static_route));
+ memcpy(dnh, snh, sizeof(struct static_route));
-struct protocol proto_static = {
- .name = "Static",
- .template = "static%d",
- .preference = DEF_PREF_STATIC,
- .config_size = sizeof(struct static_config),
- .init = static_init,
- .dump = static_dump,
- .start = static_start,
- .shutdown = static_shutdown,
- .cleanup = static_cleanup,
- .reconfigure = static_reconfigure,
- .copy_config = static_copy_config
-};
+ if (!drt)
+ add_tail(&d->routes, &(dnh->n));
+
+ *dnp = dnh;
+ dnp = &(dnh->mp_next);
+
+ if (snh->mp_head)
+ dnh->mp_head = drt;
+ }
+ }
+}
static void
static_show_rt(struct static_route *r)
{
- byte via[STD_ADDRESS_P_LENGTH + 16];
-
switch (r->dest)
+ {
+ case RTD_UNICAST:
+ {
+ struct static_route *r2;
+
+ cli_msg(-1009, "%N", r->net);
+ for (r2 = r; r2; r2 = r2->mp_next)
{
- case RTD_ROUTER: bsprintf(via, "via %I%J", r->via, r->via_if); break;
- case RTD_DEVICE: bsprintf(via, "dev %s", r->if_name); break;
- case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
- case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
- case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
- case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
- case RTDX_RECURSIVE: bsprintf(via, "recursive %I", r->via); break;
- default: bsprintf(via, "???");
+ if (r2->iface && ipa_zero(r2->via))
+ cli_msg(-1009, "\tdev %s%s", r2->iface->name,
+ r2->active ? "" : " (dormant)");
+ else
+ cli_msg(-1009, "\tvia %I%J%s%s%s", r2->via, r2->iface,
+ r2->onlink ? " onlink" : "",
+ r2->bfd_req ? " (bfd)" : "",
+ r2->active ? "" : " (dormant)");
}
- cli_msg(-1009, "%I/%d %s%s%s", r->net, r->masklen, via,
- r->bfd_req ? " (bfd)" : "", r->installed ? "" : " (dormant)");
+ break;
+ }
- struct static_route *r2;
- if (r->dest == RTD_MULTIPATH)
- for (r2 = r->mp_next; r2; r2 = r2->mp_next)
- cli_msg(-1009, "\tvia %I%J weight %d%s%s", r2->via, r2->via_if, r2->masklen + 1, /* really */
- r2->bfd_req ? " (bfd)" : "", r2->installed ? "" : " (dormant)");
+ case RTD_NONE:
+ case RTD_BLACKHOLE:
+ case RTD_UNREACHABLE:
+ case RTD_PROHIBIT:
+ cli_msg(-1009, "%N\t%s", r->net, rta_dest_names[r->dest]);
+ break;
+
+ case RTDX_RECURSIVE:
+ cli_msg(-1009, "%N\trecursive %I", r->net, r->via);
+ break;
+ }
}
void
@@ -673,9 +647,25 @@ static_show(struct proto *P)
struct static_config *c = (void *) P->cf;
struct static_route *r;
- WALK_LIST(r, c->other_routes)
- static_show_rt(r);
- WALK_LIST(r, c->iface_routes)
+ WALK_LIST(r, c->routes)
static_show_rt(r);
cli_msg(0, "");
}
+
+
+struct protocol proto_static = {
+ .name = "Static",
+ .template = "static%d",
+ .preference = DEF_PREF_STATIC,
+ .channel_mask = NB_ANY,
+ .proto_size = sizeof(struct static_proto),
+ .config_size = sizeof(struct static_config),
+ .postconfig = static_postconfig,
+ .init = static_init,
+ .dump = static_dump,
+ .start = static_start,
+ .shutdown = static_shutdown,
+ .cleanup = static_cleanup,
+ .reconfigure = static_reconfigure,
+ .copy_config = static_copy_config
+};
diff --git a/proto/static/static.h b/proto/static/static.h
index 6b047234..b202c0b1 100644
--- a/proto/static/static.h
+++ b/proto/static/static.h
@@ -11,41 +11,61 @@
#include "nest/route.h"
#include "nest/bfd.h"
+#include "lib/buffer.h"
struct static_config {
struct proto_config c;
- list iface_routes; /* Routes to search on interface events */
- list other_routes; /* Routes hooked to neighbor cache and reject routes */
+ list routes; /* List of static routes (struct static_route) */
int check_link; /* Whether iface link state is used */
- struct rtable_config *igp_table; /* Table used for recursive next hop lookups */
+ struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
+ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
};
+struct static_proto {
+ struct proto p;
-void static_init_config(struct static_config *);
+ struct event *event; /* Event for announcing updated routes */
+ BUFFER(struct static_route *) marked; /* Routes marked for reannouncement */
+ rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
+ rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
+};
struct static_route {
node n;
- struct static_route *chain; /* Next for the same neighbor */
- ip_addr net; /* Network we route */
- int masklen; /* Mask length */
- int dest; /* Destination type (RTD_*) */
+ net_addr *net; /* Network we route */
ip_addr via; /* Destination router */
- struct iface *via_if; /* Destination iface, for link-local vias */
- struct neighbor *neigh;
- byte *if_name; /* Name for RTD_DEVICE routes */
- struct static_route *mp_next; /* Nexthops for RTD_MULTIPATH routes */
+ struct iface *iface; /* Destination iface, for link-local vias or device routes */
+ struct neighbor *neigh; /* Associated neighbor entry */
+ struct static_route *chain; /* Next for the same neighbor */
+ struct static_route *mp_head; /* First nexthop of this route */
+ struct static_route *mp_next; /* Nexthops for multipath routes */
struct f_inst *cmds; /* List of commands for setting attributes */
- int installed; /* Installed in rt table, -1 for reinstall */
- int use_bfd; /* Configured to use BFD */
+ byte dest; /* Destination type (RTD_*) */
+ byte state; /* State of route announcement (SRS_*) */
+ byte active; /* Next hop is active (nbr/iface/BFD available) */
+ byte onlink; /* Gateway is onlink regardless of IP ranges */
+ byte weight; /* Multipath next hop weight */
+ byte use_bfd; /* Configured to use BFD */
struct bfd_request *bfd_req; /* BFD request, if BFD is used */
+ mpls_label_stack *mls; /* MPLS label stack; may be NULL */
};
-/* Dummy nodes (parts of multipath route) abuses masklen field for weight
- and if_name field for a ptr to the master (RTD_MULTIPATH) node. */
-
+/*
+ * Note that data fields neigh, chain, state, active and bfd_req are runtime
+ * data, not real configuration data. They must be handled carefully.
+ *
+ * Regular (i.e. dest == RTD_UNICAST) routes use the static_route structure for
+ * additional next hops (fields mp_head, mp_next). Note that 'state' is for the
+ * whole route, while 'active' is for each next hop. Also note that fields
+ * mp_head, mp_next, active are zero for other kinds of routes.
+ */
#define RTDX_RECURSIVE 0x7f /* Phony dest value for recursive routes */
+#define SRS_DOWN 0 /* Route is not announced */
+#define SRS_CLEAN 1 /* Route is active and announced */
+#define SRS_DIRTY 2 /* Route changed since announcement */
+
void static_show(struct proto *);
#endif