diff options
-rw-r--r-- | configure.in | 3 | ||||
-rw-r--r-- | doc/bird.sgml | 96 | ||||
-rw-r--r-- | doc/reply_codes | 3 | ||||
-rw-r--r-- | lib/birdlib.h | 1 | ||||
-rw-r--r-- | lib/bitops.h | 3 | ||||
-rw-r--r-- | lib/ip.h | 1 | ||||
-rw-r--r-- | lib/printf.c | 31 | ||||
-rw-r--r-- | lib/string.h | 2 | ||||
-rw-r--r-- | nest/proto.c | 3 | ||||
-rw-r--r-- | nest/protocol.h | 2 | ||||
-rw-r--r-- | nest/route.h | 11 | ||||
-rw-r--r-- | proto/Doc | 1 | ||||
-rw-r--r-- | proto/babel/Doc | 2 | ||||
-rw-r--r-- | proto/babel/Makefile | 5 | ||||
-rw-r--r-- | proto/babel/babel.c | 2055 | ||||
-rw-r--r-- | proto/babel/babel.h | 335 | ||||
-rw-r--r-- | proto/babel/config.Y | 129 | ||||
-rw-r--r-- | proto/babel/packets.c | 1093 | ||||
-rw-r--r-- | sysdep/autoconf.h.in | 1 |
19 files changed, 3767 insertions, 10 deletions
diff --git a/configure.in b/configure.in index b9220a1d..16a0b414 100644 --- a/configure.in +++ b/configure.in @@ -206,6 +206,9 @@ fi AC_SUBST(iproutedir) all_protocols="$proto_bfd bgp ospf pipe $proto_radv rip static" +if test "$ip" = ipv6 ; then + all_protocols="$all_protocols babel" +fi all_protocols=`echo $all_protocols | sed 's/ /,/g'` if test "$with_protocols" = all ; then diff --git a/doc/bird.sgml b/doc/bird.sgml index 1a5fbaff..6c6ff3db 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1380,6 +1380,102 @@ corresponding protocol sections. <chapt>Protocols +<sect>Babel + +<sect1>Introduction + +<p>The Babel protocol (RFC6126) is a loop-avoiding distance-vector routing +protocol that is robust and efficient both in ordinary wired networks and in +wireless mesh networks. Babel is conceptually very simple in its operation and +"just works" in its default configuration, though some configuration is possible +and in some cases desirable. + +<p>While the Babel protocol is dual stack (i.e., can carry both IPv4 and IPv6 +routes over the same IPv6 transport), BIRD presently implements only the IPv6 +subset of the protocol. No Babel extensions are implemented, but the BIRD +implementation can coexist with implementations using the extensions (and will +just ignore extension messages). + +<p>The Babel protocol implementation in BIRD is currently in alpha stage. 
+ +<sect1>Configuration + +<p>Babel supports no global configuration options apart from those common to all +other protocols, but supports the following per-interface configuration options: + +<code> +protocol babel [<name>] { + interface <interface pattern> { + type <wired|wireless>; + rxcost <number>; + hello interval <number>; + update interval <number>; + port <number>; + tx class|dscp <number>; + tx priority <number>; + rx buffer <number>; + tx length <number>; + check link <switch>; + }; +} +</code> + +<descrip> + <tag>type wired|wireless </tag> + This option specifies the interface type: Wired or wireless. Wired + interfaces are considered more reliable, and so the default hello + interval is higher, and a neighbour is considered unreachable after only + a small number of "hello" packets are lost. On wireless interfaces, + hello packets are sent more often, and the ETX link quality estimation + technique is used to compute the metrics of routes discovered over this + interface. This technique will gradually degrade the metric of routes + when packets are lost rather than the more binary up/down mechanism of + wired type links. Default: <cf/wired/. + + <tag>rxcost <m/num/</tag> + This specifies the RX cost of the interface. The route metrics will be + computed from this value with a mechanism determined by the interface + <cf/type/. Default: 96 for wired interfaces, 256 for wireless. + + <tag>hello interval <m/num/</tag> + Interval at which periodic "hello" messages are sent on this interface, + in seconds. Default: 4 seconds. + + <tag>update interval <m/num/</tag> + Interval at which periodic (full) updates are sent. Default: 4 times the + hello interval. + + <tag>port <m/number/</tag> + This option selects an UDP port to operate on. The default is to operate + on port 6696 as specified in the Babel RFC. + + <tag>tx class|dscp|priority <m/number/</tag> + These options specify the ToS/DiffServ/Traffic class/Priority of the + outgoing Babel packets. 
See <ref id="dsc-prio" name="tx class"> common + option for detailed description. + + <tag>rx buffer <m/number/</tag> + This option specifies the size of buffers used for packet processing. + The buffer size should be bigger than maximal size of received packets. + The default value is the interface MTU, and the value will be clamped to a + minimum of 512 bytes + IP packet overhead. + + <tag>tx length <m/number/</tag> + This option specifies the maximum length of generated Babel packets. To + avoid IP fragmentation, it should not exceed the interface MTU value. + The default value is the interface MTU value, and the value will be + clamped to a minimum of 512 bytes + IP packet overhead. + + <tag>check link <m/switch/</tag> + If set, the hardware link state (as reported by OS) is taken into + consideration. When the link disappears (e.g. an ethernet cable is + unplugged), neighbors are immediately considered unreachable and all + routes received from them are withdrawn. It is possible that some + hardware drivers or platforms do not implement this feature. Default: + yes. 
+</descrip> + + <sect><label id="sect-bfd">BFD <sect1>Introduction diff --git a/doc/reply_codes b/doc/reply_codes index 79a7eb92..3a7f2c90 100644 --- a/doc/reply_codes +++ b/doc/reply_codes @@ -57,6 +57,9 @@ Reply codes of BIRD command-line interface 1020 Show BFD sessions 1021 Show RIP interface 1022 Show RIP neighbors +1023 Show Babel interfaces +1024 Show Babel neighbors +1025 Show Babel entries 8000 Reply too long 8001 Route not found diff --git a/lib/birdlib.h b/lib/birdlib.h index 16f437ef..904544cb 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -60,6 +60,7 @@ #define NORET __attribute__((noreturn)) #define UNUSED __attribute__((unused)) +#define PACKED __attribute__((packed)) /* Microsecond time */ diff --git a/lib/bitops.h b/lib/bitops.h index c0ad1a70..ce13732a 100644 --- a/lib/bitops.h +++ b/lib/bitops.h @@ -25,5 +25,6 @@ u32 u32_log2(u32 v); static inline u32 u32_hash(u32 v) { return v * 2902958171u; } -#endif +static inline u8 u32_popcount(u32 v) { return __builtin_popcount(v); } +#endif @@ -26,6 +26,7 @@ #define IP6_OSPF_ALL_ROUTERS ipa_build6(0xFF020000, 0, 0, 5) #define IP6_OSPF_DES_ROUTERS ipa_build6(0xFF020000, 0, 0, 6) #define IP6_RIP_ROUTERS ipa_build6(0xFF020000, 0, 0, 9) +#define IP6_BABEL_ROUTERS ipa_build6(0xFF020000, 0, 0, 0x00010006) #define IP4_NONE _MI4(0) #define IP6_NONE _MI6(0,0,0,0) diff --git a/lib/printf.c b/lib/printf.c index e4cc3006..a067fe98 100644 --- a/lib/printf.c +++ b/lib/printf.c @@ -124,6 +124,7 @@ static char * number(char * str, long num, int base, int size, int precision, * width is automatically replaced by standard IP address width which * depends on whether we use IPv4 or IPv6; |%#I| gives hexadecimal format), * |%R| for Router / Network ID (u32 value printed as IPv4 address) + * |%lR| for 64bit Router / Network ID (u64 value printed as eight :-separated octets) * and |%m| resp. |%M| for error messages (uses strerror() to translate @errno code to * message text). 
On the other hand, it doesn't support floating * point numbers. @@ -137,9 +138,10 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) unsigned long num; int i, base; u32 x; + u64 X; char *str, *start; const char *s; - char ipbuf[STD_ADDRESS_P_LENGTH+1]; + char ipbuf[MAX(STD_ADDRESS_P_LENGTH,ROUTER_ID_64_LENGTH)+1]; struct iface *iface; int flags; /* flags to number() */ @@ -309,12 +311,27 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) /* Router/Network ID - essentially IPv4 address in u32 value */ case 'R': - x = va_arg(args, u32); - bsprintf(ipbuf, "%d.%d.%d.%d", - ((x >> 24) & 0xff), - ((x >> 16) & 0xff), - ((x >> 8) & 0xff), - (x & 0xff)); + if(qualifier == 'l') { + X = va_arg(args, u64); + bsprintf(ipbuf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + ((X >> 56) & 0xff), + ((X >> 48) & 0xff), + ((X >> 40) & 0xff), + ((X >> 32) & 0xff), + ((X >> 24) & 0xff), + ((X >> 16) & 0xff), + ((X >> 8) & 0xff), + (X & 0xff)); + } + else + { + x = va_arg(args, u32); + bsprintf(ipbuf, "%d.%d.%d.%d", + ((x >> 24) & 0xff), + ((x >> 16) & 0xff), + ((x >> 8) & 0xff), + (x & 0xff)); + } s = ipbuf; goto str; diff --git a/lib/string.h b/lib/string.h index 0f249d37..9af49b9e 100644 --- a/lib/string.h +++ b/lib/string.h @@ -30,4 +30,6 @@ static inline char *xbasename(const char *str) return s ? 
s+1 : (char *) str; } +#define ROUTER_ID_64_LENGTH 23 + #endif diff --git a/nest/proto.c b/nest/proto.c index 436377f1..3e97e9da 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -919,6 +919,9 @@ protos_build(void) proto_build(&proto_bfd); bfd_init_all(); #endif +#ifdef CONFIG_BABEL + proto_build(&proto_babel); +#endif proto_pool = rp_new(&root_pool, "Protocols"); proto_flush_event = ev_new(proto_pool); diff --git a/nest/protocol.h b/nest/protocol.h index 8c49154f..ec787355 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -76,7 +76,7 @@ void protos_dump_all(void); extern struct protocol proto_device, proto_radv, proto_rip, proto_static, - proto_ospf, proto_pipe, proto_bgp, proto_bfd; + proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel; /* * Routing Protocol Instance diff --git a/nest/route.h b/nest/route.h index 9368808f..3969db6b 100644 --- a/nest/route.h +++ b/nest/route.h @@ -220,6 +220,12 @@ typedef struct rte { u8 suppressed; /* Used for deterministic MED comparison */ } bgp; #endif +#ifdef CONFIG_BABEL + struct { + u16 metric; /* Babel metric */ + u64 router_id; /* Babel router id */ + } babel; +#endif struct { /* Routes generated by krt sync (both temporary and inherited ones) */ s8 src; /* Alleged route source (see krt.h) */ u8 proto; /* Kernel source protocol ID */ @@ -374,6 +380,7 @@ typedef struct rta { #define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ #define RTS_BGP 11 /* BGP route */ #define RTS_PIPE 12 /* Inter-table wormhole */ +#define RTS_BABEL 13 /* Babel route */ #define RTC_UNICAST 0 #define RTC_BROADCAST 1 @@ -422,7 +429,8 @@ typedef struct eattr { #define EAP_RIP 2 /* RIP */ #define EAP_OSPF 3 /* OSPF */ #define EAP_KRT 4 /* Kernel route attributes */ -#define EAP_MAX 5 +#define EAP_BABEL 5 /* Babel attributes */ +#define EAP_MAX 6 #define EA_CODE(proto,id) (((proto) << 8) | (id)) #define EA_PROTO(ea) ((ea) >> 8) @@ -547,6 +555,7 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; #define DEF_PREF_DIRECT 240 /* 
Directly connected */ #define DEF_PREF_STATIC 200 /* Static route */ #define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ +#define DEF_PREF_BABEL 130 /* Babel */ #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ #define DEF_PREF_PIPE 70 /* Routes piped from other tables */ @@ -1,4 +1,5 @@ H Protocols +C babel C bfd C bgp C ospf diff --git a/proto/babel/Doc b/proto/babel/Doc new file mode 100644 index 00000000..2480239e --- /dev/null +++ b/proto/babel/Doc @@ -0,0 +1,2 @@ +S babel.c +S packets.c diff --git a/proto/babel/Makefile b/proto/babel/Makefile new file mode 100644 index 00000000..400ffbac --- /dev/null +++ b/proto/babel/Makefile @@ -0,0 +1,5 @@ +source=babel.c packets.c +root-rel=../../ +dir-name=proto/babel + +include ../../Rules diff --git a/proto/babel/babel.c b/proto/babel/babel.c new file mode 100644 index 00000000..8e104d60 --- /dev/null +++ b/proto/babel/babel.c @@ -0,0 +1,2055 @@ +/* + * BIRD -- The Babel protocol + * + * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the main routines for handling and sending TLVs, as + * well as timers and interaction with the nest. + */ + +/** + * DOC: The Babel protocol + * + * Babel (RFC6126) is a loop-avoiding distance-vector routing protocol that is + * robust and efficient both in ordinary wired networks and in wireless mesh + * networks. + * + * The Babel protocol keeps state for each neighbour in a &babel_neighbor + * struct, tracking received Hello and I Heard You (IHU) messages. A + * &babel_iface struct keeps hello and update times for each interface, and + * a separate hello seqno is maintained for each interface. + * + * For each prefix, Babel keeps track of both the possible routes (with next hop + * and router IDs), as well as the feasibility distance for each prefix and + * router id. 
The prefix itself is tracked in a &babel_entry struct, while the + * possible routes for the prefix are tracked as &babel_route entries and the + * feasibility distance is maintained through &babel_source structures. + * + * The main route selection is done in babel_select_route(). This is called when + * an entry is updated by receiving updates from the network or when modified by + * internal timers. It performs feasibility checks on the available routes for + * the prefix and selects the one with the lowest metric to be announced to the + * core. + */ + +#include <stdlib.h> +#include "babel.h" + + +#define OUR_ROUTE(r) (r->neigh == NULL) + +/* + * Is one number greater or equal than another mod 2^16? This is based on the + * definition of serial number space in RFC 1982. Note that arguments are of + * uint type to avoid integer promotion to signed integer. + */ +static inline int ge_mod64k(uint a, uint b) +{ return (u16)(a - b) < 0x8000; } + +static void babel_dump_entry(struct babel_entry *e); +static void babel_dump_route(struct babel_route *r); +static void babel_select_route(struct babel_entry *e); +static void babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n); +static void babel_send_wildcard_request(struct babel_iface *ifa); +static int babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, + u64 router_id, u16 seqno); +static void babel_trigger_iface_update(struct babel_iface *ifa); +static void babel_trigger_update(struct babel_proto *p); +static void babel_send_seqno_request(struct babel_entry *e); +static inline void babel_kick_timer(struct babel_proto *p); +static inline void babel_iface_kick_timer(struct babel_iface *ifa); + + +/* + * Functions to maintain data structures + */ + +static void +babel_init_entry(struct fib_node *n) +{ + struct babel_entry *e = (void *) n; + e->proto = NULL; + e->selected_in = NULL; + e->selected_out = NULL; + e->updated = now; + init_list(&e->sources); + init_list(&e->routes); 
+} + +static inline struct babel_entry * +babel_find_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + return fib_find(&p->rtable, &prefix, plen); +} + +static struct babel_entry * +babel_get_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + struct babel_entry *e = fib_get(&p->rtable, &prefix, plen); + e->proto = p; + return e; +} + +static struct babel_source * +babel_find_source(struct babel_entry *e, u64 router_id) +{ + struct babel_source *s; + + WALK_LIST(s, e->sources) + if (s->router_id == router_id) + return s; + + return NULL; +} + +static struct babel_source * +babel_get_source(struct babel_entry *e, u64 router_id) +{ + struct babel_proto *p = e->proto; + struct babel_source *s = babel_find_source(e, router_id); + + if (s) + return s; + + s = sl_alloc(p->source_slab); + s->router_id = router_id; + s->expires = now + BABEL_GARBAGE_INTERVAL; + s->seqno = 0; + s->metric = BABEL_INFINITY; + add_tail(&e->sources, NODE s); + + return s; +} + +static void +babel_expire_sources(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_source *n, *nx; + + WALK_LIST_DELSAFE(n, nx, e->sources) + { + if (n->expires && n->expires <= now) + { + rem_node(NODE n); + sl_free(p->source_slab, n); + } + } +} + +static struct babel_route * +babel_find_route(struct babel_entry *e, struct babel_neighbor *n) +{ + struct babel_route *r; + + WALK_LIST(r, e->routes) + if (r->neigh == n) + return r; + + return NULL; +} + +static struct babel_route * +babel_get_route(struct babel_entry *e, struct babel_neighbor *nbr) +{ + struct babel_proto *p = e->proto; + struct babel_route *r = babel_find_route(e, nbr); + + if (r) + return r; + + r = sl_alloc(p->route_slab); + memset(r, 0, sizeof(*r)); + r->e = e; + add_tail(&e->routes, NODE r); + + if (nbr) + { + r->neigh = nbr; + r->expires = now + BABEL_GARBAGE_INTERVAL; + add_tail(&nbr->routes, NODE &r->neigh_route); + } + + return r; +} + +static void +babel_flush_route(struct babel_route *r) +{ + struct 
babel_proto *p = r->e->proto; + + DBG("Babel: Flush route %I/%d router_id %lR neigh %I\n", + r->e->n.prefix, r->e->n.pxlen, r->router_id, r->neigh ? r->neigh->addr : IPA_NONE); + + rem_node(NODE r); + + if (r->neigh) + rem_node(&r->neigh_route); + + if (r->e->selected_in == r) + r->e->selected_in = NULL; + + if (r->e->selected_out == r) + r->e->selected_out = NULL; + + sl_free(p->route_slab, r); +} + +static void +babel_expire_route(struct babel_route *r) +{ + struct babel_proto *p = r->e->proto; + struct babel_entry *e = r->e; + + TRACE(D_EVENTS, "Route expiry timer for %I/%d router-id %lR fired", + e->n.prefix, e->n.pxlen, r->router_id); + + if (r->metric < BABEL_INFINITY) + { + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + } + else + { + babel_flush_route(r); + } +} + +static void +babel_refresh_route(struct babel_route *r) +{ + if (!OUR_ROUTE(r) && (r == r->e->selected_in)) + babel_send_route_request(r->e, r->neigh); + + r->refresh_time = 0; +} + +static void +babel_expire_routes(struct babel_proto *p) +{ + struct babel_entry *e; + struct babel_route *r, *rx; + struct fib_iterator fit; + + FIB_ITERATE_INIT(&fit, &p->rtable); + +loop: + FIB_ITERATE_START(&p->rtable, &fit, n) + { + e = (struct babel_entry *) n; + int changed = 0; + + WALK_LIST_DELSAFE(r, rx, e->routes) + { + if (r->refresh_time && r->refresh_time <= now) + babel_refresh_route(r); + + if (r->expires && r->expires <= now) + { + babel_expire_route(r); + changed = 1; + } + } + + if (changed) + { + /* + * We have to restart the iteration because there may be a cascade of + * synchronous events babel_select_route() -> nest table change -> + * babel_rt_notify() -> p->rtable change, invalidating hidden variables. 
+ */ + + FIB_ITERATE_PUT(&fit, n); + babel_select_route(e); + goto loop; + } + + babel_expire_sources(e); + + /* Remove empty entries */ + if (EMPTY_LIST(e->sources) && EMPTY_LIST(e->routes)) + { + FIB_ITERATE_PUT(&fit, n); + fib_delete(&p->rtable, e); + goto loop; + } + } + FIB_ITERATE_END(n); +} + +static struct babel_neighbor * +babel_find_neighbor(struct babel_iface *ifa, ip_addr addr) +{ + struct babel_neighbor *nbr; + + WALK_LIST(nbr, ifa->neigh_list) + if (ipa_equal(nbr->addr, addr)) + return nbr; + + return NULL; +} + +static struct babel_neighbor * +babel_get_neighbor(struct babel_iface *ifa, ip_addr addr) +{ + struct babel_neighbor *nbr = babel_find_neighbor(ifa, addr); + + if (nbr) + return nbr; + + nbr = mb_allocz(ifa->pool, sizeof(struct babel_neighbor)); + nbr->ifa = ifa; + nbr->addr = addr; + nbr->txcost = BABEL_INFINITY; + init_list(&nbr->routes); + add_tail(&ifa->neigh_list, NODE nbr); + + return nbr; +} + +static void +babel_flush_neighbor(struct babel_neighbor *nbr) +{ + struct babel_proto *p = nbr->ifa->proto; + node *n; + + TRACE(D_EVENTS, "Flushing neighbor %I", nbr->addr); + + WALK_LIST_FIRST(n, nbr->routes) + { + struct babel_route *r = SKIP_BACK(struct babel_route, neigh_route, n); + struct babel_entry *e = r->e; + int selected = (r == e->selected_in); + + babel_flush_route(r); + + if (selected) + babel_select_route(e); + } + + rem_node(NODE nbr); + mb_free(nbr); +} + +static void +babel_expire_ihu(struct babel_neighbor *nbr) +{ + nbr->txcost = BABEL_INFINITY; +} + +static void +babel_expire_hello(struct babel_neighbor *nbr) +{ + nbr->hello_map <<= 1; + + if (nbr->hello_cnt < 16) + nbr->hello_cnt++; + + if (!nbr->hello_map) + babel_flush_neighbor(nbr); +} + +static void +babel_expire_neighbors(struct babel_proto *p) +{ + struct babel_iface *ifa; + struct babel_neighbor *nbr, *nbx; + + WALK_LIST(ifa, p->interfaces) + { + WALK_LIST_DELSAFE(nbr, nbx, ifa->neigh_list) + { + if (nbr->ihu_expiry && nbr->ihu_expiry <= now) + babel_expire_ihu(nbr); 
+ + if (nbr->hello_expiry && nbr->hello_expiry <= now) + babel_expire_hello(nbr); + } + } +} + + +/* + * Best route selection + */ + +/* + * From the RFC (section 3.5.1): + * + * a route advertisement carrying the quintuple (prefix, plen, router-id, seqno, + * metric) is feasible if one of the following conditions holds: + * + * - metric is infinite; or + * + * - no entry exists in the source table indexed by (id, prefix, plen); or + * + * - an entry (prefix, plen, router-id, seqno', metric') exists in the source + * table, and either + * - seqno' < seqno or + * - seqno = seqno' and metric < metric'. + */ +static inline int +babel_is_feasible(struct babel_source *s, u16 seqno, u16 metric) +{ + return !s || + (metric == BABEL_INFINITY) || + (seqno > s->seqno) || + ((seqno == s->seqno) && (metric < s->metric)); +} + +static u16 +babel_compute_rxcost(struct babel_neighbor *n) +{ + struct babel_iface *ifa = n->ifa; + u8 cnt, missed; + u16 map=n->hello_map; + + if (!map) return BABEL_INFINITY; + cnt = u32_popcount(map); // number of bits set + missed = n->hello_cnt-cnt; + + if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + { + /* ETX - Appendix 2.2 in the RFC. + + beta = prob. of successful transmission. + rxcost = BABEL_RXCOST_WIRELESS/beta + + Since: beta = 1-missed/n->hello_cnt = cnt/n->hello_cnt + Then: rxcost = BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt + */ + if (!cnt) return BABEL_INFINITY; + return BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt; + } + else + { + /* k-out-of-j selection - Appendix 2.1 in the RFC. */ + DBG("Babel: Missed %d hellos from %I\n", missed, n->addr); + /* Link is bad if more than half the expected hellos were lost */ + return (missed > n->hello_cnt/2) ? 
BABEL_INFINITY : ifa->cf->rxcost; + } +} + + +static u16 +babel_compute_cost(struct babel_neighbor *n) +{ + struct babel_iface *ifa = n->ifa; + u16 rxcost = babel_compute_rxcost(n); + if (rxcost == BABEL_INFINITY) return rxcost; + else if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + { + /* ETX - Appendix 2.2 in the RFC */ + return (MAX(n->txcost, BABEL_RXCOST_WIRELESS) * rxcost)/BABEL_RXCOST_WIRELESS; + } + else + { + /* k-out-of-j selection - Appendix 2.1 in the RFC. */ + return n->txcost; + } +} + +/* Simple additive metric - Appendix 3.1 in the RFC */ +static u16 +babel_compute_metric(struct babel_neighbor *n, uint metric) +{ + metric += babel_compute_cost(n); + return MIN(metric, BABEL_INFINITY); +} + + +/** + * babel_announce_rte - announce selected route to the core + * @p: Babel protocol instance + * @e: Babel route entry to announce + * + * This function announces a Babel entry to the core if it has a selected + * incoming path, and retracts it otherwise. If the selected entry has infinite + * metric, the route is announced as unreachable. + */ +static void +babel_announce_rte(struct babel_proto *p, struct babel_entry *e) +{ + struct babel_route *r = e->selected_in; + + if (r) + { + net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); + rta A = { + .src = p->p.main_source, + .source = RTS_BABEL, + .scope = SCOPE_UNIVERSE, + .cast = RTC_UNICAST, + .dest = r->metric == BABEL_INFINITY ? 
RTD_UNREACHABLE : RTD_ROUTER, + .flags = 0, + .from = r->neigh->addr, + .iface = r->neigh->ifa->iface, + }; + + if (r->metric < BABEL_INFINITY) + A.gw = r->next_hop; + + rta *a = rta_lookup(&A); + rte *rte = rte_get_temp(a); + rte->u.babel.metric = r->metric; + rte->u.babel.router_id = r->router_id; + rte->net = n; + rte->pflags = 0; + + rte_update(&p->p, n, rte); + } + else + { + /* Retraction */ + net *n = net_find(p->p.table, e->n.prefix, e->n.pxlen); + rte_update(&p->p, n, NULL); + } +} + +/** + * babel_select_route - select best route for given route entry + * @e: Babel entry to select the best route for + * + * Select the best feasible route for a given prefix among the routes received + * from peers, and propagate it to the nest. This just selects the feasible + * route with the lowest metric. + * + * If no feasible route is available for a prefix that previously had a route + * selected, a seqno request is sent to try to get a valid route. In the + * meantime, the route is marked as infeasible in the nest (to blackhole packets + * going to it, as per the RFC). + * + * If no feasible route is available, and no previous route is selected, the + * route is removed from the nest entirely. 
+ */ +static void +babel_select_route(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_route *r, *cur = e->selected_in; + + /* try to find the best feasible route */ + WALK_LIST(r, e->routes) + if (!OUR_ROUTE(r) && /* prevent propagating our own routes back to core */ + (!cur || r->metric < cur->metric) && + babel_is_feasible(babel_find_source(e, r->router_id), r->seqno, r->advert_metric)) + cur = r; + + if (cur && !OUR_ROUTE(cur) && + ((!e->selected_in && cur->metric < BABEL_INFINITY) || + (e->selected_in && cur->metric < e->selected_in->metric))) + { + TRACE(D_EVENTS, "Picked new route for prefix %I/%d: router id %lR metric %d", + e->n.prefix, e->n.pxlen, cur->router_id, cur->metric); + + e->selected_in = cur; + e->updated = now; + babel_announce_rte(p, e); + } + else if (!cur || cur->metric == BABEL_INFINITY) + { + /* Couldn't find a feasible route. If we have a selected route, that means + it just became infeasible; so set its metric to infinite and install it + (as unreachable), then send a seqno request. + + babel_announce_rte() will set the unreachable flag if the metric is BABEL_INFINITY.*/ + if (e->selected_in) + { + TRACE(D_EVENTS, "Lost feasible route for prefix %I/%d", + e->n.prefix, e->n.pxlen); + + e->selected_in->metric = BABEL_INFINITY; + e->updated = now; + + babel_send_seqno_request(e); + babel_announce_rte(p, e); + } + else + { + /* No route currently selected, and no new one selected; this means we + don't have a route to this destination anymore (and were probably + called from an expiry timer). Remove the route from the nest. 
*/ + TRACE(D_EVENTS, "Flushing route for prefix %I/%d", e->n.prefix, e->n.pxlen); + + e->selected_in = NULL; + e->updated = now; + babel_announce_rte(p, e); + } + } +} + +/* + * Functions to send replies + */ + +static void +babel_send_ack(struct babel_iface *ifa, ip_addr dest, u16 nonce) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending ACK to %I with nonce %d", dest, nonce); + + msg.type = BABEL_TLV_ACK; + msg.ack.nonce = nonce; + + babel_send_unicast(&msg, ifa, dest); +} + +static void +babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neighbor *n) +{ + struct babel_proto *p = ifa->proto; + + msg->type = BABEL_TLV_IHU; + msg->ihu.addr = n->addr; + msg->ihu.rxcost = babel_compute_rxcost(n); + msg->ihu.interval = ifa->cf->ihu_interval; + + TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %d", + msg->ihu.addr, msg->ihu.rxcost, msg->ihu.interval); +} + +static void +babel_send_ihu(struct babel_iface *ifa, struct babel_neighbor *n) +{ + union babel_msg msg = {}; + babel_build_ihu(&msg, ifa, n); + babel_send_unicast(&msg, ifa, n->addr); +} + +static void +babel_send_ihus(struct babel_iface *ifa) +{ + struct babel_neighbor *n; + WALK_LIST(n, ifa->neigh_list) + { + union babel_msg msg = {}; + babel_build_ihu(&msg, ifa, n); + babel_enqueue(&msg, ifa); + } +} + +static void +babel_send_hello(struct babel_iface *ifa, u8 send_ihu) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + msg.type = BABEL_TLV_HELLO; + msg.hello.seqno = ifa->hello_seqno++; + msg.hello.interval = ifa->cf->hello_interval; + + TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %d", + ifa->ifname, msg.hello.seqno, msg.hello.interval); + + babel_enqueue(&msg, ifa); + + if (send_ihu) + babel_send_ihus(ifa); +} + +static void +babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n) +{ + struct babel_proto *p = e->proto; + struct babel_iface *ifa = n->ifa; + union 
babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending route request for %I/%d to %I", + e->n.prefix, e->n.pxlen, n->addr); + + msg.type = BABEL_TLV_ROUTE_REQUEST; + msg.route_request.prefix = e->n.prefix; + msg.route_request.plen = e->n.pxlen; + + babel_send_unicast(&msg, ifa, n->addr); +} + +static void +babel_send_wildcard_request(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending wildcard route request on %s", + ifa->ifname); + + msg.type = BABEL_TLV_ROUTE_REQUEST; + msg.route_request.full = 1; + + babel_enqueue(&msg, ifa); +} + +static void +babel_send_seqno_request(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_route *r = e->selected_in; + struct babel_iface *ifa = NULL; + struct babel_source *s = NULL; + union babel_msg msg = {}; + + s = babel_find_source(e, r->router_id); + if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) + return; + + TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", + e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); + + msg.type = BABEL_TLV_SEQNO_REQUEST; + msg.seqno_request.plen = e->n.pxlen; + msg.seqno_request.seqno = s->seqno + 1; + msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; + msg.seqno_request.router_id = r->router_id; + msg.seqno_request.prefix = e->n.prefix; + + WALK_LIST(ifa, p->interfaces) + babel_enqueue(&msg, ifa); +} + +static void +babel_unicast_seqno_request(struct babel_route *r) +{ + struct babel_entry *e = r->e; + struct babel_proto *p = e->proto; + struct babel_iface *ifa = r->neigh->ifa; + struct babel_source *s = NULL; + union babel_msg msg = {}; + + s = babel_find_source(e, r->router_id); + if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) + return; + + TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", + e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); + + 
msg.type = BABEL_TLV_SEQNO_REQUEST; + msg.seqno_request.plen = e->n.pxlen; + msg.seqno_request.seqno = s->seqno + 1; + msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; + msg.seqno_request.router_id = r->router_id; + msg.seqno_request.prefix = e->n.prefix; + + babel_send_unicast(&msg, ifa, r->neigh->addr); +} + +/** + * babel_send_update - send route table updates + * @ifa: Interface to transmit on + * @changed: Only send entries changed since this time + * + * This function produces update TLVs for all entries changed since the time + * indicated by the &changed parameter and queues them for transmission on the + * selected interface. During the process, the feasibility distance for each + * transmitted entry is updated. + */ +static void +babel_send_update(struct babel_iface *ifa, bird_clock_t changed) +{ + struct babel_proto *p = ifa->proto; + + FIB_WALK(&p->rtable, n) + { + struct babel_entry *e = (void *) n; + struct babel_route *r = e->selected_out; + + if (!r) + continue; + + /* Our own seqno might have changed, in which case we update the routes we + originate. 
*/ + if ((r->router_id == p->router_id) && (r->seqno < p->update_seqno)) + { + r->seqno = p->update_seqno; + e->updated = now; + } + + /* Skip routes that weren't updated since 'changed' time */ + if (e->updated < changed) + continue; + + TRACE(D_PACKETS, "Sending update for %I/%d router-id %lR seqno %d metric %d", + e->n.prefix, e->n.pxlen, r->router_id, r->seqno, r->metric); + + union babel_msg msg = {}; + msg.type = BABEL_TLV_UPDATE; + msg.update.plen = e->n.pxlen; + msg.update.interval = ifa->cf->update_interval; + msg.update.seqno = r->seqno; + msg.update.metric = r->metric; + msg.update.prefix = e->n.prefix; + msg.update.router_id = r->router_id; + + /* Update feasibility distance */ + struct babel_source *s = babel_get_source(e, r->router_id); + s->expires = now + BABEL_GARBAGE_INTERVAL; + if ((msg.update.seqno > s->seqno) || + ((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric))) + { + s->seqno = msg.update.seqno; + s->metric = msg.update.metric; + } + babel_enqueue(&msg, ifa); + } + FIB_WALK_END; +} + +static void +babel_trigger_iface_update(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + + /* Interface not active or already scheduled */ + if (!ifa->up || ifa->want_triggered) + return; + + TRACE(D_EVENTS, "Scheduling triggered updates for %s seqno %d", + ifa->iface->name, p->update_seqno); + + ifa->want_triggered = now; + babel_iface_kick_timer(ifa); +} + +/* Sends an update on all interfaces. 
*/ +static void +babel_trigger_update(struct babel_proto *p) +{ + if (p->triggered) + return; + + struct babel_iface *ifa; + WALK_LIST(ifa, p->interfaces) + babel_trigger_iface_update(ifa); + + p->triggered = 1; +} + +/* A retraction is an update with an infinite metric */ +static void +babel_send_retraction(struct babel_iface *ifa, ip_addr prefix, int plen) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending retraction for %I/%d router-id %lR seqno %d", + prefix, plen, p->router_id, p->update_seqno); + + msg.type = BABEL_TLV_UPDATE; + msg.update.plen = plen; + msg.update.interval = ifa->cf->update_interval; + msg.update.seqno = p->update_seqno; + msg.update.metric = BABEL_INFINITY; + msg.update.prefix = prefix; + msg.update.router_id = p->router_id; + + babel_enqueue(&msg, ifa); +} + + +/* + * TLV handler helpers + */ + +/* Update hello history according to Appendix A1 of the RFC */ +static void +babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) +{ + /* + * Compute the difference between expected and received seqno (modulo 2^16). + * If the expected and received seqnos are within 16 of each other, the modular + * difference is going to be less than 16 for one of the directions. Otherwise, + * the values differ too much, so just reset the state. + */ + + u16 delta = ((uint) seqno - (uint) n->next_hello_seqno); + + if (delta == 0) + { + /* Do nothing */ + } + else if (delta <= 16) + { + /* Sending node decreased interval; fast-forward */ + n->hello_map <<= delta; + n->hello_cnt = MIN(n->hello_cnt + delta, 16); + } + else if (delta >= 0xfff0) + { + u8 diff = (0xffff - delta); + /* Sending node increased interval; undo history */ + n->hello_map >>= diff; + n->hello_cnt = (diff < n->hello_cnt) ? 
n->hello_cnt - diff : 0; + } + else + { + /* Note state reset - flush entries */ + n->hello_map = n->hello_cnt = 0; + } + + /* Current entry */ + n->hello_map = (n->hello_map << 1) | 1; + n->next_hello_seqno = seqno+1; + if (n->hello_cnt < 16) n->hello_cnt++; + n->hello_expiry = now + BABEL_HELLO_EXPIRY_FACTOR(interval); +} + +static void +babel_expire_seqno_requests(struct babel_proto *p) +{ + struct babel_seqno_request *n, *nx; + WALK_LIST_DELSAFE(n, nx, p->seqno_cache) + { + if ((n->updated + BABEL_SEQNO_REQUEST_EXPIRY) <= now) + { + rem_node(NODE n); + sl_free(p->seqno_slab, n); + } + } +} + +/* + * Checks the seqno request cache for a matching request and returns failure if + * found. Otherwise, a new entry is stored in the cache. + */ +static int +babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, + u64 router_id, u16 seqno) +{ + struct babel_seqno_request *r; + + WALK_LIST(r, p->seqno_cache) + { + if (ipa_equal(r->prefix, prefix) && (r->plen == plen) && + (r->router_id == router_id) && (r->seqno == seqno)) + return 0; + } + + /* no entries found */ + r = sl_alloc(p->seqno_slab); + r->prefix = prefix; + r->plen = plen; + r->router_id = router_id; + r->seqno = seqno; + r->updated = now; + add_tail(&p->seqno_cache, NODE r); + + return 1; +} + +static void +babel_forward_seqno_request(struct babel_entry *e, + struct babel_msg_seqno_request *in, + ip_addr sender) +{ + struct babel_proto *p = e->proto; + struct babel_route *r; + + TRACE(D_PACKETS, "Forwarding seqno request for %I/%d router-id %lR seqno %d", + e->n.prefix, e->n.pxlen, in->router_id, in->seqno); + + WALK_LIST(r, e->routes) + { + if ((r->router_id == in->router_id) && + !OUR_ROUTE(r) && + !ipa_equal(r->neigh->addr, sender)) + { + if (!babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, in->router_id, in->seqno)) + return; + + union babel_msg msg = {}; + msg.type = BABEL_TLV_SEQNO_REQUEST; + msg.seqno_request.plen = in->plen; + msg.seqno_request.seqno = in->seqno; + 
msg.seqno_request.hop_count = in->hop_count-1; + msg.seqno_request.router_id = in->router_id; + msg.seqno_request.prefix = e->n.prefix; + + babel_send_unicast(&msg, r->neigh->ifa, r->neigh->addr); + return; + } + } +} + + +/* + * TLV handlers + */ + +void +babel_handle_ack_req(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_ack_req *msg = &m->ack_req; + + TRACE(D_PACKETS, "Handling ACK request nonce %d interval %d", + msg->nonce, msg->interval); + + babel_send_ack(ifa, msg->sender, msg->nonce); +} + +void +babel_handle_hello(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_hello *msg = &m->hello; + + TRACE(D_PACKETS, "Handling hello seqno %d interval %d", + msg->seqno, msg->interval); + + struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); + babel_update_hello_history(n, msg->seqno, msg->interval); + if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + babel_send_ihu(ifa, n); +} + +void +babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_ihu *msg = &m->ihu; + + /* Ignore IHUs that are not about us */ + if ((msg->ae != BABEL_AE_WILDCARD) && !ipa_equal(msg->addr, ifa->addr)) + return; + + TRACE(D_PACKETS, "Handling IHU rxcost %d interval %d", + msg->rxcost, msg->interval); + + struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); + n->txcost = msg->rxcost; + n->ihu_expiry = now + BABEL_IHU_EXPIRY_FACTOR(msg->interval); +} + +/** + * babel_handle_update - handle incoming route updates + * @m: Incoming update TLV + * @ifa: Interface the update was received on + * + * This function is called as a handler for update TLVs and handles the updating + * and maintenance of route entries in Babel's internal routing cache. 
The + * handling follows the actions described in the Babel RFC, and at the end of + * each update handling, babel_select_route() is called on the affected entry to + * optionally update the selected routes and propagate them to the core. + */ +void +babel_handle_update(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_update *msg = &m->update; + + struct babel_neighbor *n; + struct babel_entry *e; + struct babel_source *s; + struct babel_route *r; + int feasible; + + TRACE(D_PACKETS, "Handling update for %I/%d with seqno %d metric %d", + msg->prefix, msg->plen, msg->seqno, msg->metric); + + n = babel_find_neighbor(ifa, msg->sender); + if (!n) + { + DBG("Babel: Haven't heard from neighbor %I; ignoring update.\n", msg->sender); + return; + } + + if (msg->router_id == p->router_id) + { + DBG("Babel: Ignoring update for our own router ID.\n"); + return; + } + + /* + * RFC section 3.5.4: + * + * When a Babel node receives an update (id, prefix, seqno, metric) from a + * neighbour neigh with a link cost value equal to cost, it checks whether it + * already has a routing table entry indexed by (neigh, id, prefix). + * + * If no such entry exists: + * + * o if the update is unfeasible, it is ignored; + * + * o if the metric is infinite (the update is a retraction), the update is + * ignored; + * + * o otherwise, a new route table entry is created, indexed by (neigh, id, + * prefix), with seqno equal to seqno and an advertised metric equal to the + * metric carried by the update. + * + * If such an entry exists: + * + * o if the entry is currently installed and the update is unfeasible, then + * the behaviour depends on whether the router-ids of the two entries match. + * If the router-ids are different, the update is treated as though it were + * a retraction (i.e., as though the metric were FFFF hexadecimal). 
If the + * router-ids are equal, the update is ignored; + * + * o otherwise (i.e., if either the update is feasible or the entry is not + * currently installed), then the entry's sequence number, advertised + * metric, metric, and router-id are updated and, unless the advertised + * metric is infinite, the route's expiry timer is reset to a small multiple + * of the Interval value included in the update. + */ + + if (msg->metric == BABEL_INFINITY) + e = babel_find_entry(p, msg->prefix, msg->plen); + else + e = babel_get_entry(p, msg->prefix, msg->plen); + + if (!e) + return; + + s = babel_find_source(e, msg->router_id); /* for feasibility */ + r = babel_find_route(e, n); /* the route entry indexed by neighbour */ + feasible = babel_is_feasible(s, msg->seqno, msg->metric); + + if (!r) + { + if (!feasible || (msg->metric == BABEL_INFINITY)) + return; + + r = babel_get_route(e, n); + r->advert_metric = msg->metric; + r->router_id = msg->router_id; + r->metric = babel_compute_metric(n, msg->metric); + r->next_hop = msg->next_hop; + r->seqno = msg->seqno; + } + else if (r == r->e->selected_in && !feasible) + { + /* Route is installed and update is infeasible - we may lose the route, so + send a unicast seqno request (section 3.8.2.2 second paragraph). 
*/ + babel_unicast_seqno_request(r); + + if (msg->router_id == r->router_id) return; + r->metric = BABEL_INFINITY; /* retraction */ + } + else + { + /* Last paragraph above - update the entry */ + r->advert_metric = msg->metric; + r->metric = babel_compute_metric(n, msg->metric); + r->router_id = msg->router_id; + r->next_hop = msg->next_hop; + r->seqno = msg->seqno; + + if (msg->metric != BABEL_INFINITY) + { + r->expiry_interval = BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); + r->expires = now + r->expiry_interval; + if (r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL) + r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL; + } + + /* If the route is not feasible at this point, it means it is from another + neighbour than the one currently selected; so send a unicast seqno + request to try to get a better route (section 3.8.2.2 last paragraph). */ + if (!feasible) + babel_unicast_seqno_request(r); + } + + babel_select_route(e); +} + +void +babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_route_request *msg = &m->route_request; + + /* RFC 6126 3.8.1.1 */ + + /* Wildcard request - full update on the interface */ + if (msg->full) + { + TRACE(D_PACKETS, "Handling wildcard route request"); + ifa->want_triggered = 1; + return; + } + + TRACE(D_PACKETS, "Handling route request for %I/%d", msg->prefix, msg->plen); + + /* Non-wildcard request - see if we have an entry for the route. + If not, send a retraction, otherwise send an update. 
*/ + struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); + if (!e) + { + babel_send_retraction(ifa, msg->prefix, msg->plen); + } + else + { + babel_trigger_iface_update(ifa); + e->updated = now; + } +} + + +/* Handle a seqno request TLV. The request is ignored when we have no selected outgoing route for the prefix or that route has infinite metric. It is answered with a triggered update on ifa when the selected route has a different router ID or a seqno no smaller than requested. Otherwise our own seqno is increased (router ID is ours) or the request is forwarded (router ID is not ours and the hop count is above 1). */ void +babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_seqno_request *msg = &m->seqno_request; + + /* RFC 6126 3.8.1.2 */ + + TRACE(D_PACKETS, "Handling seqno request for %I/%d router-id %lR seqno %d hop count %d", + msg->prefix, msg->plen, msg->router_id, msg->seqno, msg->hop_count); + + /* Ignore if we have no such entry or entry has infinite metric */ + struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); + if (!e || !e->selected_out || (e->selected_out->metric == BABEL_INFINITY)) + return; + + /* Trigger update on incoming interface if we have a selected route with + different router id or seqno no smaller than requested */ + struct babel_route *r = e->selected_out; + if ((r->router_id != msg->router_id) || ge_mod64k(r->seqno, msg->seqno)) + { + babel_trigger_iface_update(ifa); + e->updated = now; + return; + } + + /* Requested seqno is larger than ours; check if we own the router id */ + if (msg->router_id == p->router_id) + { + /* Ours; update seqno and trigger global update */ + p->update_seqno++; + babel_trigger_update(p); + } + else + { + /* Not ours; forward if the hop count allows it */ + if (msg->hop_count > 1) + babel_forward_seqno_request(e, msg, msg->sender); + } +} + + +/* + * Babel interfaces + */ + +/** + * babel_iface_timer - Babel interface timer handler + * @t: Timer + * + * This function is called by the per-interface timer and triggers sending of + * periodic Hellos and both triggered and periodic updates. Periodic Hellos + * and updates are simply handled by setting the next_{hello,regular} variables + * on the interface, and triggering an update (and resetting the variable) + * whenever 'now' reaches or exceeds that value. 
+ * + * For triggered updates, babel_trigger_iface_update() will set the + * want_triggered field on the interface to a timestamp value. If this is set + * (and the next_triggered time has passed; this is a rate limiting mechanism), + * babel_send_update() will be called with this timestamp as the second + * parameter. This causes updates to be sent consisting of only the routes that + * have changed since the time saved in want_triggered. + * + * In most cases, when an update is triggered the 'updated' field of the entry + * being modified is set to the value of 'now' at the time of the trigger; the + * >= comparison for selecting which routes to send in the update makes sure + * that entry is included. + */ +static void +babel_iface_timer(timer *t) +{ + struct babel_iface *ifa = t->data; + struct babel_proto *p = ifa->proto; + bird_clock_t hello_period = ifa->cf->hello_interval; + bird_clock_t update_period = ifa->cf->update_interval; + + if (now >= ifa->next_hello) + { + babel_send_hello(ifa, (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS || + ifa->hello_seqno % BABEL_IHU_INTERVAL_FACTOR == 0)); + ifa->next_hello += hello_period * (1 + (now - ifa->next_hello) / hello_period); + } + + if (now >= ifa->next_regular) + { + TRACE(D_EVENTS, "Sending regular updates on %s", ifa->ifname); + babel_send_update(ifa, 0); + ifa->next_regular += update_period * (1 + (now - ifa->next_regular) / update_period); + ifa->want_triggered = 0; + p->triggered = 0; + } + else if (ifa->want_triggered && (now >= ifa->next_triggered)) + { + TRACE(D_EVENTS, "Sending triggered updates on %s", ifa->ifname); + babel_send_update(ifa, ifa->want_triggered); + ifa->next_triggered = now + MIN(5, update_period / 2 + 1); + ifa->want_triggered = 0; + p->triggered = 0; + } + + bird_clock_t next_event = MIN(ifa->next_hello, ifa->next_regular); + tm_start(ifa->timer, ifa->want_triggered ? 
1 : (next_event - now)); +} + +static inline void +babel_iface_kick_timer(struct babel_iface *ifa) +{ + if (ifa->timer->expires > (now + 1)) + tm_start(ifa->timer, 1); +} + +static void +babel_iface_start(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + + TRACE(D_EVENTS, "Starting interface %s", ifa->ifname); + + ifa->next_hello = now + (random() % ifa->cf->hello_interval) + 1; + ifa->next_regular = now + (random() % ifa->cf->update_interval) + 1; + ifa->next_triggered = now + MIN(5, ifa->cf->update_interval / 2 + 1); + ifa->want_triggered = 0; /* We send an immediate update (below) */ + tm_start(ifa->timer, 1); + ifa->up = 1; + + babel_send_hello(ifa, 0); + babel_send_wildcard_request(ifa); + babel_send_update(ifa, 0); /* Full update */ +} + +static void +babel_iface_stop(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_neighbor *nbr; + struct babel_route *r; + node *n; + + TRACE(D_EVENTS, "Stopping interface %s", ifa->ifname); + + /* + * Rather than just flushing the neighbours, we set the metric of their routes + * to infinity. This allows us to keep the neighbour hello state for when the + * interface comes back up. The routes will also be kept until they expire. 
+ */ + WALK_LIST(nbr, ifa->neigh_list) + { + WALK_LIST(n, nbr->routes) + { + r = SKIP_BACK(struct babel_route, neigh_route, n); + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + babel_select_route(r->e); + } + } + + tm_stop(ifa->timer); + ifa->up = 0; +} + +static inline int +babel_iface_link_up(struct babel_iface *ifa) +{ + return !ifa->cf->check_link || (ifa->iface->flags & IF_LINK_UP); +} + +static void +babel_iface_update_state(struct babel_iface *ifa) +{ + int up = ifa->sk && babel_iface_link_up(ifa); + + if (up == ifa->up) + return; + + if (up) + babel_iface_start(ifa); + else + babel_iface_stop(ifa); +} + +static void +babel_iface_update_buffers(struct babel_iface *ifa) +{ + if (!ifa->sk) + return; + + uint mtu = MAX(BABEL_MIN_MTU, ifa->iface->mtu); + uint rbsize = ifa->cf->rx_buffer ?: mtu; + uint tbsize = ifa->cf->tx_length ?: mtu; + rbsize = MAX(rbsize, tbsize); + + sk_set_rbsize(ifa->sk, rbsize); + sk_set_tbsize(ifa->sk, tbsize); + + ifa->tx_length = tbsize - BABEL_OVERHEAD; +} + +static struct babel_iface* +babel_find_iface(struct babel_proto *p, struct iface *what) +{ + struct babel_iface *ifa; + + WALK_LIST (ifa, p->interfaces) + if (ifa->iface == what) + return ifa; + + return NULL; +} + +static void +babel_iface_locked(struct object_lock *lock) +{ + struct babel_iface *ifa = lock->data; + struct babel_proto *p = ifa->proto; + + if (!babel_open_socket(ifa)) + { + log(L_ERR "%s: Cannot open socket for %s", p->p.name, ifa->iface->name); + return; + } + + babel_iface_update_buffers(ifa); + babel_iface_update_state(ifa); +} + +static void +babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_config *ic) +{ + struct babel_iface *ifa; + + TRACE(D_EVENTS, "Adding interface %s", new->name); + + pool *pool = rp_new(p->p.pool, new->name); + + ifa = mb_allocz(pool, sizeof(struct babel_iface)); + ifa->proto = p; + ifa->iface = new; + ifa->cf = ic; + ifa->pool = pool; + ifa->ifname = new->name; + + 
add_tail(&p->interfaces, NODE ifa); + + struct ifa *addr; + WALK_LIST(addr, new->addrs) + if (ipa_is_link_local(addr->ip)) + ifa->addr = addr->ip; + + if (ipa_zero(ifa->addr)) + log(L_WARN "%s: Cannot find link-local addr on %s", p->p.name, new->name); + + init_list(&ifa->neigh_list); + ifa->hello_seqno = 1; + + ifa->timer = tm_new_set(ifa->pool, babel_iface_timer, ifa, 0, 0); + + init_list(&ifa->msg_queue); + ifa->send_event = ev_new(ifa->pool); + ifa->send_event->hook = babel_send_queue; + ifa->send_event->data = ifa; + + struct object_lock *lock = olock_new(ifa->pool); + lock->type = OBJLOCK_UDP; + lock->addr = IP6_BABEL_ROUTERS; + lock->port = ifa->cf->port; + lock->iface = ifa->iface; + lock->hook = babel_iface_locked; + lock->data = ifa; + + olock_acquire(lock); +} + +static void +babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa) +{ + TRACE(D_EVENTS, "Removing interface %s", ifa->iface->name); + + struct babel_neighbor *n; + WALK_LIST_FIRST(n, ifa->neigh_list) + babel_flush_neighbor(n); + + rem_node(NODE ifa); + + rfree(ifa->pool); /* contains ifa itself, locks, socket, etc */ +} + +static void +babel_if_notify(struct proto *P, unsigned flags, struct iface *iface) +{ + struct babel_proto *p = (void *) P; + struct babel_config *cf = (void *) P->cf; + + if (iface->flags & IF_IGNORE) + return; + + if (flags & IF_CHANGE_UP) + { + struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, iface->addr); + + /* we only speak multicast */ + if (!(iface->flags & IF_MULTICAST)) + return; + + if (ic) + babel_add_iface(p, iface, ic); + + return; + } + + struct babel_iface *ifa = babel_find_iface(p, iface); + + if (!ifa) + return; + + if (flags & IF_CHANGE_DOWN) + { + babel_remove_iface(p, ifa); + return; + } + + if (flags & IF_CHANGE_MTU) + babel_iface_update_buffers(ifa); + + if (flags & IF_CHANGE_LINK) + babel_iface_update_state(ifa); +} + +static int +babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct 
babel_iface_config *new) +{ + struct babel_iface_config *old = ifa->cf; + + /* Change of these options would require to reset the iface socket */ + if ((new->port != old->port) || + (new->tx_tos != old->tx_tos) || + (new->tx_priority != old->tx_priority)) + return 0; + + TRACE(D_EVENTS, "Reconfiguring interface %s", ifa->iface->name); + + ifa->cf = new; + + if (ifa->next_regular > (now + new->update_interval)) + ifa->next_regular = now + (random() % new->update_interval) + 1; + + if ((new->tx_length != old->tx_length) || (new->rx_buffer != old->rx_buffer)) + babel_iface_update_buffers(ifa); + + if (new->check_link != old->check_link) + babel_iface_update_state(ifa); + + if (ifa->up) + babel_iface_kick_timer(ifa); + + return 1; +} + +static void +babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) +{ + struct iface *iface; + + WALK_LIST(iface, iface_list) + { + if (! (iface->flags & IF_UP)) + continue; + + struct babel_iface *ifa = babel_find_iface(p, iface); + struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); + + if (ifa && ic) + { + if (babel_reconfigure_iface(p, ifa, ic)) + continue; + + /* Hard restart */ + log(L_INFO "%s: Restarting interface %s", p->p.name, ifa->iface->name); + babel_remove_iface(p, ifa); + babel_add_iface(p, iface, ic); + } + + if (ifa && !ic) + babel_remove_iface(p, ifa); + + if (!ifa && ic) + babel_add_iface(p, iface, ic); + } +} + + +/* + * Debugging and info output functions + */ + +static void +babel_dump_source(struct babel_source *s) +{ + debug("Source router_id %lR seqno %d metric %d expires %d\n", + s->router_id, s->seqno, s->metric, s->expires ? s->expires-now : 0); +} + +static void +babel_dump_route(struct babel_route *r) +{ + debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %d\n", + r->neigh ? r->neigh->addr : IPA_NONE, + r->neigh ? r->neigh->ifa->ifname : "(none)", + r->seqno, r->advert_metric, r->metric, + r->router_id, r->expires ? 
r->expires-now : 0); +} + +static void +babel_dump_entry(struct babel_entry *e) +{ + struct babel_source *s; + struct babel_route *r; + + debug("Babel: Entry %I/%d:\n", e->n.prefix, e->n.pxlen); + + WALK_LIST(s,e->sources) + { debug(" "); babel_dump_source(s); } + + WALK_LIST(r,e->routes) + { + debug(" "); + if (r == e->selected_out) debug("*"); + if (r == e->selected_in) debug("+"); + babel_dump_route(r); + } +} + +static void +babel_dump_neighbor(struct babel_neighbor *n) +{ + debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %d/%d\n", + n->addr, n->txcost, n->hello_map, n->next_hello_seqno, + n->hello_expiry ? n->hello_expiry - now : 0, + n->ihu_expiry ? n->ihu_expiry - now : 0); +} + +static void +babel_dump_iface(struct babel_iface *ifa) +{ + struct babel_neighbor *n; + + debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %d %d\n", + ifa->ifname, ifa->addr, ifa->cf->rxcost, ifa->cf->type, ifa->hello_seqno, + ifa->cf->hello_interval, ifa->cf->update_interval); + + WALK_LIST(n, ifa->neigh_list) + { debug(" "); babel_dump_neighbor(n); } +} + +static void +babel_dump(struct proto *P) +{ + struct babel_proto *p = (struct babel_proto *) P; + struct babel_iface *ifa; + + debug("Babel: router id %lR update seqno %d\n", p->router_id, p->update_seqno); + + WALK_LIST(ifa, p->interfaces) + babel_dump_iface(ifa); + + FIB_WALK(&p->rtable, n) + { + babel_dump_entry((struct babel_entry *) n); + } + FIB_WALK_END; +} + +static void +babel_get_route_info(rte *rte, byte *buf, ea_list *attrs) +{ + buf += bsprintf(buf, " (%d/%d) [%lR]", rte->pref, rte->u.babel.metric, rte->u.babel.router_id); +} + +static int +babel_get_attr(eattr *a, byte *buf, int buflen UNUSED) +{ + switch (a->id) + { + case EA_BABEL_METRIC: + bsprintf(buf, "metric: %d", a->u.data); + return GA_FULL; + + case EA_BABEL_ROUTER_ID: + { + u64 rid = 0; + memcpy(&rid, a->u.ptr->data, sizeof(u64)); + bsprintf(buf, "router_id: %lR", rid); + return GA_FULL; + } + + default: + 
return GA_UNKNOWN; + } +} + +void +babel_show_interfaces(struct proto *P, char *iff) +{ + struct babel_proto *p = (void *) P; + struct babel_iface *ifa = NULL; + struct babel_neighbor *nbr = NULL; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1023, "%s: is not up", p->p.name); + cli_msg(0, ""); + return; + } + + cli_msg(-1023, "%s:", p->p.name); + cli_msg(-1023, "%-10s %-6s %7s %6s %6s", + "Interface", "State", "RX cost", "Nbrs", "Timer"); + + WALK_LIST(ifa, p->interfaces) + { + if (iff && !patmatch(iff, ifa->iface->name)) + continue; + + int nbrs = 0; + WALK_LIST(nbr, ifa->neigh_list) + nbrs++; + + int timer = MIN(ifa->next_regular, ifa->next_hello) - now; + cli_msg(-1023, "%-10s %-6s %7u %6u %6u", + ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->rxcost, nbrs, MAX(timer, 0)); + } + + cli_msg(0, ""); +} + +void +babel_show_neighbors(struct proto *P, char *iff) +{ + struct babel_proto *p = (void *) P; + struct babel_iface *ifa = NULL; + struct babel_neighbor *n = NULL; + struct babel_route *r = NULL; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1024, "%s: is not up", p->p.name); + cli_msg(0, ""); + return; + } + + cli_msg(-1024, "%s:", p->p.name); + cli_msg(-1024, "%-25s %-10s %6s %6s %10s", + "IP address", "Interface", "Metric", "Routes", "Next hello"); + + WALK_LIST(ifa, p->interfaces) + { + if (iff && !patmatch(iff, ifa->iface->name)) + continue; + + WALK_LIST(n, ifa->neigh_list) + { + int rts = 0; + WALK_LIST(r, n->routes) + rts++; + + int timer = n->hello_expiry - now; + cli_msg(-1024, "%-25I %-10s %6u %6u %10u", + n->addr, ifa->iface->name, n->txcost, rts, MAX(timer, 0)); + } + } + + cli_msg(0, ""); +} + +void +babel_show_entries(struct proto *P) +{ + struct babel_proto *p = (void *) P; + struct babel_entry *e = NULL; + struct babel_source *s = NULL; + struct babel_route *r = NULL; + + char ipbuf[STD_ADDRESS_P_LENGTH+5]; + char ridbuf[ROUTER_ID_64_LENGTH+1]; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1025, "%s: is not up", p->p.name); + 
cli_msg(0, ""); + return; + } + + cli_msg(-1025, "%s:", p->p.name); + cli_msg(-1025, "%-29s %-23s %6s %5s %7s %7s", + "Prefix", "Router ID", "Metric", "Seqno", "Expires", "Sources"); + + FIB_WALK(&p->rtable, n) + { + e = (struct babel_entry *) n; + r = e->selected_in ? e->selected_in : e->selected_out; + + int srcs = 0; + WALK_LIST(s, e->sources) + srcs++; + + bsprintf(ipbuf, "%I/%u", e->n.prefix, e->n.pxlen); + + if (r) + { + if (r->router_id == p->router_id) + bsprintf(ridbuf, "%s", "<self>"); + else + bsprintf(ridbuf, "%lR", r->router_id); + + int time = r->expires ? r->expires - now : 0; + cli_msg(-1025, "%-29s %-23s %6u %5u %7u %7u", + ipbuf, ridbuf, r->metric, r->seqno, MAX(time, 0), srcs); + } + else + { + cli_msg(-1025, "%-29s %-44s %7u", ipbuf, "<pending>", srcs); + } + } + FIB_WALK_END; + + cli_msg(0, ""); +} + + +/* + * Babel protocol glue + */ + +/** + * babel_timer - global timer hook + * @t: Timer + * + * This function is called by the global protocol instance timer and handles + * expiration of routes and neighbours as well as pruning of the seqno request + * cache. 
+ */ +static void +babel_timer(timer *t) +{ + struct babel_proto *p = t->data; + + babel_expire_routes(p); + babel_expire_seqno_requests(p); + babel_expire_neighbors(p); +} + +/* Restart the protocol timer with a delay of 1 unless it is already set to expire no later than now + 1. */ static inline void +babel_kick_timer(struct babel_proto *p) +{ + if (p->timer->expires > (now + 1)) + tm_start(p->timer, 1); +} + + +/* Build an ea_list allocated from pool and chained to next, holding two temporary attributes: EA_BABEL_METRIC (integer metric) and EA_BABEL_ROUTER_ID (opaque copy of the 64-bit router_id). */ static struct ea_list * +babel_prepare_attrs(struct linpool *pool, ea_list *next, uint metric, u64 router_id) +{ + struct ea_list *l = lp_alloc(pool, sizeof(struct ea_list) + 2*sizeof(eattr)); + struct adata *rid = lp_alloc(pool, sizeof(struct adata) + sizeof(u64)); + rid->length = sizeof(u64); + memcpy(&rid->data, &router_id, sizeof(u64)); + + l->next = next; + l->flags = EALF_SORTED; + l->count = 2; + + l->attrs[0].id = EA_BABEL_METRIC; + l->attrs[0].flags = 0; + l->attrs[0].type = EAF_TYPE_INT | EAF_TEMP; + l->attrs[0].u.data = metric; + + l->attrs[1].id = EA_BABEL_ROUTER_ID; + l->attrs[1].flags = 0; + l->attrs[1].type = EAF_TYPE_OPAQUE | EAF_TEMP; + l->attrs[1].u.ptr = rid; + + return l; +} + + +/* Give routes whose source is not RTS_BABEL an initial attribute list (metric 0, our router ID); always returns 0. */ static int +babel_import_control(struct proto *P, struct rte **rt, struct ea_list **attrs, struct linpool *pool) +{ + struct babel_proto *p = (void *) P; + + /* Prepare attributes with initial values */ + if ((*rt)->attrs->source != RTS_BABEL) + *attrs = babel_prepare_attrs(pool, NULL, 0, p->router_id); + + return 0; +} + +/* Expose the route's stored Babel metric and router ID as a temporary attribute list. */ static struct ea_list * +babel_make_tmp_attrs(struct rte *rt, struct linpool *pool) +{ + return babel_prepare_attrs(pool, NULL, rt->u.babel.metric, rt->u.babel.router_id); +} + +/* Copy the EA_BABEL_METRIC attribute back into the route (0 is passed as the ea_get_int() fallback argument); the router ID attribute is not read back here. */ static void +babel_store_tmp_attrs(struct rte *rt, struct ea_list *attrs) +{ + rt->u.babel.metric = ea_get_int(attrs, EA_BABEL_METRIC, 0); +} + +/* + * babel_rt_notify - core tells us about new route (possibly our own), + * so store it into our data structures. 
+ */ +static void +babel_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, + struct rte *new, struct rte *old, struct ea_list *attrs) +{ + struct babel_proto *p = (void *) P; + struct babel_entry *e; + struct babel_route *r; + + if (new) + { + /* Update */ + e = babel_get_entry(p, net->n.prefix, net->n.pxlen); + + if (new->attrs->src->proto != P) + { + r = babel_get_route(e, NULL); + r->seqno = p->update_seqno; + r->router_id = p->router_id; + r->metric = 0; /* FIXME: should be selectable */ + } + else + r = e->selected_in; + + if (r != e->selected_out) + { + e->selected_out = r; + e->updated = now; + babel_trigger_update(p); + } + } + else + { + /* Withdraw */ + e = babel_find_entry(p, net->n.prefix, net->n.pxlen); + if (!e || !e->selected_out) + return; + + if (OUR_ROUTE(e->selected_out)) + { + /* + * We originate this route, so set its metric to infinity and set an + * expiry time. This causes a retraction to be sent, and later the route + * to be flushed once the hold time has passed. + */ + e->selected_out->metric = BABEL_INFINITY; + e->selected_out->expires = now + BABEL_HOLD_TIME; + e->updated = now; + babel_trigger_update(p); + } + else + { + /* + * This is a route originating from someone else that was lost; presumably + * because an export filter was updated to filter it. This means we can't + * set the metric to infinity (it would be overridden on subsequent + * updates from the peer originating the route), so just clear the + * exported route. + * + * This causes peers to expire the route after a while (like if we just + * shut down), but it's the best we can do in these circumstances; and + * since export filters presumably aren't updated that often this is + * acceptable. 
+ */ + e->selected_out = NULL; + } + } +} + +static int +babel_rte_better(struct rte *new, struct rte *old) +{ + return new->u.babel.metric < old->u.babel.metric; +} + +static int +babel_rte_same(struct rte *new, struct rte *old) +{ + return ((new->u.babel.router_id == old->u.babel.router_id) && + (new->u.babel.metric == old->u.babel.metric)); +} + + +static struct proto * +babel_init(struct proto_config *cfg) +{ + struct proto *P = proto_new(cfg, sizeof(struct babel_proto)); + + P->accept_ra_types = RA_OPTIMAL; + P->if_notify = babel_if_notify; + P->rt_notify = babel_rt_notify; + P->import_control = babel_import_control; + P->make_tmp_attrs = babel_make_tmp_attrs; + P->store_tmp_attrs = babel_store_tmp_attrs; + P->rte_better = babel_rte_better; + P->rte_same = babel_rte_same; + + return P; +} + +static int +babel_start(struct proto *P) +{ + struct babel_proto *p = (void *) P; + struct babel_config *cf = (void *) P->cf; + + fib_init(&p->rtable, P->pool, sizeof(struct babel_entry), 0, babel_init_entry); + init_list(&p->interfaces); + p->timer = tm_new_set(P->pool, babel_timer, p, 0, 1); + tm_start(p->timer, 2); + p->update_seqno = 1; + p->router_id = proto_get_router_id(&cf->c); + + p->route_slab = sl_new(P->pool, sizeof(struct babel_route)); + p->source_slab = sl_new(P->pool, sizeof(struct babel_source)); + p->msg_slab = sl_new(P->pool, sizeof(struct babel_msg_node)); + p->seqno_slab = sl_new(P->pool, sizeof(struct babel_seqno_request)); + init_list(&p->seqno_cache); + + p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 }; + + return PS_UP; +} + +static int +babel_reconfigure(struct proto *P, struct proto_config *c) +{ + struct babel_proto *p = (void *) P; + struct babel_config *new = (void *) c; + + TRACE(D_EVENTS, "Reconfiguring"); + + p->p.cf = c; + babel_reconfigure_ifaces(p, new); + + babel_trigger_update(p); + babel_kick_timer(p); + + return 1; +} + + +struct protocol proto_babel = { + .name = "Babel", + .template = "babel%d", + .attr_class = EAP_BABEL, + 
.preference = DEF_PREF_BABEL, + .config_size = sizeof(struct babel_config), + .init = babel_init, + .dump = babel_dump, + .start = babel_start, + .reconfigure = babel_reconfigure, + .get_route_info = babel_get_route_info, + .get_attr = babel_get_attr +}; diff --git a/proto/babel/babel.h b/proto/babel/babel.h new file mode 100644 index 00000000..aea0dd88 --- /dev/null +++ b/proto/babel/babel.h @@ -0,0 +1,335 @@ +/* + * BIRD -- The Babel protocol + * + * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the data structures used by Babel. + */ + +#ifndef _BIRD_BABEL_H_ +#define _BIRD_BABEL_H_ + +#include "nest/bird.h" +#include "nest/cli.h" +#include "nest/iface.h" +#include "nest/route.h" +#include "nest/protocol.h" +#include "nest/locks.h" +#include "lib/resource.h" +#include "lib/lists.h" +#include "lib/socket.h" +#include "lib/string.h" +#include "lib/timer.h" + +#ifndef IPV6 +#error "The Babel protocol only speaks IPv6" +#endif + +#define EA_BABEL_METRIC EA_CODE(EAP_BABEL, 0) +#define EA_BABEL_ROUTER_ID EA_CODE(EAP_BABEL, 1) + +#define BABEL_MAGIC 42 +#define BABEL_VERSION 2 +#define BABEL_PORT 6696 +#define BABEL_INFINITY 0xFFFF + + +#define BABEL_HELLO_INTERVAL_WIRED 4 /* Default hello intervals in seconds */ +#define BABEL_HELLO_INTERVAL_WIRELESS 4 +#define BABEL_UPDATE_INTERVAL_FACTOR 4 +#define BABEL_IHU_INTERVAL_FACTOR 3 +#define BABEL_IHU_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ +#define BABEL_HELLO_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ +#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((X)*7/2) /* 3.5 */ +#define BABEL_ROUTE_REFRESH_INTERVAL 2 /* Seconds before route expiry to send route request */ +#define BABEL_HOLD_TIME 10 /* Expiry time for our own routes */ +#define BABEL_RXCOST_WIRED 96 +#define BABEL_RXCOST_WIRELESS 256 +#define BABEL_INITIAL_HOP_COUNT 255 +#define BABEL_MAX_SEND_INTERVAL 5 +#define BABEL_TIME_UNITS 100 /* On-wire times are counted in centiseconds */ 
+ +#define BABEL_SEQNO_REQUEST_EXPIRY 60 +#define BABEL_GARBAGE_INTERVAL 300 + +#define BABEL_OVERHEAD (SIZE_OF_IP_HEADER+UDP_HEADER_LENGTH) +#define BABEL_MIN_MTU (512 + BABEL_OVERHEAD) + + +enum babel_tlv_type { + BABEL_TLV_PAD1 = 0, + BABEL_TLV_PADN = 1, + BABEL_TLV_ACK_REQ = 2, + BABEL_TLV_ACK = 3, + BABEL_TLV_HELLO = 4, + BABEL_TLV_IHU = 5, + BABEL_TLV_ROUTER_ID = 6, + BABEL_TLV_NEXT_HOP = 7, + BABEL_TLV_UPDATE = 8, + BABEL_TLV_ROUTE_REQUEST = 9, + BABEL_TLV_SEQNO_REQUEST = 10, + /* extensions - not implemented + BABEL_TLV_TS_PC = 11, + BABEL_TLV_HMAC = 12, + BABEL_TLV_SS_UPDATE = 13, + BABEL_TLV_SS_REQUEST = 14, + BABEL_TLV_SS_SEQNO_REQUEST = 15, + */ + BABEL_TLV_MAX +}; + +enum babel_iface_type { + /* In practice, UNDEF and WIRED give equivalent behaviour */ + BABEL_IFACE_TYPE_UNDEF = 0, + BABEL_IFACE_TYPE_WIRED = 1, + BABEL_IFACE_TYPE_WIRELESS = 2, + BABEL_IFACE_TYPE_MAX +}; + +enum babel_ae_type { + BABEL_AE_WILDCARD = 0, + BABEL_AE_IP4 = 1, + BABEL_AE_IP6 = 2, + BABEL_AE_IP6_LL = 3, + BABEL_AE_MAX +}; + + +struct babel_config { + struct proto_config c; + + list iface_list; /* Patterns configured -- keep it first; see babel_reconfigure why */ +}; + +struct babel_iface_config { + struct iface_patt i; + + u16 rxcost; + u8 type; + u8 check_link; + int port; + u16 hello_interval; + u16 ihu_interval; + u16 update_interval; + + u16 rx_buffer; /* RX buffer size, 0 for MTU */ + u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */ + int tx_tos; + int tx_priority; +}; + +struct babel_proto { + struct proto p; + timer *timer; + struct fib rtable; + list interfaces; /* Interfaces we really know about (struct babel_iface) */ + u64 router_id; + u16 update_seqno; /* To be increased on request */ + u8 triggered; /* For triggering global updates */ + + slab *route_slab; + slab *source_slab; + slab *msg_slab; + + slab *seqno_slab; + list seqno_cache; /* Seqno requests in the cache (struct babel_seqno_request) */ + + struct tbf log_pkt_tbf; /* TBF for 
packet messages */ +}; + +struct babel_iface { + node n; + + struct babel_proto *proto; + struct iface *iface; + + struct babel_iface_config *cf; + + u8 up; + + pool *pool; + char *ifname; + sock *sk; + ip_addr addr; + int tx_length; + list neigh_list; /* List of neighbors seen on this iface (struct babel_neighbor) */ + list msg_queue; + + u16 hello_seqno; /* To be increased on each hello */ + + bird_clock_t next_hello; + bird_clock_t next_regular; + bird_clock_t next_triggered; + bird_clock_t want_triggered; + + timer *timer; + event *send_event; +}; + +struct babel_neighbor { + node n; + struct babel_iface *ifa; + + ip_addr addr; + u16 txcost; + u8 hello_cnt; + u16 hello_map; + u16 next_hello_seqno; + /* expiry timers */ + bird_clock_t hello_expiry; + bird_clock_t ihu_expiry; + + list routes; /* Routes this neighbour has sent us (struct babel_route) */ +}; + +struct babel_source { + node n; + + u64 router_id; + u16 seqno; + u16 metric; + bird_clock_t expires; +}; + +struct babel_route { + node n; + node neigh_route; + struct babel_entry *e; + struct babel_neighbor *neigh; + + u16 seqno; + u16 advert_metric; + u16 metric; + u64 router_id; + ip_addr next_hop; + bird_clock_t refresh_time; + bird_clock_t expires; + u16 expiry_interval; +}; + +struct babel_entry { + struct fib_node n; + struct babel_proto *proto; + struct babel_route *selected_in; + struct babel_route *selected_out; + + bird_clock_t updated; + + list sources; /* Source entries for this prefix (struct babel_source). */ + list routes; /* Routes for this prefix (struct babel_route) */ +}; + +/* Stores forwarded seqno requests for duplicate suppression. 
*/ +struct babel_seqno_request { + node n; + ip_addr prefix; + u8 plen; + u64 router_id; + u16 seqno; + bird_clock_t updated; +}; + + +/* + * Internal TLV messages + */ + +struct babel_msg_ack_req { + u8 type; + u16 nonce; + u16 interval; + ip_addr sender; +}; + +struct babel_msg_ack { + u8 type; + u16 nonce; +}; + +struct babel_msg_hello { + u8 type; + u16 seqno; + u16 interval; + ip_addr sender; +}; + +struct babel_msg_ihu { + u8 type; + u8 ae; + u16 rxcost; + u16 interval; + ip_addr addr; + ip_addr sender; +}; + +struct babel_msg_update { + u8 type; + u8 ae; + u8 plen; + u16 interval; + u16 seqno; + u16 metric; + ip_addr prefix; + u64 router_id; + ip_addr next_hop; + ip_addr sender; +}; + +struct babel_msg_route_request { + u8 type; + u8 full; + u8 plen; + ip_addr prefix; +}; + +struct babel_msg_seqno_request { + u8 type; + u8 plen; + u16 seqno; + u8 hop_count; + u64 router_id; + ip_addr prefix; + ip_addr sender; +}; + +union babel_msg { + u8 type; + struct babel_msg_ack_req ack_req; + struct babel_msg_ack ack; + struct babel_msg_hello hello; + struct babel_msg_ihu ihu; + struct babel_msg_update update; + struct babel_msg_route_request route_request; + struct babel_msg_seqno_request seqno_request; +}; + +struct babel_msg_node { + node n; + union babel_msg msg; +}; + + +/* babel.c */ +void babel_handle_ack_req(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_ack(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_hello(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_ihu(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_router_id(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_update(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_route_request(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_seqno_request(union babel_msg *msg, struct babel_iface *ifa); + +void babel_show_interfaces(struct proto *P, char *iff); +void 
babel_show_neighbors(struct proto *P, char *iff); +void babel_show_entries(struct proto *P); + +/* packets.c */ +void babel_enqueue(union babel_msg *msg, struct babel_iface *ifa); +void babel_send_unicast(union babel_msg *msg, struct babel_iface *ifa, ip_addr dest); +int babel_open_socket(struct babel_iface *ifa); +void babel_send_queue(void *arg); + + +#endif diff --git a/proto/babel/config.Y b/proto/babel/config.Y new file mode 100644 index 00000000..e7ce6a93 --- /dev/null +++ b/proto/babel/config.Y @@ -0,0 +1,129 @@ +/* + * BIRD -- Babel Configuration + * + * Copyright (c) 2015-2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + + + +CF_HDR + +#include "proto/babel/babel.h" +#include "nest/iface.h" + +CF_DEFINES + +#define BABEL_CFG ((struct babel_config *) this_proto) +#define BABEL_IFACE ((struct babel_iface_config *) this_ipatt) + +CF_DECLS + +CF_KEYWORDS(BABEL, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, WIRED, +WIRELESS, RX, TX, BUFFER, LENGTH, CHECK, LINK, BABEL_METRIC) + +CF_GRAMMAR + +CF_ADDTO(proto, babel_proto) + +babel_proto_start: proto_start BABEL +{ + this_proto = proto_config_new(&proto_babel, $1); + init_list(&BABEL_CFG->iface_list); +}; + +babel_proto_item: + proto_item + | INTERFACE babel_iface + ; + +babel_proto_opts: + /* empty */ + | babel_proto_opts babel_proto_item ';' + ; + +babel_proto: + babel_proto_start proto_name '{' babel_proto_opts '}'; + + +babel_iface_start: +{ + this_ipatt = cfg_allocz(sizeof(struct babel_iface_config)); + add_tail(&BABEL_CFG->iface_list, NODE this_ipatt); + init_list(&this_ipatt->ipn_list); + BABEL_IFACE->port = BABEL_PORT; + BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; + BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL; + BABEL_IFACE->tx_priority = sk_priority_control; + BABEL_IFACE->check_link = 1; +}; + + +babel_iface_finish: +{ + if (BABEL_IFACE->type == BABEL_IFACE_TYPE_WIRELESS) + { + if (!BABEL_IFACE->hello_interval) + BABEL_IFACE->hello_interval 
= BABEL_HELLO_INTERVAL_WIRELESS; + if (!BABEL_IFACE->rxcost) + BABEL_IFACE->rxcost = BABEL_RXCOST_WIRELESS; + } + else + { + if (!BABEL_IFACE->hello_interval) + BABEL_IFACE->hello_interval = BABEL_HELLO_INTERVAL_WIRED; + if (!BABEL_IFACE->rxcost) + BABEL_IFACE->rxcost = BABEL_RXCOST_WIRED; + } + + if (!BABEL_IFACE->update_interval) + BABEL_IFACE->update_interval = BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR; + BABEL_IFACE->ihu_interval = BABEL_IFACE->hello_interval*BABEL_IHU_INTERVAL_FACTOR; +}; + + +babel_iface_item: + | PORT expr { BABEL_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); } + | RXCOST expr { BABEL_IFACE->rxcost = $2; if (($2<1) || ($2>65535)) cf_error("Invalid rxcost"); } + | HELLO INTERVAL expr { BABEL_IFACE->hello_interval = $3; if (($3<1) || ($3>65535)) cf_error("Invalid hello interval"); } + | UPDATE INTERVAL expr { BABEL_IFACE->update_interval = $3; if (($3<1) || ($3>65535)) cf_error("Invalid update interval"); } + | TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; } + | TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; } + | RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); } + | TX LENGTH expr { BABEL_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } + | TX tos { BABEL_IFACE->tx_tos = $2; } + | TX PRIORITY expr { BABEL_IFACE->tx_priority = $3; } + | CHECK LINK bool { BABEL_IFACE->check_link = $3; } + ; + +babel_iface_opts: + /* empty */ + | babel_iface_opts babel_iface_item ';' + ; + +babel_iface_opt_list: + /* empty */ + | '{' babel_iface_opts '}' + ; + + +babel_iface: + babel_iface_start iface_patt_list_nopx babel_iface_opt_list babel_iface_finish; + +CF_ADDTO(dynamic_attr, BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_BABEL_METRIC); }) + +CF_CLI_HELP(SHOW BABEL, ..., [[Show information about Babel protocol]]); + +CF_CLI(SHOW 
BABEL INTERFACES, optsym opttext, [<name>] [\"<interface>\"], [[Show information about Babel interfaces]]) +{ babel_show_interfaces(proto_get_named($4, &proto_babel), $5); }; + +CF_CLI(SHOW BABEL NEIGHBORS, optsym opttext, [<name>] [\"<interface>\"], [[Show information about Babel neighbors]]) +{ babel_show_neighbors(proto_get_named($4, &proto_babel), $5); }; + +CF_CLI(SHOW BABEL ENTRIES, optsym opttext, [<name>], [[Show information about Babel prefix entries]]) +{ babel_show_entries(proto_get_named($4, &proto_babel)); }; + +CF_CODE + +CF_END diff --git a/proto/babel/packets.c b/proto/babel/packets.c new file mode 100644 index 00000000..be47aa75 --- /dev/null +++ b/proto/babel/packets.c @@ -0,0 +1,1093 @@ +/* + * BIRD -- The Babel protocol + * + * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the packet and TLV handling code for the protocol. + */ + +#include "babel.h" + + +struct babel_pkt_header { + u8 magic; + u8 version; + u16 length; +} PACKED; + +struct babel_tlv { + u8 type; + u8 length; + u8 value[0]; +} PACKED; + +struct babel_tlv_ack_req { + u8 type; + u8 length; + u16 reserved; + u16 nonce; + u16 interval; +} PACKED; + +struct babel_tlv_ack { + u8 type; + u8 length; + u16 nonce; +} PACKED; + +struct babel_tlv_hello { + u8 type; + u8 length; + u16 reserved; + u16 seqno; + u16 interval; +} PACKED; + +struct babel_tlv_ihu { + u8 type; + u8 length; + u8 ae; + u8 reserved; + u16 rxcost; + u16 interval; + u8 addr[0]; +} PACKED; + +struct babel_tlv_router_id { + u8 type; + u8 length; + u16 reserved; + u64 router_id; +} PACKED; + +struct babel_tlv_next_hop { + u8 type; + u8 length; + u8 ae; + u8 reserved; + u8 addr[0]; +} PACKED; + +struct babel_tlv_update { + u8 type; + u8 length; + u8 ae; + u8 flags; + u8 plen; + u8 omitted; + u16 interval; + u16 seqno; + u16 metric; + u8 addr[0]; +} PACKED; + +struct babel_tlv_route_request { + u8 type; + u8 length; + u8 
ae; + u8 plen; + u8 addr[0]; +} PACKED; + +struct babel_tlv_seqno_request { + u8 type; + u8 length; + u8 ae; + u8 plen; + u16 seqno; + u8 hop_count; + u8 reserved; + u64 router_id; + u8 addr[0]; +} PACKED; + + +#define BABEL_FLAG_DEF_PREFIX 0x80 +#define BABEL_FLAG_ROUTER_ID 0x40 + + +struct babel_parse_state { + struct babel_proto *proto; + struct babel_iface *ifa; + ip_addr saddr; + ip_addr next_hop; + u64 router_id; /* Router ID used in subsequent updates */ + u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */ + u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */ + u8 router_id_seen; /* router_id field is valid */ + u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */ + u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */ +}; + +enum parse_result { + PARSE_SUCCESS, + PARSE_ERROR, + PARSE_IGNORE, +}; + +struct babel_write_state { + u64 router_id; + u8 router_id_seen; +// ip_addr next_hop; +}; + + +#define DROP(DSC,VAL) do { err_dsc = DSC; err_val = VAL; goto drop; } while(0) +#define DROP1(DSC) do { err_dsc = DSC; goto drop; } while(0) +#define LOG_PKT(msg, args...) \ + log_rl(&p->log_pkt_tbf, L_REMOTE "%s: " msg, p->p.name, args) + +#define FIRST_TLV(p) ((struct babel_tlv *) (((struct babel_pkt_header *) p) + 1)) +#define NEXT_TLV(t) ((struct babel_tlv *) (((byte *) t) + TLV_LENGTH(t))) +#define TLV_LENGTH(t) (t->type == BABEL_TLV_PAD1 ? 
1 : t->length + sizeof(struct babel_tlv)) +#define TLV_OPT_LENGTH(t) (t->length + sizeof(struct babel_tlv) - sizeof(*t)) +#define TLV_HDR(tlv,t,l) ({ tlv->type = t; tlv->length = l - sizeof(struct babel_tlv); }) +#define TLV_HDR0(tlv,t) TLV_HDR(tlv, t, tlv_data[t].min_length) + + +static inline u16 +get_time16(const void *p) +{ + u16 v = get_u16(p) / BABEL_TIME_UNITS; + return MAX(1, v); +} + +static inline void +put_time16(void *p, u16 v) +{ + put_u16(p, (v > 0xFFFF / BABEL_TIME_UNITS) ? 0xFFFF : v * BABEL_TIME_UNITS); /* Saturate: the on-wire field is 16 bits, so larger values would wrap */ +} + +static inline ip6_addr +get_ip6_px(const void *p, int plen) +{ + ip6_addr addr = IPA_NONE; + memcpy(&addr, p, (plen + 7) / 8); + return ip6_ntoh(addr); +} + +static inline void +put_ip6_px(void *p, ip6_addr addr, int plen) +{ + addr = ip6_hton(addr); + memcpy(p, &addr, (plen + 7) / 8); +} + +static inline ip6_addr +get_ip6_ll(const void *p) +{ + return ip6_build(0xfe800000, 0, get_u32(p+0), get_u32(p+4)); +} + +static inline void +put_ip6_ll(void *p, ip6_addr addr) +{ + put_u32(p+0, _I2(addr)); + put_u32(p+4, _I3(addr)); +} + + +/* + * TLV read/write functions + */ + +static int babel_read_ack_req(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_ihu(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_router_id(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); + +static int babel_write_ack(struct babel_tlv 
*hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_ihu(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); + +struct babel_tlv_data { + u8 min_length; + int (*read_tlv)(struct babel_tlv *hdr, union babel_msg *m, struct babel_parse_state *state); + int (*write_tlv)(struct babel_tlv *hdr, union babel_msg *m, struct babel_write_state *state, int max_len); + void (*handle_tlv)(union babel_msg *m, struct babel_iface *ifa); +}; + +const static struct babel_tlv_data tlv_data[BABEL_TLV_MAX] = { + [BABEL_TLV_ACK_REQ] = { + sizeof(struct babel_tlv_ack_req), + babel_read_ack_req, + NULL, + babel_handle_ack_req + }, + [BABEL_TLV_ACK] = { + sizeof(struct babel_tlv_ack), + NULL, + babel_write_ack, + NULL + }, + [BABEL_TLV_HELLO] = { + sizeof(struct babel_tlv_hello), + babel_read_hello, + babel_write_hello, + babel_handle_hello + }, + [BABEL_TLV_IHU] = { + sizeof(struct babel_tlv_ihu), + babel_read_ihu, + babel_write_ihu, + babel_handle_ihu + }, + [BABEL_TLV_ROUTER_ID] = { + sizeof(struct babel_tlv_router_id), + babel_read_router_id, + NULL, + NULL + }, + [BABEL_TLV_NEXT_HOP] = { + sizeof(struct babel_tlv_next_hop), + babel_read_next_hop, + NULL, + NULL + }, + [BABEL_TLV_UPDATE] = { + sizeof(struct babel_tlv_update), + babel_read_update, + babel_write_update, + babel_handle_update + }, + [BABEL_TLV_ROUTE_REQUEST] = { + sizeof(struct babel_tlv_route_request), + babel_read_route_request, + 
babel_write_route_request, + babel_handle_route_request + }, + [BABEL_TLV_SEQNO_REQUEST] = { + sizeof(struct babel_tlv_seqno_request), + babel_read_seqno_request, + babel_write_seqno_request, + babel_handle_seqno_request + }, +}; + +static int +babel_read_ack_req(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_ack_req *tlv = (void *) hdr; + struct babel_msg_ack_req *msg = &m->ack_req; + + msg->type = BABEL_TLV_ACK_REQ; + msg->nonce = get_u16(&tlv->nonce); + msg->interval = get_time16(&tlv->interval); + msg->sender = state->saddr; + + if (!msg->interval) + return PARSE_ERROR; + + return PARSE_SUCCESS; +} + +static int +babel_write_ack(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_ack *tlv = (void *) hdr; + struct babel_msg_ack *msg = &m->ack; + + TLV_HDR0(tlv, BABEL_TLV_ACK); + put_u16(&tlv->nonce, msg->nonce); + + return sizeof(struct babel_tlv_ack); +} + +static int +babel_read_hello(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_hello *tlv = (void *) hdr; + struct babel_msg_hello *msg = &m->hello; + + msg->type = BABEL_TLV_HELLO; + msg->seqno = get_u16(&tlv->seqno); + msg->interval = get_time16(&tlv->interval); + msg->sender = state->saddr; + + return PARSE_SUCCESS; +} + +static int +babel_write_hello(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_hello *tlv = (void *) hdr; + struct babel_msg_hello *msg = &m->hello; + + TLV_HDR0(tlv, BABEL_TLV_HELLO); + put_u16(&tlv->seqno, msg->seqno); + put_time16(&tlv->interval, msg->interval); + + return sizeof(struct babel_tlv_hello); +} + +static int +babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_ihu *tlv = (void *) hdr; + struct babel_msg_ihu *msg = &m->ihu; + + msg->type = BABEL_TLV_IHU; + msg->ae = tlv->ae; + 
msg->rxcost = get_u16(&tlv->rxcost); + msg->interval = get_time16(&tlv->interval); + msg->addr = IPA_NONE; + msg->sender = state->saddr; + + if (msg->ae >= BABEL_AE_MAX) + return PARSE_IGNORE; + + // We handle link-local IPs. In every other case, the addr field will be 0 but + // validation will succeed. The handler takes care of these cases. + if (msg->ae == BABEL_AE_IP6_LL) + { + if (TLV_OPT_LENGTH(tlv) < 8) + return PARSE_ERROR; + + msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + } + + return PARSE_SUCCESS; +} + +static int +babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_ihu *tlv = (void *) hdr; + struct babel_msg_ihu *msg = &m->ihu; + + if (ipa_is_link_local(msg->addr) && max_len < sizeof(struct babel_tlv_ihu) + 8) + return 0; + + TLV_HDR0(tlv, BABEL_TLV_IHU); + put_u16(&tlv->rxcost, msg->rxcost); + put_time16(&tlv->interval, msg->interval); + + if (!ipa_is_link_local(msg->addr)) + { + tlv->ae = BABEL_AE_WILDCARD; + return sizeof(struct babel_tlv_ihu); + } + put_ip6_ll(&tlv->addr, msg->addr); + tlv->ae = BABEL_AE_IP6_LL; + hdr->length += 8; + return sizeof(struct babel_tlv_ihu) + 8; +} + +static int +babel_read_router_id(struct babel_tlv *hdr, union babel_msg *m UNUSED, + struct babel_parse_state *state) +{ + struct babel_tlv_router_id *tlv = (void *) hdr; + + state->router_id = get_u64(&tlv->router_id); + state->router_id_seen = 1; + + return PARSE_IGNORE; +} + +/* This is called directly from babel_write_update() */ +static int +babel_write_router_id(struct babel_tlv *hdr, u64 router_id, + struct babel_write_state *state, int max_len UNUSED) +{ + struct babel_tlv_router_id *tlv = (void *) hdr; + + /* We still assume that first min_length bytes are available and zeroed */ + + TLV_HDR0(tlv, BABEL_TLV_ROUTER_ID); + put_u64(&tlv->router_id, router_id); + + state->router_id = router_id; + state->router_id_seen = 1; + + return sizeof(struct babel_tlv_router_id); +} + +static int 
+babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED, + struct babel_parse_state *state) +{ + struct babel_tlv_next_hop *tlv = (void *) hdr; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + return PARSE_ERROR; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (TLV_OPT_LENGTH(tlv) < sizeof(ip6_addr)) + return PARSE_ERROR; + + state->next_hop = ipa_from_ip6(get_ip6(&tlv->addr)); + return PARSE_IGNORE; + + case BABEL_AE_IP6_LL: + if (TLV_OPT_LENGTH(tlv) < 8) + return PARSE_ERROR; + + state->next_hop = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + return PARSE_IGNORE; + + default: + return PARSE_IGNORE; + } + + return PARSE_IGNORE; +} + +static int +babel_read_update(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_update *tlv = (void *) hdr; + struct babel_msg_update *msg = &m->update; + + msg->type = BABEL_TLV_UPDATE; + msg->ae = tlv->ae; + msg->interval = get_time16(&tlv->interval); + msg->seqno = get_u16(&tlv->seqno); + msg->metric = get_u16(&tlv->metric); + + /* Length of received prefix data without omitted part */ + int len = (tlv->plen + 7)/8 - (int) tlv->omitted; + u8 buf[16] = {}; + + if ((len < 0) || (len > TLV_OPT_LENGTH(tlv))) + return PARSE_ERROR; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + if (tlv->plen > 0) + return PARSE_ERROR; + + msg->prefix = IPA_NONE; + break; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (tlv->plen > MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + /* Cannot omit data if there is no saved prefix */ + if (tlv->omitted && !state->def_ip6_prefix_seen) + return PARSE_ERROR; + + /* Merge saved prefix and received prefix parts */ + memcpy(buf, state->def_ip6_prefix, tlv->omitted); + memcpy(buf + tlv->omitted, tlv->addr, len); + + msg->plen = tlv->plen; + msg->prefix = ipa_from_ip6(get_ip6(buf)); + + if (tlv->flags & BABEL_FLAG_DEF_PREFIX) + { + put_ip6(state->def_ip6_prefix, msg->prefix); + 
state->def_ip6_prefix_seen = 1; + } + + if (tlv->flags & BABEL_FLAG_ROUTER_ID) + { + state->router_id = ((u64) _I2(msg->prefix)) << 32 | _I3(msg->prefix); + state->router_id_seen = 1; + } + break; + + case BABEL_AE_IP6_LL: + /* ??? */ + return PARSE_IGNORE; + + default: + return PARSE_IGNORE; + } + + if (!state->router_id_seen) + { + DBG("Babel: No router ID seen before update\n"); + return PARSE_ERROR; + } + + msg->router_id = state->router_id; + msg->next_hop = state->next_hop; + msg->sender = state->saddr; + + return PARSE_SUCCESS; +} + +static int +babel_write_update(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_update *tlv = (void *) hdr; + struct babel_msg_update *msg = &m->update; + int len0 = 0; + + /* + * When needed, we write Router-ID TLV before Update TLV and return size of + * both of them. There is enough space for the Router-ID TLV, because + * sizeof(struct babel_tlv_router_id) == sizeof(struct babel_tlv_update). 
+ */ + if (!state->router_id_seen || (msg->router_id != state->router_id)) + { + len0 = babel_write_router_id(hdr, msg->router_id, state, max_len); + tlv = (struct babel_tlv_update *) NEXT_TLV(tlv); + } + + int len = sizeof(struct babel_tlv_update) + (msg->plen + 7)/8; + + if (len0 + len > max_len) + return 0; + + memset(tlv, 0, sizeof(struct babel_tlv_update)); + TLV_HDR(tlv, BABEL_TLV_UPDATE, len); + tlv->ae = BABEL_AE_IP6; + tlv->plen = msg->plen; + put_time16(&tlv->interval, msg->interval); + put_u16(&tlv->seqno, msg->seqno); + put_u16(&tlv->metric, msg->metric); + put_ip6_px(tlv->addr, msg->prefix, msg->plen); + + return len0 + len; +} + +static int +babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_route_request *tlv = (void *) hdr; + struct babel_msg_route_request *msg = &m->route_request; + + msg->type = BABEL_TLV_ROUTE_REQUEST; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + /* Wildcard requests must have plen 0 */ + if (tlv->plen > 0) + return PARSE_ERROR; + + msg->full = 1; + return PARSE_SUCCESS; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (tlv->plen > MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < (tlv->plen + 7)/8) + return PARSE_ERROR; + + msg->plen = tlv->plen; + msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + return PARSE_SUCCESS; + + case BABEL_AE_IP6_LL: + return PARSE_ERROR; + + default: + return PARSE_IGNORE; + } + + return PARSE_IGNORE; +} + +static int +babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_route_request *tlv = (void *) hdr; + struct babel_msg_route_request *msg = &m->route_request; + + int len = sizeof(struct babel_tlv_route_request) + (msg->plen + 7)/8; + + if (len > max_len) + return 0; + + TLV_HDR(tlv, BABEL_TLV_ROUTE_REQUEST, len); + + if (msg->full) + { + tlv->ae = BABEL_AE_WILDCARD; + 
tlv->plen = 0; + } + else + { + tlv->ae = BABEL_AE_IP6; + tlv->plen = msg->plen; + put_ip6_px(tlv->addr, msg->prefix, msg->plen); + } + + return len; +} + +static int +babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_seqno_request *tlv = (void *) hdr; + struct babel_msg_seqno_request *msg = &m->seqno_request; + + msg->type = BABEL_TLV_SEQNO_REQUEST; + msg->seqno = get_u16(&tlv->seqno); + msg->hop_count = tlv->hop_count; + msg->router_id = get_u64(&tlv->router_id); + msg->sender = state->saddr; + + if (tlv->hop_count == 0) + return PARSE_ERROR; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + return PARSE_ERROR; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (tlv->plen > MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < (tlv->plen + 7)/8) + return PARSE_ERROR; + + msg->plen = tlv->plen; + msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + return PARSE_SUCCESS; + + case BABEL_AE_IP6_LL: + return PARSE_ERROR; + + default: + return PARSE_IGNORE; + } + + return PARSE_IGNORE; +} + +static int +babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_seqno_request *tlv = (void *) hdr; + struct babel_msg_seqno_request *msg = &m->seqno_request; + + int len = sizeof(struct babel_tlv_seqno_request) + (msg->plen + 7)/8; + + if (len > max_len) + return 0; + + TLV_HDR(tlv, BABEL_TLV_SEQNO_REQUEST, len); + tlv->ae = BABEL_AE_IP6; + tlv->plen = msg->plen; + put_u16(&tlv->seqno, msg->seqno); + tlv->hop_count = msg->hop_count; + put_u64(&tlv->router_id, msg->router_id); + put_ip6_px(tlv->addr, msg->prefix, msg->plen); + + return len; +} + +static inline int +babel_read_tlv(struct babel_tlv *hdr, + union babel_msg *msg, + struct babel_parse_state *state) +{ + if ((hdr->type <= BABEL_TLV_PADN) || + (hdr->type >= BABEL_TLV_MAX) || + !tlv_data[hdr->type].read_tlv) + 
return PARSE_IGNORE; + + if (TLV_LENGTH(hdr) < tlv_data[hdr->type].min_length) + return PARSE_ERROR; + + memset(msg, 0, sizeof(*msg)); + return tlv_data[hdr->type].read_tlv(hdr, msg, state); +} + +static int +babel_write_tlv(struct babel_tlv *hdr, + union babel_msg *msg, + struct babel_write_state *state, + int max_len) +{ + if ((msg->type <= BABEL_TLV_PADN) || + (msg->type >= BABEL_TLV_MAX) || + !tlv_data[msg->type].write_tlv) + return 0; + + if (tlv_data[msg->type].min_length > max_len) + return 0; + + memset(hdr, 0, tlv_data[msg->type].min_length); + return tlv_data[msg->type].write_tlv(hdr, msg, state, max_len); +} + + +/* + * Packet RX/TX functions + */ + +static int +babel_send_to(struct babel_iface *ifa, ip_addr dest) +{ + sock *sk = ifa->sk; + struct babel_pkt_header *hdr = (void *) sk->tbuf; + int len = get_u16(&hdr->length) + sizeof(struct babel_pkt_header); + + DBG("Babel: Sending %d bytes to %I\n", len, dest); + return sk_send_to(sk, len, dest, 0); +} + +/** + * babel_write_queue - Write a TLV queue to a transmission buffer + * @ifa: Interface holding the transmission buffer + * @queue: TLV queue to write (containing internal-format TLVs) + * + * This function writes a packet to the interface transmission buffer with as + * many TLVs from the &queue as will fit in the buffer. It returns the number of + * bytes written (including the packet header), or 0 if the queue is empty. The function is called by + * babel_send_queue() and babel_send_unicast() to construct packets for + * transmission, and uses per-TLV helper functions to convert the + * internal-format TLVs to their wire representations. + * + * The TLVs in the queue are freed after they are written to the buffer. 
+ */ +static int +babel_write_queue(struct babel_iface *ifa, list *queue) +{ + struct babel_proto *p = ifa->proto; + struct babel_write_state state = {}; + + if (EMPTY_LIST(*queue)) + return 0; + + byte *pos = ifa->sk->tbuf; + byte *end = pos + ifa->tx_length; + + struct babel_pkt_header *pkt = (void *) pos; + pkt->magic = BABEL_MAGIC; + pkt->version = BABEL_VERSION; + pkt->length = 0; + pos += sizeof(struct babel_pkt_header); + + struct babel_msg_node *msg; + WALK_LIST_FIRST(msg, *queue) + { + int len = babel_write_tlv((struct babel_tlv *) pos, &msg->msg, &state, end - pos); + + if (!len) + break; + + pos += len; + rem_node(NODE msg); + sl_free(p->msg_slab, msg); + } + + int plen = pos - (byte *) pkt; + put_u16(&pkt->length, plen - sizeof(struct babel_pkt_header)); + + return plen; +} + +void +babel_send_queue(void *arg) +{ + struct babel_iface *ifa = arg; + while ((babel_write_queue(ifa, &ifa->msg_queue) > 0) && + (babel_send_to(ifa, IP6_BABEL_ROUTERS) > 0)); +} + +static inline void +babel_kick_queue(struct babel_iface *ifa) +{ + /* + * Only schedule send event if there is not already data in the socket buffer. + * Otherwise we may overwrite the data already in the buffer. + */ + + if ((ifa->sk->tpos == ifa->sk->tbuf) && !ev_active(ifa->send_event)) + ev_schedule(ifa->send_event); +} + +/** + * babel_send_unicast - send a single TLV via unicast to a destination + * @msg: TLV to send + * @ifa: Interface to send via + * @dest: Destination of the TLV + * + * This function is used to send a single TLV via unicast to a designated + * receiver. This is used for replying to certain incoming requests, and for + * sending unicast requests to refresh routes before they expire. 
+ */ +void +babel_send_unicast(union babel_msg *msg, struct babel_iface *ifa, ip_addr dest) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_node *msgn = sl_alloc(p->msg_slab); + list queue; + + msgn->msg = *msg; + init_list(&queue); + add_tail(&queue, NODE msgn); + babel_write_queue(ifa, &queue); + babel_send_to(ifa, dest); + + /* We could overwrite waiting packet here, we may have to kick TX queue */ + if (!EMPTY_LIST(ifa->msg_queue)) + babel_kick_queue(ifa); +} + +/** + * babel_enqueue - enqueue a TLV for transmission on an interface + * @msg: TLV to enqueue (in internal TLV format) + * @ifa: Interface to enqueue to + * + * This function is called to enqueue a TLV for subsequent transmission on an + * interface. The transmission event is triggered whenever a TLV is enqueued; + * this ensures that TLVs will be transmitted in a timely manner, but that TLVs + * which are enqueued in rapid succession can be transmitted together in one + * packet. + */ +void +babel_enqueue(union babel_msg *msg, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_node *msgn = sl_alloc(p->msg_slab); + msgn->msg = *msg; + add_tail(&ifa->msg_queue, NODE msgn); + babel_kick_queue(ifa); +} + +/** + * babel_process_packet - process incoming data packet + * @pkt: Pointer to the packet data + * @len: Length of received packet + * @saddr: Address of packet sender + * @ifa: Interface packet was received on. + * + * This function is the main processing hook of incoming Babel packets. It + * checks that the packet header is well-formed, then processes the TLVs + * contained in the packet. This is done in two passes: First all TLVs are + * parsed into the internal TLV format. If a TLV parser fails, processing of the + * rest of the packet is aborted. + * + * After the parsing step, the TLV handlers are called for each parsed TLV in + * order. 
+ */
+static void
+babel_process_packet(struct babel_pkt_header *pkt, int len,
+                     ip_addr saddr, struct babel_iface *ifa)
+{
+  struct babel_proto *p = ifa->proto;
+  struct babel_tlv *tlv;
+  struct babel_msg_node *msg;
+  list msgs;
+  int res;
+
+  /* Packet size as claimed by the header: fixed header plus TLV body */
+  int plen = sizeof(struct babel_pkt_header) + get_u16(&pkt->length);
+  byte *pos;
+  byte *end = (byte *)pkt + plen;
+
+  /* Parser state passed to every babel_read_tlv() call for this packet */
+  struct babel_parse_state state = {
+    .proto = p,
+    .ifa = ifa,
+    .saddr = saddr,
+    .next_hop = saddr,
+  };
+
+  if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION))
+  {
+    TRACE(D_PACKETS, "Strange packet from %I via %s - magic %d version %d",
+          saddr, ifa->iface->name, pkt->magic, pkt->version);
+    return;
+  }
+
+  /* The claimed size must fit in what was actually received */
+  if (plen > len)
+  {
+    LOG_PKT("Bad packet from %I via %s - %s (%u)",
+            saddr, ifa->iface->name, "length mismatch", plen);
+    return;
+  }
+
+  TRACE(D_PACKETS, "Packet received from %I via %s",
+        saddr, ifa->iface->name);
+
+  init_list(&msgs);
+
+  /* First pass through the packet TLV by TLV, parsing each into internal data
+     structures. */
+  for (tlv = FIRST_TLV(pkt);
+       (byte *)tlv < end;
+       tlv = NEXT_TLV(tlv))
+  {
+    /* Ugly special case: Pad1 is skipped without parsing, NEXT_TLV() (defined
+       outside this chunk) is relied on to step over it. */
+    if (tlv->type == BABEL_TLV_PAD1)
+      continue;
+
+    /* The end of the common TLV header */
+    pos = (byte *)tlv + sizeof(struct babel_tlv);
+    if ((pos > end) || (pos + tlv->length > end))
+    {
+      /* A pointer difference has type ptrdiff_t; cast it so the variadic
+         argument really is the int that %d consumes. */
+      LOG_PKT("Bad TLV from %I via %s type %d pos %d - framing error",
+              saddr, ifa->iface->name, tlv->type, (int) ((byte *)tlv - (byte *)pkt));
+      break;
+    }
+
+    msg = sl_alloc(p->msg_slab);
+    res = babel_read_tlv(tlv, &msg->msg, &state);
+    if (res == PARSE_SUCCESS)
+    {
+      add_tail(&msgs, NODE msg);
+    }
+    else if (res == PARSE_IGNORE)
+    {
+      DBG("Babel: Ignoring TLV of type %d\n", tlv->type);
+      sl_free(p->msg_slab, msg);
+    }
+    else /* PARSE_ERROR */
+    {
+      /* Same int cast as above for the %d offset argument */
+      LOG_PKT("Bad TLV from %I via %s type %d pos %d - parse error",
+              saddr, ifa->iface->name, tlv->type, (int) ((byte *)tlv - (byte *)pkt));
+      sl_free(p->msg_slab, msg);
+      /* Stop parsing; TLVs parsed so far are still handled below */
+      break;
+    }
+  }
+
+  /* Parsing done, handle all parsed TLVs */
+  WALK_LIST_FIRST(msg, msgs)
+  {
+    /* Types without a handler are simply dropped */
+    if (tlv_data[msg->msg.type].handle_tlv)
+      tlv_data[msg->msg.type].handle_tlv(&msg->msg, ifa);
+    rem_node(NODE msg);
+    sl_free(p->msg_slab, msg);
+  }
+}
+
+/* Socket error hook: logs the error together with the protocol instance name
+   and the interface name. Nothing else is done here. */
+static void
+babel_err_hook(sock *sk, int err)
+{
+  struct babel_iface *ifa = sk->data;
+  struct babel_proto *p = ifa->proto;
+
+  log(L_ERR "%s: Socket error on %s: %M", p->p.name, ifa->iface->name, err);
+  /* FIXME: Drop queued TLVs here? */
+}
+
+
+/* Socket TX hook: continues sending from the interface message queue. */
+static void
+babel_tx_hook(sock *sk)
+{
+  struct babel_iface *ifa = sk->data;
+
+  DBG("Babel: TX hook called (iface %s, src %I, dst %I)\n",
+      sk->iface->name, sk->saddr, sk->daddr);
+
+  babel_send_queue(ifa);
+}
+
+
+/*
+ * Socket RX hook: filters the received datagram and passes it on to
+ * babel_process_packet(). Always returns 1.
+ *
+ * Packets are ignored silently when the socket's lifindex differs from the
+ * interface index, or when the sender address equals the interface address
+ * (our own packet). They are dropped with a log message when the source
+ * address is not link-local, the source port is not the configured port, the
+ * datagram is shorter than the packet header, or it was truncated.
+ *
+ * NOTE(review): DROP() and DROP1() are defined outside this chunk; presumably
+ * they set err_dsc/err_val and jump to the drop label - confirm.
+ */
+static int
+babel_rx_hook(sock *sk, int len)
+{
+  struct babel_iface *ifa = sk->data;
+  struct babel_proto *p = ifa->proto;
+  const char *err_dsc = NULL;
+  uint err_val = 0;
+
+  if (sk->lifindex != ifa->iface->index)
+    return 1;
+
+  DBG("Babel: RX hook called (iface %s, src %I, dst %I)\n",
+      sk->iface->name, sk->faddr, sk->laddr);
+
+  /* Silently ignore my own packets */
+  if (ipa_equal(ifa->iface->addr->ip, sk->faddr))
+    return 1;
+
+  if (!ipa_is_link_local(sk->faddr))
+    DROP1("wrong src address");
+
+  if (sk->fport != ifa->cf->port)
+    DROP("wrong src port", sk->fport);
+
+  if (len < sizeof(struct babel_pkt_header))
+    DROP("too short", len);
+
+  if (sk->flags & SKF_TRUNCATED)
+    DROP("truncated", len);
+
+  babel_process_packet((struct babel_pkt_header *) sk->rbuf, len, sk->faddr, ifa);
+  return 1;
+
+drop:
+  LOG_PKT("Bad packet from %I via %s - %s (%u)",
+          sk->faddr, sk->iface->name, err_dsc, err_val);
+  return 1;
+}
+
+/*
+ * Create and open the UDP socket of a Babel interface.
+ *
+ * The socket uses the configured port as both source and destination port,
+ * is bound to the interface, gets the RX/TX/error hooks above, and joins the
+ * IP6_BABEL_ROUTERS group. On success the socket is stored in ifa->sk and 1
+ * is returned. If opening, multicast setup or the group join fails, the error
+ * is logged, the socket is freed and 0 is returned; ifa->sk is not assigned.
+ */
+int
+babel_open_socket(struct babel_iface *ifa)
+{
+  struct babel_proto *p = ifa->proto;
+
+  sock *sk;
+  sk = sk_new(ifa->pool);
+  sk->type = SK_UDP;
+  sk->sport = ifa->cf->port;
+  sk->dport = ifa->cf->port;
+  sk->iface = ifa->iface;
+
+  sk->rx_hook = babel_rx_hook;
+  sk->tx_hook = babel_tx_hook;
+  sk->err_hook = babel_err_hook;
+  sk->data = ifa;
+
+  sk->tos = ifa->cf->tx_tos;
+  sk->priority = ifa->cf->tx_priority;
+  sk->ttl = 1;
+  sk->flags = SKF_LADDR_RX;
+
+  if (sk_open(sk) < 0)
+    goto err;
+
+  if (sk_setup_multicast(sk) < 0)
+    goto err;
+
+  if (sk_join_group(sk, IP6_BABEL_ROUTERS) < 0)
+    goto err;
+
+  ifa->sk = sk;
+  return 1;
+
+err:
+  sk_log_error(sk, p->p.name);
+  rfree(sk);
+  return 0;
+}
diff --git a/sysdep/autoconf.h.in b/sysdep/autoconf.h.in
index a9e46e27..c73270c3 100644
--- a/sysdep/autoconf.h.in
+++
b/sysdep/autoconf.h.in @@ -43,6 +43,7 @@ #undef CONFIG_BGP #undef CONFIG_OSPF #undef CONFIG_PIPE +#undef CONFIG_BABEL /* We use multithreading */ #undef USE_PTHREADS |