summaryrefslogtreecommitdiff
path: root/proto
diff options
context:
space:
mode:
Diffstat (limited to 'proto')
-rw-r--r--proto/babel/babel.c84
-rw-r--r--proto/babel/babel.h24
-rw-r--r--proto/babel/config.Y20
-rw-r--r--proto/babel/packets.c126
-rw-r--r--proto/bgp/bgp.c2
-rw-r--r--proto/bgp/bgp.h2
-rw-r--r--proto/bgp/packets.c24
-rw-r--r--proto/radv/config.Y18
-rw-r--r--proto/radv/packets.c43
-rw-r--r--proto/radv/radv.h10
10 files changed, 330 insertions, 23 deletions
diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index 9f33dd34..7f0cca73 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -596,6 +596,7 @@ babel_update_cost(struct babel_neighbor *nbr)
switch (cf->type)
{
case BABEL_IFACE_TYPE_WIRED:
+ case BABEL_IFACE_TYPE_TUNNEL:
/* k-out-of-j selection - Appendix 2.1 in the RFC. */
/* Link is bad if less than cf->limit/16 of expected hellos were received */
@@ -624,6 +625,24 @@ babel_update_cost(struct babel_neighbor *nbr)
break;
}
+ if (cf->rtt_cost && nbr->srtt > cf->rtt_min)
+ {
+ uint rtt_cost = cf->rtt_cost;
+
+ if (nbr->srtt < cf->rtt_max)
+ {
+ uint rtt_interval = cf->rtt_max TO_US - cf->rtt_min TO_US;
+ uint rtt_diff = (nbr->srtt TO_US - cf->rtt_min TO_US);
+
+ rtt_cost = (rtt_cost * rtt_diff) / rtt_interval;
+ }
+
+ txcost = MIN(txcost + rtt_cost, BABEL_INFINITY);
+
+ TRACE(D_EVENTS, "Added RTT cost %u to nbr %I on %s with srtt %t ms",
+ rtt_cost, nbr->addr, nbr->ifa->iface->name, nbr->srtt * 1000);
+ }
+
done:
/* If RX cost changed, send IHU with next Hello */
if (rxcost != nbr->rxcost)
@@ -854,6 +873,12 @@ babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neig
msg->ihu.rxcost = n->rxcost;
msg->ihu.interval = ifa->cf->ihu_interval;
+ if (n->last_tstamp_rcvd && ifa->cf->rtt_send)
+ {
+ msg->ihu.tstamp = n->last_tstamp;
+ msg->ihu.tstamp_rcvd = n->last_tstamp_rcvd TO_US;
+ }
+
TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %t",
msg->ihu.addr, msg->ihu.rxcost, (btime) msg->ihu.interval);
}
@@ -893,6 +918,9 @@ babel_send_hello(struct babel_iface *ifa, uint interval)
msg.hello.seqno = ifa->hello_seqno++;
msg.hello.interval = interval ?: ifa->cf->hello_interval;
+ if (ifa->cf->rtt_send)
+ msg.hello.tstamp = 1; /* real timestamp will be set on TLV write */
+
TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t",
ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval);
@@ -1199,14 +1227,26 @@ babel_handle_hello(union babel_msg *m, struct babel_iface *ifa)
msg->seqno, (btime) msg->interval);
struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender);
+ struct babel_iface_config *cf = n->ifa->cf;
int first_hello = !n->hello_cnt;
+ if (msg->tstamp)
+ {
+ n->last_tstamp = msg->tstamp;
+ n->last_tstamp_rcvd = msg->pkt_received;
+ }
babel_update_hello_history(n, msg->seqno, msg->interval);
babel_update_cost(n);
/* Speed up session establishment by sending IHU immediately */
if (first_hello)
- babel_send_ihu(ifa, n);
+ {
+ /* if using RTT, all IHUs must be paired with hellos */
+ if(cf->rtt_send)
+ babel_send_hello(ifa, 0);
+ else
+ babel_send_ihu(ifa, n);
+ }
}
void
@@ -1225,6 +1265,39 @@ babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa)
struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender);
n->txcost = msg->rxcost;
n->ihu_expiry = current_time() + BABEL_IHU_EXPIRY_FACTOR(msg->interval);
+
+ if (msg->tstamp)
+ {
+ u32 rtt_sample = 0, pkt_received = msg->pkt_received TO_US;
+ int remote_time, full_time;
+
+ /* processing time reported by peer */
+ remote_time = (n->last_tstamp - msg->tstamp_rcvd);
+ /* time since we sent the last timestamp - RTT including remote time */
+ full_time = (pkt_received - msg->tstamp);
+
+ /* sanity checks */
+ if (remote_time < 0 || full_time < 0 ||
+ remote_time US_ > BABEL_RTT_MAX_VALUE || full_time US_ > BABEL_RTT_MAX_VALUE)
+ goto out;
+
+ if (remote_time < full_time)
+ rtt_sample = full_time - remote_time;
+
+ if (n->srtt)
+ {
+ uint decay = n->ifa->cf->rtt_decay;
+
+ n->srtt = (decay * rtt_sample + (256 - decay) * n->srtt) / 256;
+ }
+ else
+ n->srtt = rtt_sample;
+
+ TRACE(D_EVENTS, "RTT sample for neighbour %I on %s: %u us (srtt %t ms)",
+ n->addr, ifa->ifname, rtt_sample, n->srtt * 1000);
+ }
+
+out:
babel_update_cost(n);
}
@@ -2199,8 +2272,8 @@ babel_show_neighbors(struct proto *P, const char *iff)
}
cli_msg(-1024, "%s:", p->p.name);
- cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s %4s",
- "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires", "Auth");
+ cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s %4s %9s",
+ "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires", "Auth", "RTT (ms)");
WALK_LIST(ifa, p->interfaces)
{
@@ -2215,9 +2288,10 @@ babel_show_neighbors(struct proto *P, const char *iff)
uint hellos = u32_popcount(n->hello_map);
btime timer = (n->hello_expiry ?: n->init_expiry) - current_time();
- cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t %-4s",
+ cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t %-4s %9t",
n->addr, ifa->iface->name, n->cost, rts, hellos, MAX(timer, 0),
- n->auth_passed ? "Yes" : "No");
+ n->auth_passed ? "Yes" : "No",
+ n->srtt * 1000);
}
}
}
diff --git a/proto/babel/babel.h b/proto/babel/babel.h
index dcd303e1..edde4cab 100644
--- a/proto/babel/babel.h
+++ b/proto/babel/babel.h
@@ -53,10 +53,16 @@
#define BABEL_GARBAGE_INTERVAL (300 S_)
#define BABEL_RXCOST_WIRED 96
#define BABEL_RXCOST_WIRELESS 256
+#define BABEL_RXCOST_RTT 96
#define BABEL_INITIAL_HOP_COUNT 255
#define BABEL_MAX_SEND_INTERVAL 5 /* Unused ? */
#define BABEL_INITIAL_NEIGHBOR_TIMEOUT (60 S_)
+#define BABEL_RTT_MAX_VALUE (600 S_)
+#define BABEL_RTT_MIN (10 MS_)
+#define BABEL_RTT_MAX (120 MS_)
+#define BABEL_RTT_DECAY 42
+
/* Max interval that will not overflow when carried as 16-bit centiseconds */
#define BABEL_TIME_UNITS 10000 /* On-wire times are counted in centiseconds */
#define BABEL_MIN_INTERVAL (0x0001 * BABEL_TIME_UNITS)
@@ -96,6 +102,8 @@ enum babel_tlv_type {
enum babel_subtlv_type {
BABEL_SUBTLV_PAD1 = 0,
BABEL_SUBTLV_PADN = 1,
+ BABEL_SUBTLV_DIVERSITY = 2, /* we don't support this */
+ BABEL_SUBTLV_TIMESTAMP = 3,
/* Mandatory subtlvs */
BABEL_SUBTLV_SOURCE_PREFIX = 128,
@@ -106,6 +114,7 @@ enum babel_iface_type {
BABEL_IFACE_TYPE_UNDEF = 0,
BABEL_IFACE_TYPE_WIRED = 1,
BABEL_IFACE_TYPE_WIRELESS = 2,
+ BABEL_IFACE_TYPE_TUNNEL = 3,
BABEL_IFACE_TYPE_MAX
};
@@ -141,6 +150,12 @@ struct babel_iface_config {
uint ihu_interval; /* IHU interval, in us */
uint update_interval; /* Update interval, in us */
+ btime rtt_min; /* rtt above which to start penalising metric */
+ btime rtt_max; /* max rtt metric penalty applied above this */
+ u16 rtt_cost; /* metric penalty to apply at rtt_max */
+ u16 rtt_decay; /* decay of neighbour RTT (units of 1/256) */
+ u8 rtt_send; /* whether to send timestamps on this interface */
+
u16 rx_buffer; /* RX buffer size, 0 for MTU */
u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */
int tx_tos;
@@ -229,6 +244,10 @@ struct babel_neighbor {
u16 next_hello_seqno;
uint last_hello_int;
+ u32 last_tstamp;
+ btime last_tstamp_rcvd;
+ btime srtt;
+
u32 auth_pc_unicast;
u32 auth_pc_multicast;
u8 auth_passed;
@@ -326,6 +345,8 @@ struct babel_msg_hello {
u16 seqno;
uint interval;
ip_addr sender;
+ u32 tstamp;
+ btime pkt_received;
};
struct babel_msg_ihu {
@@ -335,6 +356,9 @@ struct babel_msg_ihu {
uint interval;
ip_addr addr;
ip_addr sender;
+ u32 tstamp;
+ u32 tstamp_rcvd;
+ btime pkt_received;
};
struct babel_msg_update {
diff --git a/proto/babel/config.Y b/proto/babel/config.Y
index 1b4dc6f5..b8af0267 100644
--- a/proto/babel/config.Y
+++ b/proto/babel/config.Y
@@ -26,7 +26,7 @@ CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT,
TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK,
NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS,
ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE,
- EXTENDED)
+ EXTENDED, TUNNEL, RTT, MIN, MAX, DECAY, SEND, TIMESTAMPS)
CF_GRAMMAR
@@ -67,6 +67,10 @@ babel_iface_start:
BABEL_IFACE->limit = BABEL_HELLO_LIMIT;
BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL;
BABEL_IFACE->tx_priority = sk_priority_control;
+ BABEL_IFACE->rtt_min = BABEL_RTT_MIN;
+ BABEL_IFACE->rtt_max = BABEL_RTT_MAX;
+ BABEL_IFACE->rtt_decay = BABEL_RTT_DECAY;
+ BABEL_IFACE->rtt_send = 1;
BABEL_IFACE->check_link = 1;
BABEL_IFACE->ext_next_hop = 1;
};
@@ -87,8 +91,16 @@ babel_iface_finish:
BABEL_IFACE->hello_interval = BABEL_HELLO_INTERVAL_WIRED;
if (!BABEL_IFACE->rxcost)
BABEL_IFACE->rxcost = BABEL_RXCOST_WIRED;
+ if (BABEL_IFACE->type == BABEL_IFACE_TYPE_TUNNEL && !BABEL_IFACE->rtt_cost)
+ BABEL_IFACE->rtt_cost = BABEL_RXCOST_RTT;
}
+ if (BABEL_IFACE->rtt_cost && !BABEL_IFACE->rtt_send)
+ cf_error("Can't set RTT cost when sending timestamps is disabled");
+
+ if (BABEL_IFACE->rtt_min >= BABEL_IFACE->rtt_max)
+ cf_error("Min RTT must be smaller than max RTT");
+
/* Make sure we do not overflow the 16-bit centisec fields */
if (!BABEL_IFACE->update_interval)
BABEL_IFACE->update_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR, BABEL_MAX_INTERVAL);
@@ -136,6 +148,7 @@ babel_iface_item:
| LIMIT expr { BABEL_IFACE->limit = $2; if (($2<1) || ($2>16)) cf_error("Limit must be in range 1-16"); }
| TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; }
| TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; }
+ | TYPE TUNNEL { BABEL_IFACE->type = BABEL_IFACE_TYPE_TUNNEL; }
| HELLO INTERVAL expr_us { BABEL_IFACE->hello_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Hello interval must be in range 10 ms - 655 s"); }
| UPDATE INTERVAL expr_us { BABEL_IFACE->update_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Update interval must be in range 10 ms - 655 s"); }
| RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); }
@@ -149,6 +162,11 @@ babel_iface_item:
| AUTHENTICATION NONE { BABEL_IFACE->auth_type = BABEL_AUTH_NONE; }
| AUTHENTICATION MAC { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 0; }
| AUTHENTICATION MAC PERMISSIVE { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 1; }
+ | RTT MIN expr_us { BABEL_IFACE->rtt_min = $3; }
+ | RTT MAX expr_us { BABEL_IFACE->rtt_max = $3; }
+ | RTT COST expr { BABEL_IFACE->rtt_cost = $3; if ($3 >= BABEL_INFINITY) cf_error("RTT cost must be < 65535"); }
+ | RTT DECAY expr { BABEL_IFACE->rtt_decay = $3; if (($3 < 1) || ($3 > 256)) cf_error("RTT decay must be between 1-256"); }
+ | SEND TIMESTAMPS bool { BABEL_IFACE->rtt_send = $3; }
| password_list
;
diff --git a/proto/babel/packets.c b/proto/babel/packets.c
index 61c94cc5..f1895655 100644
--- a/proto/babel/packets.c
+++ b/proto/babel/packets.c
@@ -58,6 +58,13 @@ struct babel_tlv_ihu {
u8 addr[0];
} PACKED;
+struct babel_subtlv_timestamp { /* Layout of the timestamp sub-TLV appended to Hello and IHU TLVs */
+ u8 type;
+ u8 length;
+ u32 tstamp; /* Hello: sender's timestamp; IHU: copy of the last timestamp received from the neighbor */
+ u32 tstamp_rcvd; /* only used in IHU: local time at which that timestamp was received */
+} PACKED;
+
struct babel_tlv_router_id {
u8 type;
u8 length;
@@ -161,6 +168,7 @@ struct babel_parse_state {
const struct babel_tlv_data* (*get_subtlv_data)(u8 type);
struct babel_proto *proto;
struct babel_iface *ifa;
+ btime received_time;
ip_addr saddr;
ip_addr next_hop_ip4;
ip_addr next_hop_ip6;
@@ -172,6 +180,7 @@ struct babel_parse_state {
u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */
u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */
u8 def_ip4_via_ip6_prefix_seen; /* def_ip4_via_ip6_prefix is valid */
+ u8 hello_tstamp_seen; /* pkt contains a hello timestamp */
u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */
u8 sadr_enabled;
u8 is_unicast;
@@ -336,6 +345,7 @@ static int babel_read_update(struct babel_tlv *hdr, union babel_msg *msg, struct
static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static int babel_read_source_prefix(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
+static int babel_read_timestamp(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state);
static uint babel_write_ack(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static uint babel_write_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
@@ -344,6 +354,7 @@ static uint babel_write_update(struct babel_tlv *hdr, union babel_msg *msg, stru
static uint babel_write_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static uint babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len);
static int babel_write_source_prefix(struct babel_tlv *hdr, net_addr *net, uint max_len);
+static int babel_write_timestamp(struct babel_tlv *hdr, u32 tstamp, u32 tstamp_rcvd, uint max_len);
static const struct babel_tlv_data tlv_data[BABEL_TLV_MAX] = {
[BABEL_TLV_ACK_REQ] = {
@@ -419,6 +430,13 @@ static const struct babel_tlv_data *get_packet_tlv_data(u8 type)
return type < sizeof(tlv_data) / sizeof(*tlv_data) ? &tlv_data[type] : NULL;
}
+static const struct babel_tlv_data timestamp_tlv_data = { /* Descriptor returned for BABEL_SUBTLV_TIMESTAMP; only a read callback is set */
+ sizeof(struct babel_subtlv_timestamp),
+ babel_read_timestamp,
+ NULL,
+ NULL
+};
+
static const struct babel_tlv_data source_prefix_tlv_data = {
sizeof(struct babel_subtlv_source_prefix),
babel_read_source_prefix,
@@ -430,6 +448,8 @@ static const struct babel_tlv_data *get_packet_subtlv_data(u8 type)
{
switch (type)
{
+ case BABEL_SUBTLV_TIMESTAMP:
+ return &timestamp_tlv_data;
case BABEL_SUBTLV_SOURCE_PREFIX:
return &source_prefix_tlv_data;
@@ -491,16 +511,34 @@ babel_read_hello(struct babel_tlv *hdr, union babel_msg *m,
static uint
babel_write_hello(struct babel_tlv *hdr, union babel_msg *m,
- struct babel_write_state *state UNUSED, uint max_len UNUSED)
+ struct babel_write_state *state UNUSED, uint max_len)
{
struct babel_tlv_hello *tlv = (void *) hdr;
struct babel_msg_hello *msg = &m->hello;
+ uint len = sizeof(struct babel_tlv_hello);
TLV_HDR0(tlv, BABEL_TLV_HELLO);
put_u16(&tlv->seqno, msg->seqno);
put_time16(&tlv->interval, msg->interval);
- return sizeof(struct babel_tlv_hello);
+ if (msg->tstamp)
+ {
+ /*
+ * There can be a substantial delay between when the babel_msg was created
+ * and when it is serialised. We don't want this included in the RTT
+ * measurement, so replace the timestamp with the current time to get as
+ * close as possible to on-wire time for the packet.
+ */
+ u32 tstamp = current_time_now() TO_US;
+
+ int l = babel_write_timestamp(hdr, tstamp, 0, max_len);
+ if (l < 0)
+ return 0;
+
+ len += l;
+ }
+
+ return len;
}
static int
@@ -565,6 +603,7 @@ babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m,
{
struct babel_tlv_ihu *tlv = (void *) hdr;
struct babel_msg_ihu *msg = &m->ihu;
+ uint len = sizeof(*tlv);
if (ipa_is_link_local(msg->addr) && max_len < sizeof(struct babel_tlv_ihu) + 8)
return 0;
@@ -576,12 +615,24 @@ babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m,
if (!ipa_is_link_local(msg->addr))
{
tlv->ae = BABEL_AE_WILDCARD;
- return sizeof(struct babel_tlv_ihu);
+ goto out;
}
put_ip6_ll(&tlv->addr, msg->addr);
tlv->ae = BABEL_AE_IP6_LL;
hdr->length += 8;
- return sizeof(struct babel_tlv_ihu) + 8;
+ len += 8;
+
+out:
+ if (msg->tstamp)
+ {
+ int l = babel_write_timestamp(hdr, msg->tstamp, msg->tstamp_rcvd, max_len);
+ if (l < 0)
+ return 0;
+
+ len += l;
+ }
+
+ return len;
}
static int
@@ -1249,6 +1300,66 @@ babel_write_source_prefix(struct babel_tlv *hdr, net_addr *n, uint max_len)
return len;
}
+/*
+ * Parse a timestamp sub-TLV found inside a Hello or an IHU.
+ *
+ * For a Hello, the sender's timestamp and the packet receive time are stored
+ * in the message and the parse state remembers that a Hello timestamp was seen.
+ * For an IHU, both timestamps and the receive time are stored, but only when a
+ * timestamped Hello was already seen in the same parse state.
+ *
+ * Returns PARSE_SUCCESS, or PARSE_ERROR when the sub-TLV is too short or is
+ * attached to any other message type.
+ */
+static int
+babel_read_timestamp(struct babel_tlv *hdr, union babel_msg *msg,
+                     struct babel_parse_state *state)
+{
+  struct babel_subtlv_timestamp *ts = (void *) hdr;
+
+  if (msg->type == BABEL_TLV_HELLO)
+  {
+    /* A Hello needs room for the single sender timestamp */
+    if (ts->length < 4)
+      return PARSE_ERROR;
+
+    msg->hello.tstamp = get_u32(&ts->tstamp);
+    msg->hello.pkt_received = state->received_time;
+    state->hello_tstamp_seen = 1;
+    return PARSE_SUCCESS;
+  }
+
+  if (msg->type == BABEL_TLV_IHU)
+  {
+    /* An IHU carries two timestamps */
+    if (ts->length < 8)
+      return PARSE_ERROR;
+
+    /* RTT calculation relies on a Hello always being present with an IHU */
+    if (state->hello_tstamp_seen)
+    {
+      msg->ihu.tstamp = get_u32(&ts->tstamp);
+      msg->ihu.tstamp_rcvd = get_u32(&ts->tstamp_rcvd);
+      msg->ihu.pkt_received = state->received_time;
+    }
+
+    return PARSE_SUCCESS;
+  }
+
+  /* Timestamps are not accepted on any other TLV */
+  return PARSE_ERROR;
+}
+
+/*
+ * Append a timestamp sub-TLV to the Hello or IHU TLV at @hdr.
+ *
+ * @hdr:         enclosing TLV, already filled in by the caller; its length
+ *               field is grown to cover the appended sub-TLV
+ * @tstamp:      timestamp to carry
+ * @tstamp_rcvd: receive timestamp, written for IHU only
+ * @max_len:     room available for the enclosing TLV, measured from @hdr
+ *
+ * Returns the number of bytes appended, or -1 when the sub-TLV does not fit.
+ */
+static int
+babel_write_timestamp(struct babel_tlv *hdr, u32 tstamp, u32 tstamp_rcvd, uint max_len)
+{
+  struct babel_subtlv_timestamp *tlv = (void *) NEXT_TLV(hdr);
+  uint len = sizeof(*tlv);
+
+  /* Bytes the enclosing TLV already occupies in front of the sub-TLV */
+  uint used = (char *) tlv - (char *) hdr;
+
+  /* A Hello carries only the sender timestamp, not tstamp_rcvd */
+  if (hdr->type == BABEL_TLV_HELLO)
+    len -= 4;
+
+  /*
+   * max_len counts from hdr (babel_write_ihu compares it against the size of
+   * the whole IHU), so the part already used has to be subtracted before
+   * checking that the sub-TLV fits; comparing len alone against max_len would
+   * allow writing past the available space.
+   */
+  if ((used > max_len) || (len > max_len - used))
+    return -1;
+
+  TLV_HDR(tlv, BABEL_SUBTLV_TIMESTAMP, len);
+  hdr->length += len;
+
+  put_u32(&tlv->tstamp, tstamp);
+
+  if (hdr->type == BABEL_TLV_IHU)
+    put_u32(&tlv->tstamp_rcvd, tstamp_rcvd);
+
+  return len;
+}
+
static inline int
babel_read_subtlvs(struct babel_tlv *hdr,
union babel_msg *msg,
@@ -1518,6 +1629,13 @@ babel_process_packet(struct babel_iface *ifa,
.saddr = saddr,
.next_hop_ip6 = saddr,
.sadr_enabled = babel_sadr_enabled(p),
+
+ /*
+ * The core updates current_time() after returning from poll(), so this is
+ * actually the time the packet was received, even though there may have
+ * been a bit of delay before we got to process it
+ */
+ .received_time = current_time(),
};
if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION))
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index a6e9cf83..0a2e8f5a 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -86,7 +86,6 @@
* RFC 5065 - AS confederations for BGP
* RFC 5082 - Generalized TTL Security Mechanism
* RFC 5492 - Capabilities Advertisement with BGP
- * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop
* RFC 5575 - Dissemination of Flow Specification Rules
* RFC 5668 - 4-Octet AS Specific BGP Extended Community
* RFC 6286 - AS-Wide Unique BGP Identifier
@@ -101,6 +100,7 @@
* RFC 8203 - BGP Administrative Shutdown Communication
* RFC 8212 - Default EBGP Route Propagation Behavior without Policies
* RFC 8654 - Extended Message Support for BGP
+ * RFC 8950 - Advertising IPv4 NLRI with an IPv6 Next Hop
* RFC 9072 - Extended Optional Parameters Length for BGP OPEN Message
* RFC 9117 - Revised Validation Procedure for BGP Flow Specifications
* RFC 9234 - Route Leak Prevention and Detection Using Roles
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 324df43c..c11433ec 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -232,7 +232,7 @@ struct bgp_af_caps {
u8 llgr_able; /* Long-lived GR, RFC draft */
u32 llgr_time; /* Long-lived GR stale time */
u8 llgr_flags; /* Long-lived GR per-AF flags */
- u8 ext_next_hop; /* Extended IPv6 next hop, RFC 5549 */
+ u8 ext_next_hop; /* Extended IPv6 next hop, RFC 8950 */
u8 add_path; /* Multiple paths support, RFC 7911 */
};
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 6b728b4e..ee98115d 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -214,6 +214,13 @@ bgp_af_caps_cmp(const void *X, const void *Y)
return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
}
+/*
+ * Allocate a zeroed bgp_caps structure from the protocol pool with room for
+ * @n trailing bgp_af_caps records, and mark its role as undefined.
+ */
+struct bgp_caps *
+bgp_alloc_capabilities(struct bgp_proto *p, int n)
+{
+  struct bgp_caps *new_caps;
+
+  new_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
+
+  /* Zero-fill alone does not express "no role", so set it explicitly */
+  new_caps->role = BGP_ROLE_UNDEFINED;
+
+  return new_caps;
+}
void
bgp_prepare_capabilities(struct bgp_conn *conn)
@@ -226,13 +233,13 @@ bgp_prepare_capabilities(struct bgp_conn *conn)
if (!p->cf->capabilities)
{
/* Just prepare empty local_caps */
- conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
+ conn->local_caps = bgp_alloc_capabilities(p, 0);
return;
}
/* Prepare bgp_caps structure */
int n = list_length(&p->p.channels);
- caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
+ caps = bgp_alloc_capabilities(p, n);
conn->local_caps = caps;
caps->as4_support = p->cf->enable_as4;
@@ -463,10 +470,7 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
u32 af;
if (!conn->remote_caps)
- {
- caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps));
- caps->role = BGP_ROLE_UNDEFINED;
- }
+ caps = bgp_alloc_capabilities(p, 1);
else
{
caps = conn->remote_caps;
@@ -502,7 +506,7 @@ bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
caps->route_refresh = 1;
break;
- case 5: /* Extended next hop encoding capability, RFC 5549 */
+ case 5: /* Extended next hop encoding capability, RFC 8950 */
if (cl % 6)
goto err;
@@ -762,7 +766,7 @@ bgp_read_options(struct bgp_conn *conn, byte *pos, uint len, uint rest)
/* Prepare empty caps if no capability option was announced */
if (!conn->remote_caps)
- conn->remote_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
+ conn->remote_caps = bgp_alloc_capabilities(p, 0);
return 0;
@@ -1262,7 +1266,7 @@ bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size
/*
* Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
- * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
+ * is specified in RFC 8950 for IPv4 and in RFC 4798 for IPv6. The difference
* is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
* IPv6 address with IPv6 NLRI.
*/
@@ -1337,7 +1341,7 @@ bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint siz
/*
* Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
- * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
+ * is specified in RFC 8950 for VPNv4 and in RFC 4659 for VPNv6. The difference
* is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
* IPv6 address with VPNv6 NLRI.
*/
diff --git a/proto/radv/config.Y b/proto/radv/config.Y
index 8d4a3ab9..eeafe6f4 100644
--- a/proto/radv/config.Y
+++ b/proto/radv/config.Y
@@ -25,6 +25,15 @@ static struct radv_dnssl_config this_radv_dnssl;
static list radv_dns_list; /* Used by radv_rdnss and radv_dnssl */
static u8 radv_mult_val; /* Used by radv_mult for second return value */
+/*
+ * Append one custom RA option to list @l.
+ *
+ * @type is the option type and must be in range 0-255; @payload is the raw
+ * option body. Invalid values are reported through cf_error().
+ */
+static inline void
+radv_add_to_custom_list(list *l, int type, const struct bytestring *payload)
+{
+  if (type < 0 || type > 255)
+    cf_error("RA custom type must be in range 0-255");
+
+  /*
+   * The option length field is 8 bits wide and counts 8-octet units including
+   * the 2-octet type/length header, so at most 255 * 8 - 2 payload bytes can
+   * be encoded without the length wrapping around.
+   */
+  if (payload->length > 255 * 8 - 2)
+    cf_error("RA custom option payload must not exceed 2038 bytes");
+
+  struct radv_custom_config *cf = cfg_allocz(sizeof(*cf));
+  cf->type = type;
+  cf->payload = payload;
+  add_tail(l, NODE cf);
+}
CF_DECLS
@@ -33,7 +42,7 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, SOLICITED,
RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT,
LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, LOCAL,
TRIGGER, SENSITIVE, PREFERENCE, LOW, MEDIUM, HIGH, PROPAGATE, ROUTE,
- ROUTES, RA_PREFERENCE, RA_LIFETIME)
+ ROUTES, RA_PREFERENCE, RA_LIFETIME, CUSTOM, OPTION, TYPE, VALUE)
CF_ENUM(T_ENUM_RA_PREFERENCE, RA_PREF_, LOW, MEDIUM, HIGH)
@@ -41,6 +50,8 @@ CF_ENUM(T_ENUM_RA_PREFERENCE, RA_PREF_, LOW, MEDIUM, HIGH)
CF_GRAMMAR
+kw_sym: CUSTOM | OPTION | VALUE ;
+
proto: radv_proto ;
radv_proto_start: proto_start RADV
@@ -52,6 +63,7 @@ radv_proto_start: proto_start RADV
init_list(&RADV_CFG->pref_list);
init_list(&RADV_CFG->rdnss_list);
init_list(&RADV_CFG->dnssl_list);
+ init_list(&RADV_CFG->custom_list);
};
radv_proto_item:
@@ -61,6 +73,7 @@ radv_proto_item:
| PREFIX radv_prefix { add_tail(&RADV_CFG->pref_list, NODE this_radv_prefix); }
| RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_CFG->rdnss_list, &radv_dns_list); }
| DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_CFG->dnssl_list, &radv_dns_list); }
+ | CUSTOM OPTION TYPE expr VALUE BYTESTRING { radv_add_to_custom_list(&RADV_CFG->custom_list, $4, $6); }
| TRIGGER net_ip6 { RADV_CFG->trigger = $2; }
| PROPAGATE ROUTES bool { RADV_CFG->propagate_routes = $3; }
;
@@ -82,6 +95,7 @@ radv_iface_start:
init_list(&RADV_IFACE->pref_list);
init_list(&RADV_IFACE->rdnss_list);
init_list(&RADV_IFACE->dnssl_list);
+ init_list(&RADV_IFACE->custom_list);
RADV_IFACE->min_ra_int = (u32) -1; /* undefined */
RADV_IFACE->max_ra_int = DEFAULT_MAX_RA_INT;
@@ -124,8 +138,10 @@ radv_iface_item:
| PREFIX radv_prefix { add_tail(&RADV_IFACE->pref_list, NODE this_radv_prefix); }
| RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_IFACE->rdnss_list, &radv_dns_list); }
| DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_IFACE->dnssl_list, &radv_dns_list); }
+ | CUSTOM OPTION TYPE expr VALUE BYTESTRING { radv_add_to_custom_list(&RADV_IFACE->custom_list, $4, $6); }
| RDNSS LOCAL bool { RADV_IFACE->rdnss_local = $3; }
| DNSSL LOCAL bool { RADV_IFACE->dnssl_local = $3; }
+ | CUSTOM OPTION LOCAL bool { RADV_IFACE->custom_local = $4; }
;
radv_preference:
diff --git a/proto/radv/packets.c b/proto/radv/packets.c
index 5cd8b2de..77c98794 100644
--- a/proto/radv/packets.c
+++ b/proto/radv/packets.c
@@ -82,6 +82,13 @@ struct radv_opt_dnssl
char domain[];
};
+struct radv_opt_custom /* Layout of a custom RA option as written into the packet buffer */
+{
+ u8 type; /* Option type taken from the configured custom option */
+ u8 length; /* Option size in units of 8 octets, type and length bytes included */
+ u8 payload[]; /* Configured payload bytes; the remainder of the last 8-octet unit is zero */
+};
+
static int
radv_prepare_route(struct radv_iface *ifa, struct radv_route *rt,
char **buf, char *bufend)
@@ -255,6 +262,34 @@ radv_prepare_dnssl(struct radv_iface *ifa, list *dnssl_list, char **buf, char *b
}
static int
+radv_prepare_custom(struct radv_iface *ifa, list *custom_list, char **buf, char *bufend)
+{
+  /*
+   * Serialize every custom option from custom_list at *buf, advancing *buf
+   * past each one. Returns 0 on success; when an option does not fit before
+   * bufend, a warning is logged and -1 is returned.
+   *
+   * NOTE(review): the option length field is 8 bits wide, so a payload that
+   * needs more than 255 units would be truncated here - confirm the payload
+   * size is bounded before it reaches this function.
+   */
+  struct radv_custom_config *item;
+
+  WALK_LIST(item, *custom_list)
+  {
+    const struct bytestring *data = item->payload;
+
+    /* Type and length take 2 octets; round the total up to whole 8-octet units */
+    int units = (data->length + 2 + 8 - 1) / 8;
+
+    if (bufend - *buf < units * 8)
+    {
+      log(L_WARN "%s: Too many RA options on interface %s",
+          ifa->ra->p.name, ifa->iface->name);
+      return -1;
+    }
+
+    struct radv_opt_custom *opt = (void *) *buf;
+
+    /* Zero the whole option first so the padding after the payload is clean */
+    memset(opt, 0, units * 8);
+    opt->type = item->type;
+    opt->length = units;
+    memcpy(opt->payload, data->data, data->length);
+
+    *buf += 8 * opt->length;
+  }
+
+  return 0;
+}
+
+
+static int
radv_prepare_prefix(struct radv_iface *ifa, struct radv_prefix *px,
char **buf, char *bufend)
{
@@ -352,6 +387,14 @@ radv_prepare_ra(struct radv_iface *ifa)
if (radv_prepare_dnssl(ifa, &ic->dnssl_list, &buf, bufend) < 0)
goto done;
+ if (! ic->custom_local)
+ if (radv_prepare_custom(ifa, &cf->custom_list, &buf, bufend) < 0)
+ goto done;
+
+ if (radv_prepare_custom(ifa, &ic->custom_list, &buf, bufend) < 0)
+ goto done;
+
+
if (p->fib_up)
{
FIB_WALK(&p->routes, struct radv_route, rt)
diff --git a/proto/radv/radv.h b/proto/radv/radv.h
index 14d40f8a..2baf0bad 100644
--- a/proto/radv/radv.h
+++ b/proto/radv/radv.h
@@ -51,6 +51,7 @@ struct radv_config
list pref_list; /* Global list of prefix configs (struct radv_prefix_config) */
list rdnss_list; /* Global list of RDNSS configs (struct radv_rdnss_config) */
list dnssl_list; /* Global list of DNSSL configs (struct radv_dnssl_config) */
+ list custom_list; /* Global list of custom configs (struct radv_custom_config) */
net_addr trigger; /* Prefix of a trigger route, if defined */
u8 propagate_routes; /* Do we propagate more specific routes (RFC 4191)? */
@@ -63,6 +64,7 @@ struct radv_iface_config
list pref_list; /* Local list of prefix configs (struct radv_prefix_config) */
list rdnss_list; /* Local list of RDNSS configs (struct radv_rdnss_config) */
list dnssl_list; /* Local list of DNSSL configs (struct radv_dnssl_config) */
+ list custom_list; /* Local list of custom configs (struct radv_custom_config) */
u32 min_ra_int; /* Standard options from RFC 4861 */
u32 max_ra_int;
@@ -75,6 +77,7 @@ struct radv_iface_config
u8 rdnss_local; /* Global list is not used for RDNSS */
u8 dnssl_local; /* Global list is not used for DNSSL */
+ u8 custom_local; /* Global list is not used for custom */
u8 managed; /* Standard options from RFC 4861 */
u8 other_config;
@@ -122,6 +125,13 @@ struct radv_dnssl_config
const char *domain; /* Domain for DNS search list, in processed form */
};
+struct radv_custom_config /* One configured custom RA option, kept in a custom_list */
+{
+ node n;
+ u8 type; /* Identifier of the type of option */
+ const struct bytestring *payload; /* Payload of the option */
+};
+
/*
* One more specific route as per RFC 4191.
*