diff options
Diffstat (limited to 'proto')
-rw-r--r-- | proto/babel/babel.c | 84 | ||||
-rw-r--r-- | proto/babel/babel.h | 24 | ||||
-rw-r--r-- | proto/babel/config.Y | 20 | ||||
-rw-r--r-- | proto/babel/packets.c | 126 |
4 files changed, 244 insertions, 10 deletions
diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 9f33dd34..04613788 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -596,6 +596,7 @@ babel_update_cost(struct babel_neighbor *nbr) switch (cf->type) { case BABEL_IFACE_TYPE_WIRED: + case BABEL_IFACE_TYPE_TUNNEL: /* k-out-of-j selection - Appendix 2.1 in the RFC. */ /* Link is bad if less than cf->limit/16 of expected hellos were received */ @@ -624,6 +625,24 @@ babel_update_cost(struct babel_neighbor *nbr) break; } + if (cf->rtt_cost && nbr->srtt > cf->rtt_min) + { + uint rtt_cost = cf->rtt_cost; + + if (nbr->srtt < cf->rtt_max) + { + uint rtt_interval = cf->rtt_max TO_US - cf->rtt_min TO_US; + uint rtt_diff = (nbr->srtt TO_US - cf->rtt_min TO_US); + + rtt_cost = (rtt_cost * rtt_diff) / rtt_interval; + } + + txcost = MIN(txcost + rtt_cost, BABEL_INFINITY); + + TRACE(D_EVENTS, "Added RTT cost %u to nbr %I on %s with srtt %u.%03u ms", + rtt_cost, nbr->addr, nbr->ifa->iface->name, nbr->srtt/1000, nbr->srtt%1000); + } + done: /* If RX cost changed, send IHU with next Hello */ if (rxcost != nbr->rxcost) @@ -854,6 +873,12 @@ babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neig msg->ihu.rxcost = n->rxcost; msg->ihu.interval = ifa->cf->ihu_interval; + if (n->last_tstamp_rcvd && ifa->cf->rtt_send) + { + msg->ihu.tstamp = n->last_tstamp; + msg->ihu.tstamp_rcvd = n->last_tstamp_rcvd TO_US; + } + TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %t", msg->ihu.addr, msg->ihu.rxcost, (btime) msg->ihu.interval); } @@ -893,6 +918,9 @@ babel_send_hello(struct babel_iface *ifa, uint interval) msg.hello.seqno = ifa->hello_seqno++; msg.hello.interval = interval ?: ifa->cf->hello_interval; + if (ifa->cf->rtt_send) + msg.hello.tstamp = 1; /* real timestamp will be set on TLV write */ + TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t", ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval); @@ -1199,14 +1227,26 @@ babel_handle_hello(union babel_msg *m, 
struct babel_iface *ifa) msg->seqno, (btime) msg->interval); struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); + struct babel_iface_config *cf = n->ifa->cf; int first_hello = !n->hello_cnt; + if (msg->tstamp) + { + n->last_tstamp = msg->tstamp; + n->last_tstamp_rcvd = msg->pkt_received; + } babel_update_hello_history(n, msg->seqno, msg->interval); babel_update_cost(n); /* Speed up session establishment by sending IHU immediately */ if (first_hello) - babel_send_ihu(ifa, n); + { + /* if using RTT, all IHUs must be paired with hellos */ + if(cf->rtt_send) + babel_send_hello(ifa, 0); + else + babel_send_ihu(ifa, n); + } } void @@ -1225,6 +1265,39 @@ babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa) struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); n->txcost = msg->rxcost; n->ihu_expiry = current_time() + BABEL_IHU_EXPIRY_FACTOR(msg->interval); + + if (msg->tstamp) + { + u32 rtt_sample = 0, pkt_received = msg->pkt_received TO_US; + int remote_time, full_time; + + /* processing time reported by peer */ + remote_time = (n->last_tstamp - msg->tstamp_rcvd); + /* time since we sent the last timestamp - RTT including remote time */ + full_time = (pkt_received - msg->tstamp); + + /* sanity checks */ + if (remote_time < 0 || full_time < 0 || + remote_time US_ > BABEL_RTT_MAX_VALUE || full_time US_ > BABEL_RTT_MAX_VALUE) + goto out; + + if (remote_time < full_time) + rtt_sample = full_time - remote_time; + + if (n->srtt) + { + uint decay = n->ifa->cf->rtt_decay; + + n->srtt = (decay * rtt_sample + (256 - decay) * n->srtt) / 256; + } + else + n->srtt = rtt_sample; + + TRACE(D_EVENTS, "RTT sample for neighbour %I on %s: %u us (srtt %u.%03u ms)", + n->addr, ifa->ifname, rtt_sample, n->srtt/1000, n->srtt%1000); + } + +out: babel_update_cost(n); } @@ -2199,8 +2272,8 @@ babel_show_neighbors(struct proto *P, const char *iff) } cli_msg(-1024, "%s:", p->p.name); - cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s %4s", - "IP address", "Interface", 
"Metric", "Routes", "Hellos", "Expires", "Auth"); + cli_msg(-1024, "%-25s %-10s %6s %6s %6s %7s %4s %11s", + "IP address", "Interface", "Metric", "Routes", "Hellos", "Expires", "Auth", "RTT"); WALK_LIST(ifa, p->interfaces) { @@ -2215,9 +2288,10 @@ babel_show_neighbors(struct proto *P, const char *iff) uint hellos = u32_popcount(n->hello_map); btime timer = (n->hello_expiry ?: n->init_expiry) - current_time(); - cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t %-4s", + cli_msg(-1024, "%-25I %-10s %6u %6u %6u %7t %-4s %5u.%03ums", n->addr, ifa->iface->name, n->cost, rts, hellos, MAX(timer, 0), - n->auth_passed ? "Yes" : "No"); + n->auth_passed ? "Yes" : "No", + n->srtt/1000, n->srtt%1000); } } } diff --git a/proto/babel/babel.h b/proto/babel/babel.h index dcd303e1..edde4cab 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -53,10 +53,16 @@ #define BABEL_GARBAGE_INTERVAL (300 S_) #define BABEL_RXCOST_WIRED 96 #define BABEL_RXCOST_WIRELESS 256 +#define BABEL_RXCOST_RTT 96 #define BABEL_INITIAL_HOP_COUNT 255 #define BABEL_MAX_SEND_INTERVAL 5 /* Unused ? 
*/ #define BABEL_INITIAL_NEIGHBOR_TIMEOUT (60 S_) +#define BABEL_RTT_MAX_VALUE (600 S_) +#define BABEL_RTT_MIN (10 MS_) +#define BABEL_RTT_MAX (120 MS_) +#define BABEL_RTT_DECAY 42 + /* Max interval that will not overflow when carried as 16-bit centiseconds */ #define BABEL_TIME_UNITS 10000 /* On-wire times are counted in centiseconds */ #define BABEL_MIN_INTERVAL (0x0001 * BABEL_TIME_UNITS) @@ -96,6 +102,8 @@ enum babel_tlv_type { enum babel_subtlv_type { BABEL_SUBTLV_PAD1 = 0, BABEL_SUBTLV_PADN = 1, + BABEL_SUBTLV_DIVERSITY = 2, /* we don't support this */ + BABEL_SUBTLV_TIMESTAMP = 3, /* Mandatory subtlvs */ BABEL_SUBTLV_SOURCE_PREFIX = 128, @@ -106,6 +114,7 @@ enum babel_iface_type { BABEL_IFACE_TYPE_UNDEF = 0, BABEL_IFACE_TYPE_WIRED = 1, BABEL_IFACE_TYPE_WIRELESS = 2, + BABEL_IFACE_TYPE_TUNNEL = 3, BABEL_IFACE_TYPE_MAX }; @@ -141,6 +150,12 @@ struct babel_iface_config { uint ihu_interval; /* IHU interval, in us */ uint update_interval; /* Update interval, in us */ + btime rtt_min; /* rtt above which to start penalising metric */ + btime rtt_max; /* max rtt metric penalty applied above this */ + u16 rtt_cost; /* metric penalty to apply at rtt_max */ + u16 rtt_decay; /* decay of neighbour RTT (units of 1/256) */ + u8 rtt_send; /* whether to send timestamps on this interface */ + u16 rx_buffer; /* RX buffer size, 0 for MTU */ u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */ int tx_tos; @@ -229,6 +244,10 @@ struct babel_neighbor { u16 next_hello_seqno; uint last_hello_int; + u32 last_tstamp; + btime last_tstamp_rcvd; + btime srtt; + u32 auth_pc_unicast; u32 auth_pc_multicast; u8 auth_passed; @@ -326,6 +345,8 @@ struct babel_msg_hello { u16 seqno; uint interval; ip_addr sender; + u32 tstamp; + btime pkt_received; }; struct babel_msg_ihu { @@ -335,6 +356,9 @@ struct babel_msg_ihu { uint interval; ip_addr addr; ip_addr sender; + u32 tstamp; + u32 tstamp_rcvd; + btime pkt_received; }; struct babel_msg_update { diff --git 
a/proto/babel/config.Y b/proto/babel/config.Y index 1b4dc6f5..b8af0267 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -26,7 +26,7 @@ CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK, NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS, ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE, - EXTENDED) + EXTENDED, TUNNEL, RTT, MIN, MAX, DECAY, SEND, TIMESTAMPS) CF_GRAMMAR @@ -67,6 +67,10 @@ babel_iface_start: BABEL_IFACE->limit = BABEL_HELLO_LIMIT; BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL; BABEL_IFACE->tx_priority = sk_priority_control; + BABEL_IFACE->rtt_min = BABEL_RTT_MIN; + BABEL_IFACE->rtt_max = BABEL_RTT_MAX; + BABEL_IFACE->rtt_decay = BABEL_RTT_DECAY; + BABEL_IFACE->rtt_send = 1; BABEL_IFACE->check_link = 1; BABEL_IFACE->ext_next_hop = 1; }; @@ -87,8 +91,16 @@ babel_iface_finish: BABEL_IFACE->hello_interval = BABEL_HELLO_INTERVAL_WIRED; if (!BABEL_IFACE->rxcost) BABEL_IFACE->rxcost = BABEL_RXCOST_WIRED; + if (BABEL_IFACE->type == BABEL_IFACE_TYPE_TUNNEL && !BABEL_IFACE->rtt_cost) + BABEL_IFACE->rtt_cost = BABEL_RXCOST_RTT; } + if (BABEL_IFACE->rtt_cost && !BABEL_IFACE->rtt_send) + cf_error("Can't set RTT cost when sending timestamps is disabled"); + + if (BABEL_IFACE->rtt_min >= BABEL_IFACE->rtt_max) + cf_error("Min RTT must be smaller than max RTT"); + /* Make sure we do not overflow the 16-bit centisec fields */ if (!BABEL_IFACE->update_interval) BABEL_IFACE->update_interval = MIN_(BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR, BABEL_MAX_INTERVAL); @@ -136,6 +148,7 @@ babel_iface_item: | LIMIT expr { BABEL_IFACE->limit = $2; if (($2<1) || ($2>16)) cf_error("Limit must be in range 1-16"); } | TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; } | TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; } + | TYPE TUNNEL { BABEL_IFACE->type = BABEL_IFACE_TYPE_TUNNEL; } | HELLO INTERVAL 
expr_us { BABEL_IFACE->hello_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Hello interval must be in range 10 ms - 655 s"); } | UPDATE INTERVAL expr_us { BABEL_IFACE->update_interval = $3; if (($3<BABEL_MIN_INTERVAL) || ($3>BABEL_MAX_INTERVAL)) cf_error("Update interval must be in range 10 ms - 655 s"); } | RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); } @@ -149,6 +162,11 @@ babel_iface_item: | AUTHENTICATION NONE { BABEL_IFACE->auth_type = BABEL_AUTH_NONE; } | AUTHENTICATION MAC { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 0; } | AUTHENTICATION MAC PERMISSIVE { BABEL_IFACE->auth_type = BABEL_AUTH_MAC; BABEL_IFACE->auth_permissive = 1; } + | RTT MIN expr_us { BABEL_IFACE->rtt_min = $3; } + | RTT MAX expr_us { BABEL_IFACE->rtt_max = $3; } + | RTT COST expr { BABEL_IFACE->rtt_cost = $3; if ($3 >= BABEL_INFINITY) cf_error("RTT cost must be < 65535"); } + | RTT DECAY expr { BABEL_IFACE->rtt_decay = $3; if (($3 < 1) || ($3 > 256)) cf_error("RTT decay must be between 1-256"); } + | SEND TIMESTAMPS bool { BABEL_IFACE->rtt_send = $3; } | password_list ; diff --git a/proto/babel/packets.c b/proto/babel/packets.c index 61c94cc5..f1895655 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -58,6 +58,13 @@ struct babel_tlv_ihu { u8 addr[0]; } PACKED; +struct babel_subtlv_timestamp { + u8 type; + u8 length; + u32 tstamp; + u32 tstamp_rcvd; /* only used in IHU */ +} PACKED; + struct babel_tlv_router_id { u8 type; u8 length; @@ -161,6 +168,7 @@ struct babel_parse_state { const struct babel_tlv_data* (*get_subtlv_data)(u8 type); struct babel_proto *proto; struct babel_iface *ifa; + btime received_time; ip_addr saddr; ip_addr next_hop_ip4; ip_addr next_hop_ip6; @@ -172,6 +180,7 @@ struct babel_parse_state { u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */ u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */ u8 
def_ip4_via_ip6_prefix_seen; /* def_ip4_via_ip6_prefix is valid */ + u8 hello_tstamp_seen; /* pkt contains a hello timestamp */ u8 current_tlv_endpos; /* End of self-terminating TLVs (offset from start) */ u8 sadr_enabled; u8 is_unicast; @@ -336,6 +345,7 @@ static int babel_read_update(struct babel_tlv *hdr, union babel_msg *msg, struct static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); static int babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); static int babel_read_source_prefix(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_timestamp(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); static uint babel_write_ack(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); static uint babel_write_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); @@ -344,6 +354,7 @@ static uint babel_write_update(struct babel_tlv *hdr, union babel_msg *msg, stru static uint babel_write_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); static uint babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, uint max_len); static int babel_write_source_prefix(struct babel_tlv *hdr, net_addr *net, uint max_len); +static int babel_write_timestamp(struct babel_tlv *hdr, u32 tstamp, u32 tstamp_rcvd, uint max_len); static const struct babel_tlv_data tlv_data[BABEL_TLV_MAX] = { [BABEL_TLV_ACK_REQ] = { @@ -419,6 +430,13 @@ static const struct babel_tlv_data *get_packet_tlv_data(u8 type) return type < sizeof(tlv_data) / sizeof(*tlv_data) ? 
&tlv_data[type] : NULL; } +static const struct babel_tlv_data timestamp_tlv_data = { + sizeof(struct babel_subtlv_timestamp), + babel_read_timestamp, + NULL, + NULL +}; + static const struct babel_tlv_data source_prefix_tlv_data = { sizeof(struct babel_subtlv_source_prefix), babel_read_source_prefix, @@ -430,6 +448,8 @@ static const struct babel_tlv_data *get_packet_subtlv_data(u8 type) { switch (type) { + case BABEL_SUBTLV_TIMESTAMP: + return &timestamp_tlv_data; case BABEL_SUBTLV_SOURCE_PREFIX: return &source_prefix_tlv_data; @@ -491,16 +511,34 @@ babel_read_hello(struct babel_tlv *hdr, union babel_msg *m, static uint babel_write_hello(struct babel_tlv *hdr, union babel_msg *m, - struct babel_write_state *state UNUSED, uint max_len UNUSED) + struct babel_write_state *state UNUSED, uint max_len) { struct babel_tlv_hello *tlv = (void *) hdr; struct babel_msg_hello *msg = &m->hello; + uint len = sizeof(struct babel_tlv_hello); TLV_HDR0(tlv, BABEL_TLV_HELLO); put_u16(&tlv->seqno, msg->seqno); put_time16(&tlv->interval, msg->interval); - return sizeof(struct babel_tlv_hello); + if (msg->tstamp) + { + /* + * There can be a substantial delay between when the babel_msg was created + * and when it is serialised. We don't want this included in the RTT + * measurement, so replace the timestamp with the current time to get as + * close as possible to on-wire time for the packet. 
+ */ + u32 tstamp = current_time_now() TO_US; + + int l = babel_write_timestamp(hdr, tstamp, 0, max_len); + if (l < 0) + return 0; + + len += l; + } + + return len; } static int @@ -565,6 +603,7 @@ babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m, { struct babel_tlv_ihu *tlv = (void *) hdr; struct babel_msg_ihu *msg = &m->ihu; + uint len = sizeof(*tlv); if (ipa_is_link_local(msg->addr) && max_len < sizeof(struct babel_tlv_ihu) + 8) return 0; @@ -576,12 +615,24 @@ babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m, if (!ipa_is_link_local(msg->addr)) { tlv->ae = BABEL_AE_WILDCARD; - return sizeof(struct babel_tlv_ihu); + goto out; } put_ip6_ll(&tlv->addr, msg->addr); tlv->ae = BABEL_AE_IP6_LL; hdr->length += 8; - return sizeof(struct babel_tlv_ihu) + 8; + len += 8; + +out: + if (msg->tstamp) + { + int l = babel_write_timestamp(hdr, msg->tstamp, msg->tstamp_rcvd, max_len); + if (l < 0) + return 0; + + len += l; + } + + return len; } static int @@ -1249,6 +1300,66 @@ babel_write_source_prefix(struct babel_tlv *hdr, net_addr *n, uint max_len) return len; } +static int +babel_read_timestamp(struct babel_tlv *hdr, union babel_msg *msg, + struct babel_parse_state *state) +{ + struct babel_subtlv_timestamp *tlv = (void *) hdr; + + switch (msg->type) + { + case BABEL_TLV_HELLO: + if (tlv->length < 4) + return PARSE_ERROR; + + msg->hello.tstamp = get_u32(&tlv->tstamp); + msg->hello.pkt_received = state->received_time; + state->hello_tstamp_seen = 1; + break; + + case BABEL_TLV_IHU: + if (tlv->length < 8) + return PARSE_ERROR; + + /* RTT calculation relies on a Hello always being present with an IHU */ + if (!state->hello_tstamp_seen) + break; + + msg->ihu.tstamp = get_u32(&tlv->tstamp); + msg->ihu.tstamp_rcvd = get_u32(&tlv->tstamp_rcvd); + msg->ihu.pkt_received = state->received_time; + break; + + default: + return PARSE_ERROR; + } + + return PARSE_SUCCESS; +} + +static int +babel_write_timestamp(struct babel_tlv *hdr, u32 tstamp, u32 tstamp_rcvd, uint 
max_len) +{ + struct babel_subtlv_timestamp *tlv = (void *) NEXT_TLV(hdr); + uint len = sizeof(*tlv); + + if (hdr->type == BABEL_TLV_HELLO) + len -= 4; + + if (len > max_len) + return -1; + + TLV_HDR(tlv, BABEL_SUBTLV_TIMESTAMP, len); + hdr->length += len; + + put_u32(&tlv->tstamp, tstamp); + + if (hdr->type == BABEL_TLV_IHU) + put_u32(&tlv->tstamp_rcvd, tstamp_rcvd); + + return len; +} + static inline int babel_read_subtlvs(struct babel_tlv *hdr, union babel_msg *msg, @@ -1518,6 +1629,13 @@ babel_process_packet(struct babel_iface *ifa, .saddr = saddr, .next_hop_ip6 = saddr, .sadr_enabled = babel_sadr_enabled(p), + + /* + * The core updates current_time() after returning from poll(), so this is + * actually the time the packet was received, even though there may have + * been a bit of delay before we got to process it + */ + .received_time = current_time(), }; if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION)) |