summaryrefslogtreecommitdiff
path: root/proto/bgp/packets.c
diff options
context:
space:
mode:
Diffstat (limited to 'proto/bgp/packets.c')
-rw-r--r--proto/bgp/packets.c2920
1 files changed, 1774 insertions, 1146 deletions
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 3e816839..385d5a36 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -2,12 +2,16 @@
* BIRD -- BGP Packet Processing
*
* (c) 2000 Martin Mares <mj@ucw.cz>
+ * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
+ * (c) 2008--2016 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#undef LOCAL_DEBUG
+#include <stdlib.h>
+
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
@@ -16,6 +20,7 @@
#include "nest/mrtdump.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
+#include "lib/flowspec.h"
#include "lib/socket.h"
#include "nest/cli.h"
@@ -38,6 +43,46 @@ static byte fsm_err_subcode[BS_MAX] = {
[BS_ESTABLISHED] = 3
};
+
+static struct bgp_channel *
+bgp_get_channel(struct bgp_proto *p, u32 afi)
+{
+ uint i;
+
+ for (i = 0; i < p->channel_count; i++)
+ if (p->afi_map[i] == afi)
+ return p->channel_map[i];
+
+ return NULL;
+}
+
+static inline void
+put_af3(byte *buf, u32 id)
+{
+ put_u16(buf, id >> 16);
+ buf[2] = id & 0xff;
+}
+
+static inline void
+put_af4(byte *buf, u32 id)
+{
+ put_u16(buf, id >> 16);
+ buf[2] = 0;
+ buf[3] = id & 0xff;
+}
+
+static inline u32
+get_af3(byte *buf)
+{
+ return (get_u16(buf) << 16) | buf[2];
+}
+
+static inline u32
+get_af4(byte *buf)
+{
+ return (get_u16(buf) << 16) | buf[3];
+}
+
/*
* MRT Dump format is not semantically specified.
* We will use these values in appropriate fields:
@@ -58,31 +103,41 @@ static byte *
mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
{
struct bgp_proto *p = conn->bgp;
+ uint v4 = ipa_is_ip4(p->cf->remote_ip);
if (as4)
- {
- put_u32(buf+0, p->remote_as);
- put_u32(buf+4, p->local_as);
- buf+=8;
- }
+ {
+ put_u32(buf+0, p->remote_as);
+ put_u32(buf+4, p->public_as);
+ buf+=8;
+ }
else
- {
- put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
- put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS);
- buf+=4;
- }
+ {
+ put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
+ put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
+ buf+=4;
+ }
put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
- put_u16(buf+2, BGP_AF);
+ put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6);
buf+=4;
- buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
- buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE);
+
+ if (v4)
+ {
+ buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE);
+ buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE);
+ }
+ else
+ {
+ buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE);
+ buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE);
+ }
return buf;
}
static void
-mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
+mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
{
byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
byte *bp = buf + MRTDUMP_HDR_LENGTH;
@@ -96,14 +151,14 @@ mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
}
static inline u16
-convert_state(unsigned state)
+convert_state(uint state)
{
/* Convert state from our BS_* values to values used in MRTDump */
return (state == BS_CLOSE) ? 1 : state + 1;
}
void
-mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
+mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
{
byte buf[128];
byte *bp = buf + MRTDUMP_HDR_LENGTH;
@@ -127,1298 +182,1947 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
return buf + 2 + conn->notify_size;
}
-#ifdef IPV6
-static byte *
-bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
-{
- *buf++ = 1; /* Capability 1: Multiprotocol extensions */
- *buf++ = 4; /* Capability data length */
- *buf++ = 0; /* We support AF IPv6 */
- *buf++ = BGP_AF_IPV6;
- *buf++ = 0; /* RFU */
- *buf++ = 1; /* and SAFI 1 */
- return buf;
-}
-#else
+/* Capability negotiation as per RFC 5492 */
-static byte *
-bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
-{
- *buf++ = 1; /* Capability 1: Multiprotocol extensions */
- *buf++ = 4; /* Capability data length */
- *buf++ = 0; /* We support AF IPv4 */
- *buf++ = BGP_AF_IPV4;
- *buf++ = 0; /* RFU */
- *buf++ = 1; /* and SAFI 1 */
- return buf;
+const struct bgp_af_caps *
+bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
+{
+ struct bgp_af_caps *ac;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->afi == afi)
+ return ac;
+
+ return NULL;
}
-#endif
-static byte *
-bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
+static struct bgp_af_caps *
+bgp_get_af_caps(struct bgp_caps *caps, u32 afi)
{
- *buf++ = 2; /* Capability 2: Support for route refresh */
- *buf++ = 0; /* Capability data length */
- return buf;
+ struct bgp_af_caps *ac;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->afi == afi)
+ return ac;
+
+ ac = &caps->af_data[caps->af_count++];
+ memset(ac, 0, sizeof(struct bgp_af_caps));
+ ac->afi = afi;
+
+ return ac;
}
-static byte *
-bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
+static int
+bgp_af_caps_cmp(const void *X, const void *Y)
{
- *buf++ = 6; /* Capability 6: Support for extended messages */
- *buf++ = 0; /* Capability data length */
- return buf;
+ const struct bgp_af_caps *x = X, *y = Y;
+ return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
}
+
static byte *
-bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
+bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
{
- *buf++ = 64; /* Capability 64: Support for graceful restart */
- *buf++ = 6; /* Capability data length */
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_channel *c;
+ struct bgp_caps *caps;
+ struct bgp_af_caps *ac;
+ uint any_ext_next_hop = 0;
+ uint any_add_path = 0;
+ byte *data;
- put_u16(buf, p->cf->gr_time);
- if (p->p.gr_recovery)
- buf[0] |= BGP_GRF_RESTART;
- buf += 2;
+ /* Prepare bgp_caps structure */
+
+ int n = list_length(&p->p.channels);
+ caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
+ conn->local_caps = caps;
+
+ caps->as4_support = p->cf->enable_as4;
+ caps->ext_messages = p->cf->enable_extended_messages;
+ caps->route_refresh = p->cf->enable_refresh;
+ caps->enhanced_refresh = p->cf->enable_refresh;
+
+ if (caps->as4_support)
+ caps->as4_number = p->public_as;
+
+ if (p->cf->gr_mode)
+ {
+ caps->gr_aware = 1;
+ caps->gr_time = p->cf->gr_time;
+ caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
+ }
+
+ /* Allocate and fill per-AF fields */
+ WALK_LIST(c, p->p.channels)
+ {
+ ac = &caps->af_data[caps->af_count++];
+ ac->afi = c->afi;
+ ac->ready = 1;
- *buf++ = 0; /* Appropriate AF */
- *buf++ = BGP_AF;
- *buf++ = 1; /* and SAFI 1 */
- *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
+ ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
+ any_ext_next_hop |= ac->ext_next_hop;
+
+ ac->add_path = c->cf->add_path;
+ any_add_path |= ac->add_path;
+
+ if (c->cf->gr_able)
+ {
+ ac->gr_able = 1;
+
+ if (p->p.gr_recovery)
+ ac->gr_af_flags |= BGP_GRF_FORWARDING;
+ }
+ }
+
+ /* Sort capability fields by AFI/SAFI */
+ qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
+
+
+ /* Create capability list in buffer */
+
+ /*
+ * Note that max length is ~ 20+14*af_count. With max 6 channels that is
+ * 104. Option limit is 253 and buffer size is 4096, so we cannot overflow
+ * unless we add new capabilities or more AFs.
+ */
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ready)
+ {
+ *buf++ = 1; /* Capability 1: Multiprotocol extensions */
+ *buf++ = 4; /* Capability data length */
+ put_af4(buf, ac->afi);
+ buf += 4;
+ }
+
+ if (caps->route_refresh)
+ {
+ *buf++ = 2; /* Capability 2: Support for route refresh */
+ *buf++ = 0; /* Capability data length */
+ }
+
+ if (any_ext_next_hop)
+ {
+ *buf++ = 5; /* Capability 5: Support for extended next hop */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->ext_next_hop)
+ {
+ put_af4(buf, ac->afi);
+ put_u16(buf+4, BGP_AFI_IPV6);
+ buf += 6;
+ }
+
+ data[-1] = buf - data;
+ }
+
+ if (caps->ext_messages)
+ {
+ *buf++ = 6; /* Capability 6: Support for extended messages */
+ *buf++ = 0; /* Capability data length */
+ }
+
+ if (caps->gr_aware)
+ {
+ *buf++ = 64; /* Capability 64: Support for graceful restart */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ put_u16(buf, caps->gr_time);
+ buf[0] |= caps->gr_flags;
+ buf += 2;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->gr_able)
+ {
+ put_af3(buf, ac->afi);
+ buf[3] = ac->gr_af_flags;
+ buf += 4;
+ }
+
+ data[-1] = buf - data;
+ }
+
+ if (caps->as4_support)
+ {
+ *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
+ *buf++ = 4; /* Capability data length */
+ put_u32(buf, p->public_as);
+ buf += 4;
+ }
+
+ if (any_add_path)
+ {
+ *buf++ = 69; /* Capability 69: Support for ADD-PATH */
+ *buf++ = 0; /* Capability data length, will be fixed later */
+ data = buf;
+
+ WALK_AF_CAPS(caps, ac)
+ if (ac->add_path)
+ {
+ put_af3(buf, ac->afi);
+ buf[3] = ac->add_path;
+ buf += 4;
+ }
+
+ data[-1] = buf - data;
+ }
+
+ if (caps->enhanced_refresh)
+ {
+ *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
+ *buf++ = 0; /* Capability data length */
+ }
return buf;
}
-static byte *
-bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf)
+static void
+bgp_read_capabilities(struct bgp_conn *conn, struct bgp_caps *caps, byte *pos, int len)
{
- *buf++ = 64; /* Capability 64: Support for graceful restart */
- *buf++ = 2; /* Capability data length */
- put_u16(buf, 0);
- return buf + 2;
-}
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_af_caps *ac;
+ int i, cl;
+ u32 af;
-static byte *
-bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
-{
- *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
- *buf++ = 4; /* Capability data length */
- put_u32(buf, p->local_as);
- return buf + 4;
-}
+ while (len > 0)
+ {
+ if (len < 2 || len < (2 + pos[1]))
+ goto err;
-static byte *
-bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
-{
- *buf++ = 69; /* Capability 69: Support for ADD-PATH */
- *buf++ = 4; /* Capability data length */
+ /* Capability length */
+ cl = pos[1];
- *buf++ = 0; /* Appropriate AF */
- *buf++ = BGP_AF;
- *buf++ = 1; /* SAFI 1 */
+ /* Capability type */
+ switch (pos[0])
+ {
+ case 1: /* Multiprotocol capability, RFC 4760 */
+ if (cl != 4)
+ goto err;
- *buf++ = p->cf->add_path;
+ af = get_af4(pos+2);
+ ac = bgp_get_af_caps(caps, af);
+ ac->ready = 1;
+ break;
- return buf;
+ case 2: /* Route refresh capability, RFC 2918 */
+ if (cl != 0)
+ goto err;
+
+ caps->route_refresh = 1;
+ break;
+
+ case 5: /* Extended next hop encoding capability, RFC 5549 */
+ if (cl % 6)
+ goto err;
+
+ for (i = 0; i < cl; i += 6)
+ {
+ /* Specified only for IPv4 prefixes with IPv6 next hops */
+ if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
+ (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
+ continue;
+
+ af = get_af4(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->ext_next_hop = 1;
+ }
+ break;
+
+ case 6: /* Extended message length capability, RFC draft */
+ if (cl != 0)
+ goto err;
+
+ caps->ext_messages = 1;
+ break;
+
+ case 64: /* Graceful restart capability, RFC 4724 */
+ if (cl % 4 != 2)
+ goto err;
+
+ /* Only the last instance is valid */
+ WALK_AF_CAPS(caps, ac)
+ {
+ ac->gr_able = 0;
+ ac->gr_af_flags = 0;
+ }
+
+ caps->gr_aware = 1;
+ caps->gr_flags = pos[2] & 0xf0;
+ caps->gr_time = get_u16(pos + 2) & 0x0fff;
+
+ for (i = 2; i < cl; i += 4)
+ {
+ af = get_af3(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->gr_able = 1;
+ ac->gr_af_flags = pos[2+i+3];
+ }
+ break;
+
+ case 65: /* AS4 capability, RFC 4893 */
+ if (cl != 4)
+ goto err;
+
+ caps->as4_support = 1;
+ caps->as4_number = get_u32(pos + 2);
+ break;
+
+ case 69: /* ADD-PATH capability, RFC 7911 */
+ if (cl % 4)
+ goto err;
+
+ for (i = 0; i < cl; i += 4)
+ {
+ byte val = pos[2+i+3];
+ if (!val || (val > BGP_ADD_PATH_FULL))
+ {
+ log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
+ p->p.name, val);
+ break;
+ }
+ }
+
+ for (i = 0; i < cl; i += 4)
+ {
+ af = get_af3(pos+2+i);
+ ac = bgp_get_af_caps(caps, af);
+ ac->add_path = pos[2+i+3];
+ }
+ break;
+
+ case 70: /* Enhanced route refresh capability, RFC 7313 */
+ if (cl != 0)
+ goto err;
+
+ caps->enhanced_refresh = 1;
+ break;
+
+ /* We can safely ignore all other capabilities */
+ }
+
+ ADVANCE(pos, len, 2 + cl);
+ }
+ return;
+
+err:
+ bgp_error(conn, 2, 0, NULL, 0);
+ return;
}
-static byte *
-bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
+static int
+bgp_read_options(struct bgp_conn *conn, byte *pos, int len)
{
- *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
- *buf++ = 0; /* Capability data length */
- return buf;
-}
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_caps *caps;
+ int ol;
+
+ /* Max number of announced AFIs is limited by max option length (255) */
+ caps = alloca(sizeof(struct bgp_caps) + 64 * sizeof(struct bgp_af_caps));
+ memset(caps, 0, sizeof(struct bgp_caps));
+
+ while (len > 0)
+ {
+ if ((len < 2) || (len < (2 + pos[1])))
+ { bgp_error(conn, 2, 0, NULL, 0); return -1; }
+
+ ol = pos[1];
+ if (pos[0] == 2)
+ {
+ /* BGP capabilities, RFC 5492 */
+ if (p->cf->capabilities)
+ bgp_read_capabilities(conn, caps, pos + 2, ol);
+ }
+ else
+ {
+ /* Unknown option */
+ bgp_error(conn, 2, 4, pos, ol); /* FIXME: ol or ol+2 ? */
+ return -1;
+ }
+
+ ADVANCE(pos, len, 2 + ol);
+ }
+
+ uint n = sizeof(struct bgp_caps) + caps->af_count * sizeof(struct bgp_af_caps);
+ conn->remote_caps = mb_allocz(p->p.pool, n);
+ memcpy(conn->remote_caps, caps, n);
+ return 0;
+}
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
- byte *cap;
- int cap_len;
BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
- BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
+ BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
+
buf[0] = BGP_VERSION;
- put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
+ put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
put_u16(buf+3, p->cf->hold_time);
put_u32(buf+5, p->local_id);
- if (conn->start_state == BSS_CONNECT_NOCAP)
- {
- BGP_TRACE(D_PACKETS, "Skipping capabilities");
- buf[9] = 0;
- return buf + 10;
- }
+ if (p->cf->capabilities)
+ {
+ /* Prepare local_caps and write capabilities to buffer */
+ byte *end = bgp_write_capabilities(conn, buf+12);
+ uint len = end - (buf+12);
- /* Skipped 3 B for length field and Capabilities parameter header */
- cap = buf + 12;
+ buf[9] = len + 2; /* Optional parameters length */
+ buf[10] = 2; /* Option 2: Capability list */
+ buf[11] = len; /* Option data length */
-#ifndef IPV6
- if (p->cf->advertise_ipv4)
- cap = bgp_put_cap_ipv4(p, cap);
-#endif
+ return end;
+ }
+ else
+ {
+ /* Prepare empty local_caps */
+ conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
-#ifdef IPV6
- cap = bgp_put_cap_ipv6(p, cap);
-#endif
+ buf[9] = 0; /* No optional parameters */
+ return buf + 10;
+ }
- if (p->cf->enable_refresh)
- cap = bgp_put_cap_rr(p, cap);
+ return buf;
+}
- if (p->cf->gr_mode == BGP_GR_ABLE)
- cap = bgp_put_cap_gr1(p, cap);
- else if (p->cf->gr_mode == BGP_GR_AWARE)
- cap = bgp_put_cap_gr2(p, cap);
+static void
+bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
+{
+ struct bgp_proto *p = conn->bgp;
+ struct bgp_conn *other;
+ u32 asn, hold, id;
- if (p->cf->enable_as4)
- cap = bgp_put_cap_as4(p, cap);
+ /* Check state */
+ if (conn->state != BS_OPENSENT)
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- if (p->cf->add_path)
- cap = bgp_put_cap_add_path(p, cap);
+ /* Check message contents */
+ if (len < 29 || len != 29 + (uint) pkt[28])
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- if (p->cf->enable_refresh)
- cap = bgp_put_cap_err(p, cap);
+ if (pkt[19] != BGP_VERSION)
+ { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
- if (p->cf->enable_extended_messages)
- cap = bgp_put_cap_ext_msg(p, cap);
+ asn = get_u16(pkt+20);
+ hold = get_u16(pkt+22);
+ id = get_u32(pkt+24);
+ BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
- cap_len = cap - buf - 12;
- if (cap_len > 0)
- {
- buf[9] = cap_len + 2; /* Optional params len */
- buf[10] = 2; /* Option: Capability list */
- buf[11] = cap_len; /* Option length */
- return cap;
- }
+ if (bgp_read_options(conn, pkt+29, pkt[28]) < 0)
+ return;
+
+ if (hold > 0 && hold < 3)
+ { bgp_error(conn, 2, 6, pkt+22, 2); return; }
+
+ /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
+ if (!id || (p->is_internal && id == p->local_id))
+ { bgp_error(conn, 2, 3, pkt+24, -4); return; }
+
+ struct bgp_caps *caps = conn->remote_caps;
+
+ if (caps->as4_support)
+ {
+ u32 as4 = caps->as4_number;
+
+ if ((as4 != asn) && (asn != AS_TRANS))
+ log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
+
+ if (as4 != p->remote_as)
+ { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
+ }
else
+ {
+ if (asn != p->remote_as)
+ { bgp_error(conn, 2, 2, pkt+20, 2); return; }
+ }
+
+ /* Check the other connection */
+ other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
+ switch (other->state)
+ {
+ case BS_CONNECT:
+ case BS_ACTIVE:
+ /* Stop outgoing connection attempts */
+ bgp_conn_enter_idle_state(other);
+ break;
+
+ case BS_IDLE:
+ case BS_OPENSENT:
+ case BS_CLOSE:
+ break;
+
+ case BS_OPENCONFIRM:
+ /*
+ * Description of collision detection rules in RFC 4271 is confusing and
+ * contradictory, but it is essentially:
+ *
+ * 1. Router with higher ID is dominant
+ * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
+ * 3. When both connections are in OpenConfirm state, one initiated by
+ * the dominant router is kept.
+ *
+ * The first line in the expression below evaluates whether the neighbor
+ * is dominant, the second line whether the new connection was initiated
+ * by the neighbor. If both are true (or both are false), we keep the new
+ * connection, otherwise we keep the old one.
+ */
+ if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
+ == (conn == &p->incoming_conn))
{
- buf[9] = 0; /* No optional parameters */
- return buf + 10;
+ /* Should close the other connection */
+ BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
+ bgp_error(other, 6, 7, NULL, 0);
+ break;
}
-}
+ /* Fall thru */
+ case BS_ESTABLISHED:
+ /* Should close this connection */
+ BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
+ bgp_error(conn, 6, 7, NULL, 0);
+ return;
-static uint
-bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains)
-{
- byte *start = w;
- ip_addr a;
- int bytes;
+ default:
+ bug("bgp_rx_open: Unknown state");
+ }
- while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
- {
- struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
- DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
+ /* Update our local variables */
+ conn->hold_time = MIN(hold, p->cf->hold_time);
+ conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
+ conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
+ conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
+ p->remote_id = id;
- if (p->add_path_tx)
- {
- put_u32(w, px->path_id);
- w += 4;
- remains -= 4;
- }
+ DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
+ conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
- *w++ = px->n.pxlen;
- bytes = (px->n.pxlen + 7) / 8;
- a = px->n.prefix;
- ipa_hton(a);
- memcpy(w, &a, bytes);
- w += bytes;
- remains -= bytes + 1;
- rem_node(&px->bucket_node);
- bgp_free_prefix(p, px);
- // fib_delete(&p->prefix_fib, px);
- }
- return w - start;
+ bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
+ bgp_start_timer(conn->hold_timer, conn->hold_time);
+ bgp_conn_enter_openconfirm_state(conn);
}
+
+/*
+ * Next hop handling
+ */
+
+#define REPORT(msg, args...) \
+ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })
+
+#define DISCARD(msg, args...) \
+ ({ REPORT(msg, ## args); return; })
+
+#define WITHDRAW(msg, args...) \
+ ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })
+
+#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
+#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
+#define NO_NEXT_HOP "Missing NEXT_HOP attribute"
+
+
static void
-bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
+bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
{
- while (!EMPTY_LIST(buck->prefixes))
- {
- struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
- log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
- rem_node(&px->bucket_node);
- bgp_free_prefix(p, px);
- // fib_delete(&p->prefix_fib, px);
- }
+ struct bgp_proto *p = s->proto;
+ struct bgp_channel *c = s->channel;
+
+ if (c->cf->gw_mode == GW_DIRECT)
+ {
+ neighbor *nbr = NULL;
+
+ /* GW_DIRECT -> single_hop -> p->neigh != NULL */
+ if (ipa_nonzero(gw))
+ nbr = neigh_find2(&p->p, &gw, NULL, 0);
+ else if (ipa_nonzero(ll))
+ nbr = neigh_find2(&p->p, &ll, p->neigh->iface, 0);
+
+ if (!nbr || (nbr->scope == SCOPE_HOST))
+ WITHDRAW(BAD_NEXT_HOP);
+
+ a->dest = RTD_ROUTER;
+ a->gw = nbr->addr;
+ a->iface = nbr->iface;
+ a->hostentry = NULL;
+ a->igp_metric = 0;
+ }
+ else /* GW_RECURSIVE */
+ {
+ if (ipa_zero(gw))
+ WITHDRAW(BAD_NEXT_HOP);
+
+ rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll);
+ }
}
-#ifndef IPV6 /* IPv4 version */
+static inline int
+bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
+{
+ struct bgp_proto *p = s->proto;
+ ip_addr *nh = (void *) a->u.ptr->data;
-static byte *
-bgp_create_update(struct bgp_conn *conn, byte *buf)
+ if (s->channel->cf->next_hop_self)
+ return 0;
+
+ if (s->channel->cf->next_hop_keep)
+ return 1;
+
+ /* Keep it when explicitly set in export filter */
+ if (a->type & EAF_FRESH)
+ return 1;
+
+ /* Keep it when exported to internal peers */
+ if (p->is_interior && ipa_nonzero(*nh))
+ return 1;
+
+ /* Keep it when forwarded between single-hop BGPs on the same iface */
+ struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
+ return p->neigh && (p->neigh->iface == ifa);
+}
+
+static inline int
+bgp_use_gateway(struct bgp_export_state *s)
{
- struct bgp_proto *p = conn->bgp;
- struct bgp_bucket *buck;
- int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
- byte *w;
- int wd_size = 0;
- int r_size = 0;
- int a_size = 0;
-
- w = buf+2;
- if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
- {
- DBG("Withdrawn routes:\n");
- wd_size = bgp_encode_prefixes(p, w, buck, remains);
- w += wd_size;
- remains -= wd_size;
- }
- put_u16(buf, wd_size);
+ struct bgp_proto *p = s->proto;
+ rta *ra = s->route->attrs;
- if (!wd_size)
- {
- while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
- {
- if (EMPTY_LIST(buck->prefixes))
- {
- DBG("Deleting empty bucket %p\n", buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
-
- DBG("Processing bucket %p\n", buck);
- a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024);
-
- if (a_size < 0)
- {
- log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
- bgp_flush_prefixes(p, buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
-
- put_u16(w, a_size);
- w += a_size + 2;
- r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
- w += r_size;
- break;
- }
- }
- if (!a_size) /* Attributes not already encoded */
+ if (s->channel->cf->next_hop_self)
+ return 0;
+
+ /* We need valid global gateway */
+ if ((ra->dest != RTD_ROUTER) || ipa_zero(ra->gw) || ipa_is_link_local(ra->gw))
+ return 0;
+
+ /* Use it when exported to internal peers */
+ if (p->is_interior)
+ return 1;
+
+ /* Use it when forwarded to single-hop BGP peer on on the same iface */
+ return p->neigh && (p->neigh->iface == ra->iface);
+}
+
+static void
+bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
+{
+ if (!a || !bgp_use_next_hop(s, a))
+ {
+ if (bgp_use_gateway(s))
{
- put_u16(w, 0);
- w += 2;
+ ip_addr nh[1] = { s->route->attrs->gw };
+ bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
}
- if (wd_size || r_size)
+ else
{
- BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
- return w;
+ ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
+ bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
}
- else
- return NULL;
+ }
+
+ /* Check if next hop is valid */
+ a = bgp_find_attr(*to, BA_NEXT_HOP);
+ if (!a)
+ WITHDRAW(NO_NEXT_HOP);
+
+ ip_addr *nh = (void *) a->u.ptr->data;
+ ip_addr peer = s->proto->cf->remote_ip;
+ uint len = a->u.ptr->length;
+
+ if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
+ WITHDRAW(BAD_NEXT_HOP);
+
+ if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
+ WITHDRAW(BAD_NEXT_HOP);
}
-static byte *
-bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
+static uint
+bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
+ return 0;
+}
- put_u32(buf, 0);
- return buf+4;
+static void
+bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
+{
+ /*
+ * Although we expect no next hop and RFC 7606 7.11 states that attribute
+ * MP_REACH_NLRI with unexpected next hop length is considered malformed,
+ * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
+ */
+
+ return;
}
-#else /* IPv6 version */
+static void
+bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
+{
+ /* NEXT_HOP shall not pass */
+ if (a)
+ bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
+}
-static inline int
-same_iface(struct bgp_proto *p, ip_addr *ip)
+
+/*
+ * UPDATE
+ */
+
+static void
+bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
{
- neighbor *n = neigh_find(&p->p, ip, 0);
- return n && p->neigh && n->iface == p->neigh->iface;
+ if (path_id != s->last_id)
+ {
+ s->last_src = rt_get_source(&s->proto->p, path_id);
+ s->last_id = path_id;
+
+ rta_free(s->cached_rta);
+ s->cached_rta = NULL;
+ }
+
+ if (!a0)
+ {
+ /* Route withdraw */
+ rte_update2(&s->channel->c, n, NULL, s->last_src);
+ return;
+ }
+
+ /* Prepare cached route attributes */
+ if (s->cached_rta == NULL)
+ {
+ a0->src = s->last_src;
+
+ /* Workaround for rta_lookup() breaking eattrs */
+ ea_list *ea = a0->eattrs;
+ s->cached_rta = rta_lookup(a0);
+ a0->eattrs = ea;
+ }
+
+ rta *a = rta_clone(s->cached_rta);
+ rte *e = rte_get_temp(a);
+
+ e->pflags = 0;
+ e->u.bgp.suppressed = 0;
+ rte_update2(&s->channel->c, n, e, s->last_src);
}
-static byte *
-bgp_create_update(struct bgp_conn *conn, byte *buf)
+
+
+static uint
+bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
- struct bgp_proto *p = conn->bgp;
- struct bgp_bucket *buck;
- int size, second, rem_stored;
- int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
- byte *w, *w_stored, *tmp, *tstart;
- ip_addr *ipp, ip, ip_ll;
- ea_list *ea;
- eattr *nh;
+ byte *pos = buf;
- put_u16(buf, 0);
- w = buf+4;
+ while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip4_addr))))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_ip4 *net = (void *) px->net;
- if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ /* Encode path ID */
+ if (s->add_path)
{
- DBG("Withdrawn routes:\n");
- tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
- *tmp++ = 0;
- *tmp++ = BGP_AF_IPV6;
- *tmp++ = 1;
- ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
- size = bgp_encode_attrs(p, w, ea, remains);
- ASSERT(size >= 0);
- w += size;
- remains -= size;
- }
- else
- {
- while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
- {
- if (EMPTY_LIST(buck->prefixes))
- {
- DBG("Deleting empty bucket %p\n", buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
-
- DBG("Processing bucket %p\n", buck);
- rem_stored = remains;
- w_stored = w;
-
- size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024);
- if (size < 0)
- {
- log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
- bgp_flush_prefixes(p, buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- }
- w += size;
- remains -= size;
-
- /* We have two addresses here in NEXT_HOP eattr. Really.
- Unless NEXT_HOP was modified by filter */
- nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- ASSERT(nh);
- second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
- ipp = (ip_addr *) nh->u.ptr->data;
- ip = ipp[0];
- ip_ll = IPA_NONE;
-
- if (ipa_equal(ip, p->source_addr))
- ip_ll = p->local_link;
- else
- {
- /* If we send a route with 'third party' next hop destinated
- * in the same interface, we should also send a link local
- * next hop address. We use the received one (stored in the
- * other part of BA_NEXT_HOP eattr). If we didn't received
- * it (for example it is a static route), we can't use
- * 'third party' next hop and we have to use local IP address
- * as next hop. Sending original next hop address without
- * link local address seems to be a natural way to solve that
- * problem, but it is contrary to RFC 2545 and Quagga does not
- * accept such routes.
- *
- * There are two cases, either we have global IP, or
- * IPA_NONE if the neighbor is link-local. For IPA_NONE,
- * we suppose it is on the same iface, see bgp_update_attrs().
- */
-
- if (ipa_zero(ip) || same_iface(p, &ip))
- {
- if (second && ipa_nonzero(ipp[1]))
- ip_ll = ipp[1];
- else
- {
- switch (p->cf->missing_lladdr)
- {
- case MLL_SELF:
- ip = p->source_addr;
- ip_ll = p->local_link;
- break;
- case MLL_DROP:
- log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
- w = w_stored;
- remains = rem_stored;
- bgp_flush_prefixes(p, buck);
- rem_node(&buck->send_node);
- bgp_free_bucket(p, buck);
- continue;
- case MLL_IGNORE:
- break;
- }
- }
- }
- }
-
- tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
- *tmp++ = 0;
- *tmp++ = BGP_AF_IPV6;
- *tmp++ = 1;
-
- if (ipa_is_link_local(ip))
- ip = IPA_NONE;
-
- if (ipa_nonzero(ip_ll))
- {
- *tmp++ = 32;
- ipa_hton(ip);
- memcpy(tmp, &ip, 16);
- ipa_hton(ip_ll);
- memcpy(tmp+16, &ip_ll, 16);
- tmp += 32;
- }
- else
- {
- *tmp++ = 16;
- ipa_hton(ip);
- memcpy(tmp, &ip, 16);
- tmp += 16;
- }
-
- *tmp++ = 0; /* No SNPA information */
- tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
- ea->attrs[0].u.ptr->length = tmp - tstart;
- size = bgp_encode_attrs(p, w, ea, remains);
- ASSERT(size >= 0);
- w += size;
- break;
- }
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
}
- size = w - (buf+4);
- put_u16(buf+2, size);
- lp_flush(bgp_linpool);
- if (size)
+ ip4_addr a = ip4_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
+
+ /* Encode prefix length */
+ *pos = net->pxlen;
+ ADVANCE(pos, size, 1);
+
+ /* Encode prefix body */
+ memcpy(pos, &a, b);
+ ADVANCE(pos, size, b);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
+}
+
+static void
+bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+ while (len)
+ {
+ net_addr_ip4 net;
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
{
- BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
- return w;
+ if (len < 5)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
}
- else
- return NULL;
+
+ /* Decode prefix length */
+ uint l = *pos;
+ uint b = (l + 7) / 8;
+ ADVANCE(pos, len, 1);
+
+ if (l > IP4_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ if (len < b)
+ bgp_parse_error(s, 1);
+
+ /* Decode prefix body */
+ ip4_addr addr = IP4_NONE;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_IP4(ip4_ntoh(addr), l);
+ net_normalize_ip4(&net);
+
+ // XXXX validate prefix
+
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
}
-static byte *
-bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
+static uint
+bgp_encode_next_hop_ip4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
+ /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
- put_u16(buf+0, 0);
- put_u16(buf+2, 6); /* length 4-9 */
- buf += 4;
+ ASSERT(a->u.ptr->length == sizeof(ip_addr));
- /* Empty MP_UNREACH_NLRI atribute */
- *buf++ = BAF_OPTIONAL;
- *buf++ = BA_MP_UNREACH_NLRI;
- *buf++ = 3; /* Length 7-9 */
- *buf++ = 0; /* AFI */
- *buf++ = BGP_AF_IPV6;
- *buf++ = 1; /* SAFI */
- return buf;
-}
+ put_ip4(buf, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
-#endif
+ return 4;
+}
-static inline byte *
-bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
+static void
+bgp_decode_next_hop_ip4(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
+ if (len != 4)
+ bgp_parse_error(s, 9);
- /* Original original route refresh request, RFC 2918 */
- *buf++ = 0;
- *buf++ = BGP_AF;
- *buf++ = BGP_RR_REQUEST;
- *buf++ = 1; /* SAFI */
- return buf;
+ ip_addr nh = ipa_from_ip4(get_ip4(data));
+
+ // XXXX validate next hop
+
+ bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &nh, sizeof(nh));
+ bgp_apply_next_hop(s, a, nh, IPA_NONE);
}
-static inline byte *
-bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
+
+static uint
+bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
+ byte *pos = buf;
- /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
- *buf++ = 0;
- *buf++ = BGP_AF;
- *buf++ = BGP_RR_BEGIN;
- *buf++ = 1; /* SAFI */
- return buf;
+ while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip6_addr))))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_ip6 *net = (void *) px->net;
+
+ /* Encode path ID */
+ if (s->add_path)
+ {
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
+ }
+
+ ip6_addr a = ip6_hton(net->prefix);
+ uint b = (net->pxlen + 7) / 8;
+
+ /* Encode prefix length */
+ *pos = net->pxlen;
+ ADVANCE(pos, size, 1);
+
+ /* Encode prefix body */
+ memcpy(pos, &a, b);
+ ADVANCE(pos, size, b);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
}
-static inline byte *
-bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
+static void
+bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
- struct bgp_proto *p = conn->bgp;
- BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
+ while (len)
+ {
+ net_addr_ip6 net;
+ u32 path_id = 0;
- /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
- *buf++ = 0;
- *buf++ = BGP_AF;
- *buf++ = BGP_RR_END;
- *buf++ = 1; /* SAFI */
- return buf;
+ /* Decode path ID */
+ if (s->add_path)
+ {
+ if (len < 5)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
+ }
+
+ /* Decode prefix length */
+ uint l = *pos;
+ uint b = (l + 7) / 8;
+ ADVANCE(pos, len, 1);
+
+ if (l > IP6_MAX_PREFIX_LENGTH)
+ bgp_parse_error(s, 10);
+
+ if (len < b)
+ bgp_parse_error(s, 1);
+
+ /* Decode prefix body */
+ ip6_addr addr = IP6_NONE;
+ memcpy(&addr, pos, b);
+ ADVANCE(pos, len, b);
+
+ net = NET_ADDR_IP6(ip6_ntoh(addr), l);
+ net_normalize_ip6(&net);
+
+ // XXXX validate prefix
+
+ bgp_rte_update(s, (net_addr *) &net, path_id, a);
+ }
}
+static uint
+bgp_encode_next_hop_ip6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
+{
+ ip_addr *nh = (void *) a->u.ptr->data;
+ uint len = a->u.ptr->length;
+
+ ASSERT((len == 16) || (len == 32));
+
+ put_ip6(buf, ipa_to_ip6(nh[0]));
+
+ if (len == 32)
+ put_ip6(buf+16, ipa_to_ip6(nh[1]));
+
+ return len;
+}
static void
-bgp_create_header(byte *buf, uint len, uint type)
+bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
- memset(buf, 0xff, 16); /* Marker */
- put_u16(buf+16, len);
- buf[18] = type;
+ struct adata *ad = lp_alloc_adata(s->pool, 32);
+ ip_addr *nh = (void *) ad->data;
+
+ if ((len != 16) && (len != 32))
+ bgp_parse_error(s, 9);
+
+ nh[0] = ipa_from_ip6(get_ip6(data));
+ nh[1] = (len == 32) ? ipa_from_ip6(get_ip6(data+16)) : IPA_NONE;
+
+ if (ip6_is_link_local(nh[0]))
+ {
+ nh[1] = nh[0];
+ nh[0] = IPA_NONE;
+ }
+
+ if (!ip6_is_link_local(nh[1]))
+ nh[1] = IPA_NONE;
+
+ if (ipa_zero(nh[1]))
+ ad->length = 16;
+
+ // XXXX validate next hop
+
+ bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
+ bgp_apply_next_hop(s, a, nh[0], nh[1]);
}
-/**
- * bgp_fire_tx - transmit packets
- * @conn: connection
- *
- * Whenever the transmit buffers of the underlying TCP connection
- * are free and we have any packets queued for sending, the socket functions
- * call bgp_fire_tx() which takes care of selecting the highest priority packet
- * queued (Notification > Keepalive > Open > Update), assembling its header
- * and body and sending it to the connection.
- */
-static int
-bgp_fire_tx(struct bgp_conn *conn)
+
+static uint
+bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
- struct bgp_proto *p = conn->bgp;
- uint s = conn->packets_to_send;
- sock *sk = conn->sk;
- byte *buf, *pkt, *end;
- int type;
+ byte *pos = buf;
+
+ while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_flow4 *net = (void *) px->net;
+ uint flen = net->length - sizeof(net_addr_flow4);
- if (!sk)
+ /* Encode path ID */
+ if (s->add_path)
{
- conn->packets_to_send = 0;
- return 0;
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
}
- buf = sk->tbuf;
- pkt = buf + BGP_HEADER_LENGTH;
- if (s & (1 << PKT_SCHEDULE_CLOSE))
+ if (flen > size)
+ break;
+
+ /* Copy whole flow data including length */
+ memcpy(pos, net->data, flen);
+ ADVANCE(pos, size, flen);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
+}
+
+static void
+bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+ while (len)
+ {
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
{
- /* We can finally close connection and enter idle state */
- bgp_conn_enter_idle_state(conn);
- return 0;
+ if (len < 4)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
}
- if (s & (1 << PKT_NOTIFICATION))
+
+ if (len < 2)
+ bgp_parse_error(s, 1);
+
+ /* Decode flow length */
+ uint hlen = flow_hdr_length(pos);
+ uint dlen = flow_read_length(pos);
+ uint flen = hlen + dlen;
+ byte *data = pos + hlen;
+
+ if (len < flen)
+ bgp_parse_error(s, 1);
+
+ /* Validate flow data */
+ enum flow_validated_state r = flow4_validate(data, dlen);
+ if (r != FLOW_ST_VALID)
{
- s = 1 << PKT_SCHEDULE_CLOSE;
- type = PKT_NOTIFICATION;
- end = bgp_create_notification(conn, pkt);
+ log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
+ bgp_parse_error(s, 1);
}
- else if (s & (1 << PKT_KEEPALIVE))
+
+ if (data[0] != FLOW_TYPE_DST_PREFIX)
{
- s &= ~(1 << PKT_KEEPALIVE);
- type = PKT_KEEPALIVE;
- end = pkt; /* Keepalives carry no data */
- BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
- bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
+ log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
+ bgp_parse_error(s, 1);
}
- else if (s & (1 << PKT_OPEN))
+
+ /* Decode dst prefix */
+ ip4_addr px = IP4_NONE;
+ uint pxlen = data[1];
+
+ // FIXME: Use some generic function
+ memcpy(&px, data, BYTES(pxlen));
+ px = ip4_and(px, ip4_mkmask(pxlen));
+
+ /* Prepare the flow */
+ net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
+ net_fill_flow4(n, px, pxlen, pos, flen);
+ ADVANCE(pos, len, flen);
+
+ bgp_rte_update(s, n, path_id, a);
+ }
+}
+
+
+static uint
+bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
+{
+ byte *pos = buf;
+
+ while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
+ {
+ struct bgp_prefix *px = HEAD(buck->prefixes);
+ struct net_addr_flow6 *net = (void *) px->net;
+ uint flen = net->length - sizeof(net_addr_flow6);
+
+ /* Encode path ID */
+ if (s->add_path)
{
- s &= ~(1 << PKT_OPEN);
- type = PKT_OPEN;
- end = bgp_create_open(conn, pkt);
+ put_u32(pos, px->path_id);
+ ADVANCE(pos, size, 4);
}
- else if (s & (1 << PKT_ROUTE_REFRESH))
+
+ if (flen > size)
+ break;
+
+ /* Copy whole flow data including length */
+ memcpy(pos, net->data, flen);
+ ADVANCE(pos, size, flen);
+
+ bgp_free_prefix(s->channel, px);
+ }
+
+ return pos - buf;
+}
+
+static void
+bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
+{
+ while (len)
+ {
+ u32 path_id = 0;
+
+ /* Decode path ID */
+ if (s->add_path)
{
- s &= ~(1 << PKT_ROUTE_REFRESH);
- type = PKT_ROUTE_REFRESH;
- end = bgp_create_route_refresh(conn, pkt);
+ if (len < 4)
+ bgp_parse_error(s, 1);
+
+ path_id = get_u32(pos);
+ ADVANCE(pos, len, 4);
}
- else if (s & (1 << PKT_BEGIN_REFRESH))
+
+ if (len < 2)
+ bgp_parse_error(s, 1);
+
+ /* Decode flow length */
+ uint hlen = flow_hdr_length(pos);
+ uint dlen = flow_read_length(pos);
+ uint flen = hlen + dlen;
+ byte *data = pos + hlen;
+
+ if (len < flen)
+ bgp_parse_error(s, 1);
+
+ /* Validate flow data */
+ enum flow_validated_state r = flow6_validate(data, dlen);
+ if (r != FLOW_ST_VALID)
{
- s &= ~(1 << PKT_BEGIN_REFRESH);
- type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
- end = bgp_create_begin_refresh(conn, pkt);
+ log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
+ bgp_parse_error(s, 1);
}
- else if (s & (1 << PKT_UPDATE))
+
+ if (data[0] != FLOW_TYPE_DST_PREFIX)
{
- type = PKT_UPDATE;
- end = bgp_create_update(conn, pkt);
+ log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
+ bgp_parse_error(s, 1);
+ }
- if (!end)
- {
- /* No update to send, perhaps we need to send End-of-RIB or EoRR */
+ /* Decode dst prefix */
+ ip6_addr px = IP6_NONE;
+ uint pxlen = data[1];
- conn->packets_to_send = 0;
+ // FIXME: Use some generic function
+ memcpy(&px, data, BYTES(pxlen));
+ px = ip6_and(px, ip6_mkmask(pxlen));
- if (p->feed_state == BFS_LOADED)
- {
- type = PKT_UPDATE;
- end = bgp_create_end_mark(conn, pkt);
- }
+ /* Prepare the flow */
+ net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
+ net_fill_flow6(n, px, pxlen, pos, flen);
+ ADVANCE(pos, len, flen);
- else if (p->feed_state == BFS_REFRESHED)
- {
- type = PKT_ROUTE_REFRESH;
- end = bgp_create_end_refresh(conn, pkt);
- }
+ bgp_rte_update(s, n, path_id, a);
+ }
+}
- else /* Really nothing to send */
- return 0;
- p->feed_state = BFS_NONE;
- }
- }
- else
- return 0;
+static const struct bgp_af_desc bgp_af_table[] = {
+ {
+ .afi = BGP_AF_IPV4,
+ .net = NET_IP4,
+ .name = "ipv4",
+ .encode_nlri = bgp_encode_nlri_ip4,
+ .decode_nlri = bgp_decode_nlri_ip4,
+ .encode_next_hop = bgp_encode_next_hop_ip4,
+ .decode_next_hop = bgp_decode_next_hop_ip4,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_IPV4_MC,
+ .net = NET_IP4,
+ .name = "ipv4-mc",
+ .encode_nlri = bgp_encode_nlri_ip4,
+ .decode_nlri = bgp_decode_nlri_ip4,
+ .encode_next_hop = bgp_encode_next_hop_ip4,
+ .decode_next_hop = bgp_decode_next_hop_ip4,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_FLOW4,
+ .net = NET_FLOW4,
+ .name = "flow4",
+ .encode_nlri = bgp_encode_nlri_flow4,
+ .decode_nlri = bgp_decode_nlri_flow4,
+ .encode_next_hop = bgp_encode_next_hop_none,
+ .decode_next_hop = bgp_decode_next_hop_none,
+ .update_next_hop = bgp_update_next_hop_none,
+ },
+ {
+ .afi = BGP_AF_IPV6,
+ .net = NET_IP6,
+ .name = "ipv6",
+ .encode_nlri = bgp_encode_nlri_ip6,
+ .decode_nlri = bgp_decode_nlri_ip6,
+ .encode_next_hop = bgp_encode_next_hop_ip6,
+ .decode_next_hop = bgp_decode_next_hop_ip6,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_IPV6_MC,
+ .net = NET_IP6,
+ .name = "ipv6-mc",
+ .encode_nlri = bgp_encode_nlri_ip6,
+ .decode_nlri = bgp_decode_nlri_ip6,
+ .encode_next_hop = bgp_encode_next_hop_ip6,
+ .decode_next_hop = bgp_decode_next_hop_ip6,
+ .update_next_hop = bgp_update_next_hop_ip,
+ },
+ {
+ .afi = BGP_AF_FLOW6,
+ .net = NET_FLOW6,
+ .name = "flow6",
+ .encode_nlri = bgp_encode_nlri_flow6,
+ .decode_nlri = bgp_decode_nlri_flow6,
+ .encode_next_hop = bgp_encode_next_hop_none,
+ .decode_next_hop = bgp_decode_next_hop_none,
+ .update_next_hop = bgp_update_next_hop_none,
+ },
+};
+
+const struct bgp_af_desc *
+bgp_get_af_desc(u32 afi)
+{
+ uint i;
+ for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++)
+ if (bgp_af_table[i].afi == afi)
+ return &bgp_af_table[i];
- conn->packets_to_send = s;
- bgp_create_header(buf, end - buf, type);
- return sk_send(sk, end - buf);
+ return NULL;
}
-/**
- * bgp_schedule_packet - schedule a packet for transmission
- * @conn: connection
- * @type: packet type
- *
- * Schedule a packet of type @type to be sent as soon as possible.
- */
-void
-bgp_schedule_packet(struct bgp_conn *conn, int type)
+static inline uint
+bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- DBG("BGP: Scheduling packet type %d\n", type);
- conn->packets_to_send |= 1 << type;
- if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev))
- ev_schedule(conn->tx_ev);
+ return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
}
-void
-bgp_kick_tx(void *vconn)
+static inline uint
+bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
{
- struct bgp_conn *conn = vconn;
-
- DBG("BGP: kicking TX\n");
- while (bgp_fire_tx(conn) > 0)
- ;
+ return s->channel->desc->encode_next_hop(s, nh, buf, 255);
}
void
-bgp_tx(sock *sk)
+bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
{
- struct bgp_conn *conn = sk->data;
-
- DBG("BGP: TX hook\n");
- while (bgp_fire_tx(conn) > 0)
- ;
+ s->channel->desc->update_next_hop(s, a, to);
}
-/* Capatibility negotiation as per RFC 2842 */
+#define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
-void
-bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
+static byte *
+bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- // struct bgp_proto *p = conn->bgp;
- int i, cl;
+ /*
+ * 2 B Withdrawn Routes Length (zero)
+ * --- IPv4 Withdrawn Routes NLRI (unused)
+ * 2 B Total Path Attribute Length
+ * var Path Attributes
+ * var IPv4 Network Layer Reachability Information
+ */
+
+ int lr, la;
+
+ la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
+ if (la < 0)
+ {
+ /* Attribute list too long */
+ bgp_withdraw_bucket(s->channel, buck);
+ return NULL;
+ }
- while (len > 0)
- {
- if (len < 2 || len < 2 + opt[1])
- goto err;
+ put_u16(buf+0, 0);
+ put_u16(buf+2, la);
- cl = opt[1];
+ lr = bgp_encode_nlri(s, buck, buf+4+la, end);
- switch (opt[0])
- {
- case 2: /* Route refresh capability, RFC 2918 */
- if (cl != 0)
- goto err;
- conn->peer_refresh_support = 1;
- break;
+ return buf+4+la+lr;
+}
- case 6: /* Extended message length capability, draft */
- if (cl != 0)
- goto err;
- conn->peer_ext_messages_support = 1;
- break;
+static byte *
+bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
+{
+ /*
+ * 2 B IPv4 Withdrawn Routes Length (zero)
+ * --- IPv4 Withdrawn Routes NLRI (unused)
+ * 2 B Total Path Attribute Length
+ * 1 B MP_REACH_NLRI hdr - Attribute Flags
+ * 1 B MP_REACH_NLRI hdr - Attribute Type Code
+ * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
+ * 2 B MP_REACH_NLRI data - Address Family Identifier
+ * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
+ * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
+ * var MP_REACH_NLRI data - Network Address of Next Hop
+ * 1 B MP_REACH_NLRI data - Reserved (zero)
+ * var MP_REACH_NLRI data - Network Layer Reachability Information
+ * var Rest of Path Attributes
+ * --- IPv4 Network Layer Reachability Information (unused)
+ */
+
+ int lh, lr, la; /* Lengths of next hop, NLRI and attributes */
+
+ /* Begin of MP_REACH_NLRI atribute */
+ buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
+ buf[5] = BA_MP_REACH_NLRI;
+ put_u16(buf+6, 0); /* Will be fixed later */
+ put_af3(buf+8, s->channel->afi);
+ byte *pos = buf+11;
+
+ /* Encode attributes to temporary buffer */
+ byte *abuf = alloca(MAX_ATTRS_LENGTH);
+ la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
+ if (la < 0)
+ {
+ /* Attribute list too long */
+ bgp_withdraw_bucket(s->channel, buck);
+ return NULL;
+ }
- case 64: /* Graceful restart capability, RFC 4724 */
- if (cl % 4 != 2)
- goto err;
- conn->peer_gr_aware = 1;
- conn->peer_gr_able = 0;
- conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
- conn->peer_gr_flags = opt[2] & 0xf0;
- conn->peer_gr_aflags = 0;
- for (i = 2; i < cl; i += 4)
- if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
- {
- conn->peer_gr_able = 1;
- conn->peer_gr_aflags = opt[2+i+3];
- }
- break;
+ /* Encode the next hop */
+ lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
+ *pos = lh;
+ pos += 1+lh;
- case 65: /* AS4 capability, RFC 4893 */
- if (cl != 4)
- goto err;
- conn->peer_as4_support = 1;
- if (conn->bgp->cf->enable_as4)
- conn->advertised_as = get_u32(opt + 2);
- break;
+ /* Reserved field */
+ *pos++ = 0;
- case 69: /* ADD-PATH capability, draft */
- if (cl % 4)
- goto err;
- for (i = 0; i < cl; i += 4)
- if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
- conn->peer_add_path = opt[2+i+3];
- if (conn->peer_add_path > ADD_PATH_FULL)
- goto err;
- break;
+ /* Encode the NLRI */
+ lr = bgp_encode_nlri(s, buck, pos, end - la);
+ pos += lr;
- case 70: /* Enhanced route refresh capability, RFC 7313 */
- if (cl != 0)
- goto err;
- conn->peer_enhanced_refresh_support = 1;
- break;
+ /* End of MP_REACH_NLRI atribute, update data length */
+ put_u16(buf+6, pos-buf-8);
- /* We can safely ignore all other capabilities */
- }
- len -= 2 + cl;
- opt += 2 + cl;
- }
- return;
+ /* Copy remaining attributes */
+ memcpy(pos, abuf, la);
+ pos += la;
- err:
- bgp_error(conn, 2, 0, NULL, 0);
- return;
+ /* Initial UPDATE fields */
+ put_u16(buf+0, 0);
+ put_u16(buf+2, pos-buf-4);
+
+ return pos;
}
-static int
-bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
+#undef MAX_ATTRS_LENGTH
+
+static byte *
+bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- struct bgp_proto *p = conn->bgp;
- int ol;
+ /*
+ * 2 B Withdrawn Routes Length
+ * var IPv4 Withdrawn Routes NLRI
+ * 2 B Total Path Attribute Length (zero)
+ * --- Path Attributes (unused)
+ * --- IPv4 Network Layer Reachability Information (unused)
+ */
- while (len > 0)
- {
- if (len < 2 || len < 2 + opt[1])
- { bgp_error(conn, 2, 0, NULL, 0); return 0; }
-#ifdef LOCAL_DEBUG
- {
- int i;
- DBG("\tOption %02x:", opt[0]);
- for(i=0; i<opt[1]; i++)
- DBG(" %02x", opt[2+i]);
- DBG("\n");
- }
-#endif
+ uint len = bgp_encode_nlri(s, buck, buf+2, end);
- ol = opt[1];
- switch (opt[0])
- {
- case 2:
- if (conn->start_state == BSS_CONNECT_NOCAP)
- BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
- else
- bgp_parse_capabilities(conn, opt + 2, ol);
- break;
+ put_u16(buf+0, len);
+ put_u16(buf+2+len, 0);
- default:
- /*
- * BGP specs don't tell us to send which option
- * we didn't recognize, but it's common practice
- * to do so. Also, capability negotiation with
- * Cisco routers doesn't work without that.
- */
- bgp_error(conn, 2, 4, opt, ol);
- return 0;
- }
- len -= 2 + ol;
- opt += 2 + ol;
- }
- return 0;
+ return buf+4+len;
}
-static void
-bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
+static byte *
+bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- struct bgp_conn *other;
- struct bgp_proto *p = conn->bgp;
- unsigned hold;
- u16 base_as;
- u32 id;
+ /*
+ * 2 B Withdrawn Routes Length (zero)
+ * --- IPv4 Withdrawn Routes NLRI (unused)
+ * 2 B Total Path Attribute Length
+ * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
+ * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
+ * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
+ * 2 B MP_UNREACH_NLRI data - Address Family Identifier
+ * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
+ * var MP_UNREACH_NLRI data - Network Layer Reachability Information
+ * --- IPv4 Network Layer Reachability Information (unused)
+ */
+
+ uint len = bgp_encode_nlri(s, buck, buf+11, end);
- /* Check state */
- if (conn->state != BS_OPENSENT)
- { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
+ put_u16(buf+0, 0);
+ put_u16(buf+2, 7+len);
- /* Check message contents */
- if (len < 29 || len != 29U + pkt[28])
- { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- if (pkt[19] != BGP_VERSION)
- { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
- conn->advertised_as = base_as = get_u16(pkt+20);
- hold = get_u16(pkt+22);
- id = get_u32(pkt+24);
- BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
+ /* Begin of MP_UNREACH_NLRI atribute */
+ buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
+ buf[5] = BA_MP_UNREACH_NLRI;
+ put_u16(buf+6, 3+len);
+ put_af3(buf+8, s->channel->afi);
- if (bgp_parse_options(conn, pkt+29, pkt[28]))
- return;
+ return buf+11+len;
+}
- if (hold > 0 && hold < 3)
- { bgp_error(conn, 2, 6, pkt+22, 2); return; }
+static byte *
+bgp_create_update(struct bgp_channel *c, byte *buf)
+{
+ struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_bucket *buck;
+ byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
+ byte *res = NULL;
+
+ /* Initialize write state */
+ struct bgp_write_state s = {
+ .proto = p,
+ .channel = c,
+ .pool = bgp_linpool,
+ .as4_session = p->as4_session,
+ .add_path = c->add_path_tx,
+ };
+
+again:
+
+ /* Try unreachable bucket */
+ if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ {
+ res = (c->afi == BGP_AF_IPV4) ?
+ bgp_create_ip_unreach(&s, buck, buf, end):
+ bgp_create_mp_unreach(&s, buck, buf, end);
- /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
- if (!id || (p->is_internal && id == p->local_id))
- { bgp_error(conn, 2, 3, pkt+24, -4); return; }
+ goto done;
+ }
- if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
- log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
+ /* Try reachable buckets */
+ if (!EMPTY_LIST(c->bucket_queue))
+ {
+ buck = HEAD(c->bucket_queue);
- if (conn->advertised_as != p->remote_as)
+ /* Cleanup empty buckets */
+ if (EMPTY_LIST(buck->prefixes))
{
- if (conn->peer_as4_support)
- {
- u32 val = htonl(conn->advertised_as);
- bgp_error(conn, 2, 2, (byte *) &val, 4);
- }
- else
- bgp_error(conn, 2, 2, pkt+20, 2);
-
- return;
+ bgp_free_bucket(c, buck);
+ goto again;
}
- /* Check the other connection */
- other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
- switch (other->state)
- {
- case BS_CONNECT:
- case BS_ACTIVE:
- /* Stop outgoing connection attempts */
- bgp_conn_enter_idle_state(other);
- break;
+ res = (c->afi == BGP_AF_IPV4) ?
+ bgp_create_ip_reach(&s, buck, buf, end):
+ bgp_create_mp_reach(&s, buck, buf, end);
- case BS_IDLE:
- case BS_OPENSENT:
- case BS_CLOSE:
- break;
+ if (EMPTY_LIST(buck->prefixes))
+ bgp_free_bucket(c, buck);
+ else
+ bgp_defer_bucket(c, buck);
- case BS_OPENCONFIRM:
- /*
- * Description of collision detection rules in RFC 4271 is confusing and
- * contradictory, but it is essentially:
- *
- * 1. Router with higher ID is dominant
- * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
- * 3. When both connections are in OpenConfirm state, one initiated by
- * the dominant router is kept.
- *
- * The first line in the expression below evaluates whether the neighbor
- * is dominant, the second line whether the new connection was initiated
- * by the neighbor. If both are true (or both are false), we keep the new
- * connection, otherwise we keep the old one.
- */
- if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as)))
- == (conn == &p->incoming_conn))
- {
- /* Should close the other connection */
- BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
- bgp_error(other, 6, 7, NULL, 0);
- break;
- }
- /* Fall thru */
- case BS_ESTABLISHED:
- /* Should close this connection */
- BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
- bgp_error(conn, 6, 7, NULL, 0);
- return;
- default:
- bug("bgp_rx_open: Unknown state");
- }
+ if (!res)
+ goto again;
- /* Update our local variables */
- conn->hold_time = MIN(hold, p->cf->hold_time);
- conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
- p->remote_id = id;
- p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
- p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
- p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
- p->gr_ready = p->cf->gr_mode && conn->peer_gr_able;
- p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support;
+ goto done;
+ }
- if (p->add_path_tx)
- p->p.accept_ra_types = RA_ANY;
+ /* No more prefixes to send */
+ return NULL;
- DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
+done:
+ BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
+ lp_flush(s.pool);
- bgp_schedule_packet(conn, PKT_KEEPALIVE);
- bgp_start_timer(conn->hold_timer, conn->hold_time);
- bgp_conn_enter_openconfirm_state(conn);
+ return res;
}
+static byte *
+bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
+{
+ /* Empty update packet */
+ put_u32(buf, 0);
-static inline void
-bgp_rx_end_mark(struct bgp_proto *p)
+ return buf+4;
+}
+
+static byte *
+bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
{
- BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
+ put_u16(buf+0, 0);
+ put_u16(buf+2, 6); /* length 4--9 */
- if (p->load_state == BFS_LOADING)
- p->load_state = BFS_NONE;
+ /* Empty MP_UNREACH_NLRI atribute */
+ buf[4] = BAF_OPTIONAL;
+ buf[5] = BA_MP_UNREACH_NLRI;
+ buf[6] = 3; /* Length 7--9 */
+ put_af3(buf+7, c->afi);
- if (p->p.gr_recovery)
- proto_graceful_restart_unlock(&p->p);
-
- if (p->gr_active)
- bgp_graceful_restart_done(p);
-}
-
-
-#define DECODE_PREFIX(pp, ll) do { \
- if (p->add_path_rx) \
- { \
- if (ll < 5) { err=1; goto done; } \
- path_id = get_u32(pp); \
- pp += 4; \
- ll -= 4; \
- } \
- int b = *pp++; \
- int q; \
- ll--; \
- if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; } \
- q = (b+7) / 8; \
- if (ll < q) { err=1; goto done; } \
- memcpy(&prefix, pp, q); \
- pp += q; \
- ll -= q; \
- ipa_ntoh(prefix); \
- prefix = ipa_and(prefix, ipa_mkmask(b)); \
- pxlen = b; \
-} while (0)
+ return buf+10;
+}
+static byte *
+bgp_create_end_mark(struct bgp_channel *c, byte *buf)
+{
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
+
+ return (c->afi == BGP_AF_IPV4) ?
+ bgp_create_ip_end_mark(c, buf):
+ bgp_create_mp_end_mark(c, buf);
+}
static inline void
-bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
- u32 path_id, u32 *last_id, struct rte_src **src,
- rta *a0, rta **a)
+bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi)
{
- if (path_id != *last_id)
- {
- *src = rt_get_source(&p->p, path_id);
- *last_id = path_id;
+ struct bgp_proto *p = s->proto;
+ struct bgp_channel *c = bgp_get_channel(p, afi);
- if (*a)
- {
- rta_free(*a);
- *a = NULL;
- }
- }
+ BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
- /* Prepare cached route attributes */
- if (!*a)
- {
- a0->src = *src;
+ if (!c)
+ DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
- /* Workaround for rta_lookup() breaking eattrs */
- ea_list *ea = a0->eattrs;
- *a = rta_lookup(a0);
- a0->eattrs = ea;
- }
+ if (c->load_state == BFS_LOADING)
+ c->load_state = BFS_NONE;
- net *n = net_get(p->p.table, prefix, pxlen);
- rte *e = rte_get_temp(rta_clone(*a));
- e->net = n;
- e->pflags = 0;
- e->u.bgp.suppressed = 0;
- rte_update2(p->p.main_ahook, n, e, *src);
+ if (p->p.gr_recovery)
+ channel_graceful_restart_unlock(&c->c);
+
+ if (c->gr_active)
+ bgp_graceful_restart_done(c);
}
static inline void
-bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
- u32 path_id, u32 *last_id, struct rte_src **src)
+bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
{
- if (path_id != *last_id)
- {
- *src = rt_find_source(&p->p, path_id);
- *last_id = path_id;
- }
+ struct bgp_channel *c = bgp_get_channel(s->proto, afi);
+ rta *a = NULL;
- net *n = net_find(p->p.table, prefix, pxlen);
- rte_update2( p->p.main_ahook, n, NULL, *src);
-}
+ if (!c)
+ DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
-static inline int
-bgp_set_next_hop(struct bgp_proto *p, rta *a)
-{
- struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
- ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
+ s->channel = c;
+ s->add_path = c->add_path_rx;
-#ifdef IPV6
- int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
+ s->last_id = 0;
+ s->last_src = s->proto->p.main_source;
- /* First address should not be link-local, but may be zero in direct mode */
- if (ipa_is_link_local(*nexthop))
- *nexthop = IPA_NONE;
-#else
- int second = 0;
-#endif
+ /*
+ * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
+ * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
+ * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
+ * decode_next_hop hooks) by restoring a->eattrs afterwards.
+ */
- if (p->cf->gw_mode == GW_DIRECT)
- {
- neighbor *ng = NULL;
+ if (ea)
+ {
+ a = alloca(sizeof(struct rta));
+ memset(a, 0, sizeof(struct rta));
- if (ipa_nonzero(*nexthop))
- ng = neigh_find(&p->p, nexthop, 0);
- else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
- ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
+ a->source = RTS_BGP;
+ a->scope = SCOPE_UNIVERSE;
+ a->cast = RTC_UNICAST;
+ a->dest = RTD_UNREACHABLE;
+ a->from = s->proto->cf->remote_ip;
+ a->eattrs = ea;
- /* Fallback */
- if (!ng)
- ng = p->neigh;
+ c->desc->decode_next_hop(s, nh, nh_len, a);
- if (ng->scope == SCOPE_HOST)
- return 0;
+ /* Handle withdraw during next hop decoding */
+ if (s->err_withdraw)
+ a = NULL;
+ }
- a->dest = RTD_ROUTER;
- a->gw = ng->addr;
- a->iface = ng->iface;
- a->hostentry = NULL;
- a->igp_metric = 0;
- }
- else /* GW_RECURSIVE */
- {
- if (ipa_zero(*nexthop))
- return 0;
+ c->desc->decode_nlri(s, nlri, len, a);
- rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
- }
-
- return 1;
+ rta_free(s->cached_rta);
+ s->cached_rta = NULL;
}
-#ifndef IPV6 /* IPv4 version */
-
static void
-bgp_do_rx_update(struct bgp_conn *conn,
- byte *withdrawn, int withdrawn_len,
- byte *nlri, int nlri_len,
- byte *attrs, int attr_len)
+bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
{
struct bgp_proto *p = conn->bgp;
- struct rte_src *src = p->p.main_source;
- rta *a0, *a = NULL;
- ip_addr prefix;
- int pxlen, err = 0;
- u32 path_id = 0;
- u32 last_id = 0;
+ ea_list *ea = NULL;
- /* Check for End-of-RIB marker */
- if (!withdrawn_len && !attr_len && !nlri_len)
- {
- bgp_rx_end_mark(p);
- return;
- }
+ BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
- /* Withdraw routes */
- while (withdrawn_len)
- {
- DECODE_PREFIX(withdrawn, withdrawn_len);
- DBG("Withdraw %I/%d\n", prefix, pxlen);
+ /* Workaround for some BGP implementations that skip initial KEEPALIVE */
+ if (conn->state == BS_OPENCONFIRM)
+ bgp_conn_enter_established_state(conn);
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
+ if (conn->state != BS_ESTABLISHED)
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- if (!attr_len && !nlri_len) /* shortcut */
- return;
+ bgp_start_timer(conn->hold_timer, conn->hold_time);
- a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
+ /* Initialize parse state */
+ struct bgp_parse_state s = {
+ .proto = p,
+ .pool = bgp_linpool,
+ .as4_session = p->as4_session,
+ };
- if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
- return;
+ /* Parse error handler */
+ if (setjmp(s.err_jmpbuf))
+ {
+ bgp_error(conn, 3, s.err_subcode, NULL, 0);
+ goto done;
+ }
- if (a0 && nlri_len && !bgp_set_next_hop(p, a0))
- a0 = NULL;
+ /* Check minimal length */
+ if (len < 23)
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- last_id = 0;
- src = p->p.main_source;
+ /* Skip fixed header */
+ uint pos = 19;
- while (nlri_len)
- {
- DECODE_PREFIX(nlri, nlri_len);
- DBG("Add %I/%d\n", prefix, pxlen);
+ /*
+ * UPDATE message format
+ *
+ * 2 B IPv4 Withdrawn Routes Length
+ * var IPv4 Withdrawn Routes NLRI
+ * 2 B Total Path Attribute Length
+ * var Path Attributes
+ * var IPv4 Reachable Routes NLRI
+ */
- if (a0)
- bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
- else /* Forced withdraw as a result of soft error */
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
+ s.ip_unreach_len = get_u16(pkt + pos);
+ s.ip_unreach_nlri = pkt + pos + 2;
+ pos += 2 + s.ip_unreach_len;
- done:
- if (a)
- rta_free(a);
+ if (pos + 2 > len)
+ bgp_parse_error(&s, 1);
- if (err)
- bgp_error(conn, 3, err, NULL, 0);
+ s.attr_len = get_u16(pkt + pos);
+ s.attrs = pkt + pos + 2;
+ pos += 2 + s.attr_len;
+
+ if (pos > len)
+ bgp_parse_error(&s, 1);
+
+ s.ip_reach_len = len - pos;
+ s.ip_reach_nlri = pkt + pos;
+
+
+ if (s.attr_len)
+ ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
+
+ /* Check for End-of-RIB marker */
+ if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
+ { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; }
+ /* Check for MP End-of-RIB marker */
+ if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
+ !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af)
+ { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; }
+
+ if (s.ip_unreach_len)
+ bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);
+
+ if (s.mp_unreach_len)
+ bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
+
+ if (s.ip_reach_len)
+ bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
+ ea, s.ip_next_hop_data, s.ip_next_hop_len);
+
+ if (s.mp_reach_len)
+ bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
+ ea, s.mp_next_hop_data, s.mp_next_hop_len);
+
+done:
+ rta_free(s.cached_rta);
+ lp_flush(s.pool);
return;
}
-#else /* IPv6 version */
-#define DO_NLRI(name) \
- x = p->name##_start; \
- len = len0 = p->name##_len; \
- if (len) \
- { \
- if (len < 3) { err=9; goto done; } \
- af = get_u16(x); \
- x += 3; \
- len -= 3; \
- DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);\
- } \
- else \
- af = 0; \
- if (af == BGP_AF_IPV6)
+/*
+ * ROUTE-REFRESH
+ */
-static void
-bgp_attach_next_hop(rta *a0, byte *x)
+static inline byte *
+bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
{
- ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
- memcpy(nh, x+1, 16);
- ipa_ntoh(nh[0]);
+ struct bgp_proto *p = (void *) c->c.proto;
- /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
- if (*x == 32)
- {
- memcpy(nh+1, x+17, 16);
- ipa_ntoh(nh[1]);
- }
- else
- nh[1] = IPA_NONE;
+ BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
+
+ /* Original route refresh request, RFC 2918 */
+ put_af4(buf, c->afi);
+ buf[2] = BGP_RR_REQUEST;
+
+ return buf+4;
}
+static inline byte *
+bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
+{
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
+
+ /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
+ put_af4(buf, c->afi);
+ buf[2] = BGP_RR_BEGIN;
+
+ return buf+4;
+}
+
+static inline byte *
+bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
+{
+ struct bgp_proto *p = (void *) c->c.proto;
+
+ BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
+
+ /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
+ put_af4(buf, c->afi);
+ buf[2] = BGP_RR_END;
+
+ return buf+4;
+}
static void
-bgp_do_rx_update(struct bgp_conn *conn,
- byte *withdrawn UNUSED, int withdrawn_len,
- byte *nlri UNUSED, int nlri_len,
- byte *attrs, int attr_len)
+bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
{
struct bgp_proto *p = conn->bgp;
- struct rte_src *src = p->p.main_source;
- byte *x;
- int len, len0;
- unsigned af;
- rta *a0, *a = NULL;
- ip_addr prefix;
- int pxlen, err = 0;
- u32 path_id = 0;
- u32 last_id = 0;
-
- p->mp_reach_len = 0;
- p->mp_unreach_len = 0;
- a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
-
- if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
- return;
- /* Check for End-of-RIB marker */
- if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len &&
- (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
- {
- bgp_rx_end_mark(p);
- return;
- }
+ if (conn->state != BS_ESTABLISHED)
+ { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- DO_NLRI(mp_unreach)
- {
- while (len)
- {
- DECODE_PREFIX(x, len);
- DBG("Withdraw %I/%d\n", prefix, pxlen);
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
- }
+ if (!conn->local_caps->route_refresh)
+ { bgp_error(conn, 1, 3, pkt+18, 1); return; }
- DO_NLRI(mp_reach)
- {
- /* Create fake NEXT_HOP attribute */
- if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
- { err = 9; goto done; }
+ if (len < (BGP_HEADER_LENGTH + 4))
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- if (a0)
- bgp_attach_next_hop(a0, x);
+ if (len > (BGP_HEADER_LENGTH + 4))
+ { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
- /* Also ignore one reserved byte */
- len -= *x + 2;
- x += *x + 2;
+ struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
+ if (!c)
+ {
+ log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
+ p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
+ return;
+ }
- if (a0 && ! bgp_set_next_hop(p, a0))
- a0 = NULL;
+ /* RFC 7313 redefined reserved field as RR message subtype */
+ uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
- last_id = 0;
- src = p->p.main_source;
+ switch (subtype)
+ {
+ case BGP_RR_REQUEST:
+ BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
+ channel_request_feeding(&c->c);
+ break;
- while (len)
- {
- DECODE_PREFIX(x, len);
- DBG("Add %I/%d\n", prefix, pxlen);
+ case BGP_RR_BEGIN:
+ BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
+ bgp_refresh_begin(c);
+ break;
- if (a0)
- bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
- else /* Forced withdraw as a result of soft error */
- bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
- }
- }
+ case BGP_RR_END:
+ BGP_TRACE(D_PACKETS, "Got END-OF-RR");
+ bgp_refresh_end(c);
+ break;
- done:
- if (a)
- rta_free(a);
+ default:
+ log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
+ p->p.name, subtype);
+ break;
+ }
+}
- if (err) /* Use subcode 9, not err */
- bgp_error(conn, 3, 9, NULL, 0);
+static inline struct bgp_channel *
+bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
+{
+ uint i = conn->last_channel;
- return;
+ /* Try the last channel, but at most several times */
+ if ((conn->channels_to_send & (1 << i)) &&
+ (conn->last_channel_count < 16))
+ goto found;
+
+ /* Find channel with non-zero channels_to_send */
+ do
+ {
+ i++;
+ if (i >= p->channel_count)
+ i = 0;
+ }
+ while (! (conn->channels_to_send & (1 << i)));
+
+ /* Use that channel */
+ conn->last_channel = i;
+ conn->last_channel_count = 0;
+
+found:
+ conn->last_channel_count++;
+ return p->channel_map[i];
}
-#endif
+static inline int
+bgp_send(struct bgp_conn *conn, uint type, uint len)
+{
+ sock *sk = conn->sk;
+ byte *buf = sk->tbuf;
-static void
-bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
+ memset(buf, 0xff, 16); /* Marker */
+ put_u16(buf+16, len);
+ buf[18] = type;
+
+ return sk_send(sk, len);
+}
+
+/**
+ * bgp_fire_tx - transmit packets
+ * @conn: connection
+ *
+ * Whenever the transmit buffers of the underlying TCP connection
+ * are free and we have any packets queued for sending, the socket functions
+ * call bgp_fire_tx() which takes care of selecting the highest priority packet
+ * queued (Notification > Keepalive > Open > Update), assembling its header
+ * and body and sending it to the connection.
+ */
+static int
+bgp_fire_tx(struct bgp_conn *conn)
{
struct bgp_proto *p = conn->bgp;
- byte *withdrawn, *attrs, *nlri;
- uint withdrawn_len, attr_len, nlri_len;
+ struct bgp_channel *c;
+ byte *buf, *pkt, *end;
+ uint s;
- BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
+ if (!conn->sk)
+ return 0;
- /* Workaround for some BGP implementations that skip initial KEEPALIVE */
- if (conn->state == BS_OPENCONFIRM)
- bgp_conn_enter_established_state(conn);
+ buf = conn->sk->tbuf;
+ pkt = buf + BGP_HEADER_LENGTH;
+ s = conn->packets_to_send;
- if (conn->state != BS_ESTABLISHED)
- { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
- bgp_start_timer(conn->hold_timer, conn->hold_time);
+ if (s & (1 << PKT_SCHEDULE_CLOSE))
+ {
+ /* We can finally close connection and enter idle state */
+ bgp_conn_enter_idle_state(conn);
+ return 0;
+ }
+ if (s & (1 << PKT_NOTIFICATION))
+ {
+ conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
+ end = bgp_create_notification(conn, pkt);
+ return bgp_send(conn, PKT_NOTIFICATION, end - buf);
+ }
+ else if (s & (1 << PKT_KEEPALIVE))
+ {
+ conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
+ BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
+ bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
+ return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
+ }
+ else if (s & (1 << PKT_OPEN))
+ {
+ conn->packets_to_send &= ~(1 << PKT_OPEN);
+ end = bgp_create_open(conn, pkt);
+ return bgp_send(conn, PKT_OPEN, end - buf);
+ }
+ else while (conn->channels_to_send)
+ {
+ c = bgp_get_channel_to_send(p, conn);
+ s = c->packets_to_send;
- /* Find parts of the packet and check sizes */
- if (len < 23)
+ if (s & (1 << PKT_ROUTE_REFRESH))
{
- bgp_error(conn, 1, 2, pkt+16, 2);
- return;
+ c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
+ end = bgp_create_route_refresh(c, pkt);
+ return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
}
- withdrawn = pkt + 21;
- withdrawn_len = get_u16(pkt + 19);
- if (withdrawn_len + 23 > len)
- goto malformed;
- attrs = withdrawn + withdrawn_len + 2;
- attr_len = get_u16(attrs - 2);
- if (withdrawn_len + attr_len + 23 > len)
- goto malformed;
- nlri = attrs + attr_len;
- nlri_len = len - withdrawn_len - attr_len - 23;
- if (!attr_len && nlri_len)
- goto malformed;
- DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
-
- lp_flush(bgp_linpool);
-
- bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
- return;
+ else if (s & (1 << PKT_BEGIN_REFRESH))
+ {
+ /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
+ c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
+ end = bgp_create_begin_refresh(c, pkt);
+ return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
+ }
+ else if (s & (1 << PKT_UPDATE))
+ {
+ end = bgp_create_update(c, pkt);
+ if (end)
+ return bgp_send(conn, PKT_UPDATE, end - buf);
+
+ /* No update to send, perhaps we need to send End-of-RIB or EoRR */
+ c->packets_to_send = 0;
+ conn->channels_to_send &= ~(1 << c->index);
+
+ if (c->feed_state == BFS_LOADED)
+ {
+ c->feed_state = BFS_NONE;
+ end = bgp_create_end_mark(c, pkt);
+ return bgp_send(conn, PKT_UPDATE, end - buf);
+ }
+
+ else if (c->feed_state == BFS_REFRESHED)
+ {
+ c->feed_state = BFS_NONE;
+ end = bgp_create_end_refresh(c, pkt);
+ return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
+ }
+ }
+ else if (s)
+ bug("Channel packets_to_send: %x", s);
+
+ c->packets_to_send = 0;
+ conn->channels_to_send &= ~(1 << c->index);
+ }
+
+ return 0;
+}
+
+/**
+ * bgp_schedule_packet - schedule a packet for transmission
+ * @conn: connection
+ * @c: channel
+ * @type: packet type
+ *
+ * Schedule a packet of type @type to be sent as soon as possible.
+ */
+void
+bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
+{
+ ASSERT(conn->sk);
+
+ DBG("BGP: Scheduling packet type %d\n", type);
+
+ if (c)
+ {
+ if (! conn->channels_to_send)
+ {
+ conn->last_channel = c->index;
+ conn->last_channel_count = 0;
+ }
+
+ c->packets_to_send |= 1 << type;
+ conn->channels_to_send |= 1 << c->index;
+ }
+ else
+ conn->packets_to_send |= 1 << type;
+
+ if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
+ ev_schedule(conn->tx_ev);
+}
+
+void
+bgp_kick_tx(void *vconn)
+{
+ struct bgp_conn *conn = vconn;
+
+ DBG("BGP: kicking TX\n");
+ while (bgp_fire_tx(conn) > 0)
+ ;
+}
+
+void
+bgp_tx(sock *sk)
+{
+ struct bgp_conn *conn = sk->data;
-malformed:
- bgp_error(conn, 3, 1, NULL, 0);
+ DBG("BGP: TX hook\n");
+ while (bgp_fire_tx(conn) > 0)
+ ;
}
+
static struct {
byte major, minor;
byte *msg;
@@ -1475,26 +2179,25 @@ static struct {
* which might be static string or given temporary buffer.
*/
const char *
-bgp_error_dsc(unsigned code, unsigned subcode)
+bgp_error_dsc(uint code, uint subcode)
{
static char buff[32];
- unsigned i;
+ uint i;
+
for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
- {
- return bgp_msg_table[i].msg;
- }
+ return bgp_msg_table[i].msg;
- bsprintf(buff, "Unknown error %d.%d", code, subcode);
+ bsprintf(buff, "Unknown error %u.%u", code, subcode);
return buff;
}
void
-bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
+bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
{
const byte *name;
byte *t, argbuf[36];
- unsigned i;
+ uint i;
/* Don't report Cease messages generated by myself */
if (code == 6 && class == BE_BGP_TX)
@@ -1510,7 +2213,7 @@ bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned
if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
{
/* Bad peer AS - we would like to print the AS */
- t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
+ t += bsprintf(t, "%u", (len == 2) ? get_u16(data) : get_u32(data));
goto done;
}
if (len > 16)
@@ -1527,47 +2230,25 @@ static void
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
{
struct bgp_proto *p = conn->bgp;
+
if (len < 21)
- {
- bgp_error(conn, 1, 2, pkt+16, 2);
- return;
- }
+ { bgp_error(conn, 1, 2, pkt+16, 2); return; }
- unsigned code = pkt[19];
- unsigned subcode = pkt[20];
+ uint code = pkt[19];
+ uint subcode = pkt[20];
int err = (code != 6);
bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
-#ifndef IPV6
- if ((code == 2) && ((subcode == 4) || (subcode == 7))
- /* Error related to capability:
- * 4 - Peer does not support capabilities at all.
- * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
- */
- && (p->cf->capabilities == 2)
- /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
- && (conn->start_state == BSS_CONNECT)
- /* Failed connection attempt have used capabilities */
- && (p->cf->remote_as <= 0xFFFF))
- /* Not possible with disabled capabilities */
- {
- /* We try connect without capabilities */
- log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
- p->start_state = BSS_CONNECT_NOCAP;
- err = 0;
- }
-#endif
-
bgp_conn_enter_close_state(conn);
- bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
+ bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
- if (err)
- {
- bgp_update_startup_delay(p);
- bgp_stop(p, 0);
- }
+ if (err)
+ {
+ bgp_update_startup_delay(p);
+ bgp_stop(p, 0);
+ }
}
static void
@@ -1577,64 +2258,12 @@ bgp_rx_keepalive(struct bgp_conn *conn)
BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
bgp_start_timer(conn->hold_timer, conn->hold_time);
- switch (conn->state)
- {
- case BS_OPENCONFIRM:
- bgp_conn_enter_established_state(conn);
- break;
- case BS_ESTABLISHED:
- break;
- default:
- bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
- }
-}
-static void
-bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
-{
- struct bgp_proto *p = conn->bgp;
+ if (conn->state == BS_OPENCONFIRM)
+ { bgp_conn_enter_established_state(conn); return; }
if (conn->state != BS_ESTABLISHED)
- { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
-
- if (!p->cf->enable_refresh)
- { bgp_error(conn, 1, 3, pkt+18, 1); return; }
-
- if (len < (BGP_HEADER_LENGTH + 4))
- { bgp_error(conn, 1, 2, pkt+16, 2); return; }
-
- if (len > (BGP_HEADER_LENGTH + 4))
- { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
-
- /* FIXME - we ignore AFI/SAFI values, as we support
- just one value and even an error code for an invalid
- request is not defined */
-
- /* RFC 7313 redefined reserved field as RR message subtype */
- uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
-
- switch (subtype)
- {
- case BGP_RR_REQUEST:
- BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
- proto_request_feeding(&p->p);
- break;
-
- case BGP_RR_BEGIN:
- BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
- bgp_refresh_begin(p);
- break;
-
- case BGP_RR_END:
- BGP_TRACE(D_PACKETS, "Got END-OF-RR");
- bgp_refresh_end(p);
- break;
-
- default:
- log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
- p->p.name, subtype);
- break;
- }
+ bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
}
@@ -1648,7 +2277,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
* packet handler according to the packet type.
*/
static void
-bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
+bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
{
byte type = pkt[18];
@@ -1658,14 +2287,14 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
mrt_dump_bgp_packet(conn, pkt, len);
switch (type)
- {
- case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
- case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
- case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
- case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
- case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
- default: bgp_error(conn, 1, 3, pkt+18, 1);
- }
+ {
+ case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
+ case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
+ case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
+ case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
+ case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
+ default: bgp_error(conn, 1, 3, pkt+18, 1);
+ }
}
/**
@@ -1682,10 +2311,9 @@ int
bgp_rx(sock *sk, uint size)
{
struct bgp_conn *conn = sk->data;
- struct bgp_proto *p = conn->bgp;
byte *pkt_start = sk->rbuf;
byte *end = pkt_start + size;
- unsigned i, len;
+ uint i, len;
DBG("BGP: RX hook: Got %d bytes\n", size);
while (end >= pkt_start + BGP_HEADER_LENGTH)
@@ -1699,7 +2327,7 @@ bgp_rx(sock *sk, uint size)
break;
}
len = get_u16(pkt_start+16);
- if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p))
+ if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
{
bgp_error(conn, 1, 2, pkt_start+16, 2);
break;