summaryrefslogtreecommitdiff
path: root/proto/bgp
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2008-10-27 00:20:22 +0100
committerOndrej Zajicek <santiago@crfreenet.org>2008-10-27 00:20:22 +0100
commita98fbf0f12b5e83e25afa0f585ca6a4d4ac5f6bf (patch)
treef6c215cd05ec9278696fae7b8814b5071a4b3c6a /proto/bgp
parenta3b70dc499b64f41aa776b5b4afee5c7bfb8dfa6 (diff)
parent1567edea8d3da7da08092eef15bb3bd4544c6464 (diff)
Merge branch 'dev' into out
Diffstat (limited to 'proto/bgp')
-rw-r--r--proto/bgp/attrs.c653
-rw-r--r--proto/bgp/bgp.c35
-rw-r--r--proto/bgp/bgp.h26
-rw-r--r--proto/bgp/config.Y20
-rw-r--r--proto/bgp/packets.c158
5 files changed, 714 insertions, 178 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 30699f84..2210cbe7 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -55,22 +55,38 @@ bgp_format_origin(eattr *a, byte *buf)
}
static int
-bgp_check_path(struct bgp_proto *p UNUSED, byte *a, int len)
+bgp_check_path(byte *a, int len, int bs, int errcode)
{
while (len)
{
DBG("Path segment %02x %02x\n", a[0], a[1]);
if (len < 2 ||
- a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE ||
- 2*a[1] + 2 > len)
- return 11;
- len -= 2*a[1] + 2;
- a += 2*a[1] + 2;
+ (a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE) ||
+ bs * a[1] + 2 > len)
+ return errcode;
+ len -= bs * a[1] + 2;
+ a += bs * a[1] + 2;
}
return 0;
}
static int
+bgp_check_as_path(struct bgp_proto *p, byte *a, int len)
+{
+ return bgp_check_path(a, len, p->as4_session ? 4 : 2, 11);
+}
+
+static int
+bgp_check_as4_path(struct bgp_proto *p, byte *a, int len)
+{
+ if (bgp_as4_support && (! p->as4_session))
+ return bgp_check_path(a, len, 4, 9);
+ else
+ return 0;
+}
+
+
+static int
bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len)
{
#ifdef IPV6
@@ -88,6 +104,20 @@ bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len)
}
static int
+bgp_check_aggregator(struct bgp_proto *p, UNUSED byte *a, int len)
+{
+ int exp_len = p->as4_session ? 8 : 6;
+
+ return (len == exp_len) ? 0 : 5;
+}
+
+static int
+bgp_check_cluster_list(struct bgp_proto *p UNUSED, UNUSED byte *a, int len)
+{
+ return ((len % 4) == 0) ? 0 : 5;
+}
+
+static int
bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
{
#ifdef IPV6
@@ -113,21 +143,23 @@ static struct attr_desc bgp_attr_table[] = {
{ "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */
bgp_check_origin, bgp_format_origin },
{ "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */
- bgp_check_path, NULL },
+ bgp_check_as_path, NULL },
{ "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */
bgp_check_next_hop, NULL },
- { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_MULTI_EXIT_DISC */
+ { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */
NULL, NULL },
{ "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0, /* BA_LOCAL_PREF */
NULL, NULL },
{ "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */
NULL, NULL },
- { "aggregator", 6, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */
- NULL, NULL },
+ { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */
+ bgp_check_aggregator, NULL },
{ "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */
NULL, NULL },
- { NULL, }, /* BA_ORIGINATOR_ID */
- { NULL, }, /* BA_CLUSTER_LIST */
+ { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_ORIGINATOR_ID */
+ NULL, NULL },
+ { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */
+ bgp_check_cluster_list, NULL },
{ NULL, }, /* BA_DPA */
{ NULL, }, /* BA_ADVERTISER */
{ NULL, }, /* BA_RCID_PATH */
@@ -135,43 +167,152 @@ static struct attr_desc bgp_attr_table[] = {
bgp_check_reach_nlri, NULL },
{ "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */
bgp_check_unreach_nlri, NULL },
+ { NULL, }, /* BA_EXTENDED_COMM */
+ { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
+ bgp_check_as4_path, NULL },
+ { "as4_aggregator", 8, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_AGGREGATOR */
+ NULL, NULL }
};
+/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH because
+ * EAF_TYPE_AS_PATH is supposed to have different format (2 or 4 B for each ASN)
+ * depending on bgp_as4_support variable.
+ */
+
#define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name)
-static byte *
-bgp_set_attr(eattr *e, struct linpool *pool, unsigned attr, unsigned val)
+static inline struct adata *
+bgp_alloc_adata(struct linpool *pool, unsigned len)
+{
+ struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
+ ad->length = len;
+ return ad;
+}
+
+static void
+bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
{
ASSERT(ATTR_KNOWN(attr));
e->id = EA_CODE(EAP_BGP, attr);
e->type = bgp_attr_table[attr].type;
e->flags = bgp_attr_table[attr].expected_flags;
if (e->type & EAF_EMBEDDED)
- {
- e->u.data = val;
- return NULL;
- }
+ e->u.data = val;
else
- {
- e->u.ptr = lp_alloc(pool, sizeof(struct adata) + val);
- e->u.ptr->length = val;
- return e->u.ptr->data;
- }
+ e->u.ptr = (struct adata *) val;
}
-byte *
-bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val)
+static byte *
+bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
+{
+ struct adata *ad = bgp_alloc_adata(pool, len);
+ bgp_set_attr(e, attr, (uintptr_t) ad);
+ return ad->data;
+}
+
+void
+bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
{
ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
a->next = *to;
*to = a;
a->flags = EALF_SORTED;
a->count = 1;
- return bgp_set_attr(a->attrs, pool, attr, val);
+ bgp_set_attr(a->attrs, attr, val);
}
+byte *
+bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
+{
+ struct adata *ad = bgp_alloc_adata(pool, len);
+ bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
+ return ad->data;
+}
+
+static int
+bgp_encode_attr_hdr(byte *dst, unsigned int flags, unsigned code, int len)
+{
+ int wlen;
+
+ DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
+
+ if (len < 256)
+ {
+ *dst++ = flags;
+ *dst++ = code;
+ *dst++ = len;
+ wlen = 3;
+ }
+ else
+ {
+ *dst++ = flags | BAF_EXT_LEN;
+ *dst++ = code;
+ put_u16(dst, len);
+ wlen = 4;
+ }
+
+ return wlen;
+}
+
+static void
+aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
+{
+ byte *src = aggr->data;
+ *new_used = 0;
+
+ u32 as = get_u32(src);
+ if (as > 0xFFFF)
+ {
+ as = AS_TRANS;
+ *new_used = 1;
+ }
+ put_u16(dst, as);
+
+ /* Copy IPv4 address */
+ memcpy(dst + 2, src + 4, 4);
+}
+
+static void
+aggregator_convert_to_new(struct adata *aggr, byte *dst)
+{
+ byte *src = aggr->data;
+
+ u32 as = get_u16(src);
+ put_u32(dst, as);
+
+ /* Copy IPv4 address */
+ memcpy(dst + 4, src + 2, 4);
+}
+
+static int
+bgp_get_attr_len(eattr *a)
+{
+ int len;
+ if (ATTR_KNOWN(EA_ID(a->id)))
+ {
+ int code = EA_ID(a->id);
+ struct attr_desc *desc = &bgp_attr_table[code];
+ len = desc->expected_length;
+ if (len < 0)
+ {
+ ASSERT(!(a->type & EAF_EMBEDDED));
+ len = a->u.ptr->length;
+ }
+ }
+ else
+ {
+ ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
+ len = a->u.ptr->length;
+ }
+
+ return len;
+}
+
+#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0)
+
/**
* bgp_encode_attrs - encode BGP attributes
+ * @p: BGP instance
* @w: buffer
* @attrs: a list of extended attributes
* @remains: remaining space in the buffer
@@ -182,11 +323,11 @@ bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val)
* Result: Length of the attribute block generated.
*/
unsigned int
-bgp_encode_attrs(byte *w, ea_list *attrs, int remains)
+bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
{
unsigned int i, code, flags;
byte *start = w;
- int len;
+ int len, rv;
for(i=0; i<attrs->count; i++)
{
@@ -198,43 +339,90 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains)
if (code == BA_NEXT_HOP)
continue;
#endif
- flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
- if (ATTR_KNOWN(code))
- {
- struct attr_desc *desc = &bgp_attr_table[code];
- len = desc->expected_length;
- if (len < 0)
- {
- ASSERT(!(a->type & EAF_EMBEDDED));
- len = a->u.ptr->length;
- }
- }
- else
+
+ /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
+ * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH
+ * as optional AS4_PATH attribute.
+ */
+ if ((code == BA_AS_PATH) && bgp_as4_support && (! p->as4_session))
{
- ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
len = a->u.ptr->length;
+
+ if (remains < (len + 4))
+ goto err_no_buffer;
+
+ /* Use a temporary buffer because we do not know the length of the
+ * created attribute, and therefore the length of its header, in advance.
+ * Perhaps it would be better to always use BAF_EXT_LEN. */
+
+ byte buf[len];
+ int new_used;
+ int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
+
+ rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
+ ADVANCE(w, remains, rv);
+ memcpy(w, buf, nl);
+ ADVANCE(w, remains, nl);
+
+ if (! new_used)
+ continue;
+
+ if (remains < (len + 4))
+ goto err_no_buffer;
+
+ /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments
+ * here, but we don't support confederations and such paths were already
+ * discarded in bgp_check_as_path().
+ */
+
+ rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
+ ADVANCE(w, remains, rv);
+ memcpy(w, a->u.ptr->data, len);
+ ADVANCE(w, remains, len);
+
+ continue;
}
- DBG("\tAttribute %02x (type %02x, %d bytes, flags %02x)\n", code, a->type, len, flags);
- if (remains < len + 4)
- {
- log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes");
- break;
- }
- if (len < 256)
- {
- *w++ = flags;
- *w++ = code;
- *w++ = len;
- remains -= 3;
- }
- else
+
+ /* The same issue with AGGREGATOR attribute */
+ if ((code == BA_AGGREGATOR) && bgp_as4_support && (! p->as4_session))
{
- *w++ = flags | BAF_EXT_LEN;
- *w++ = code;
- put_u16(w, len);
- w += 2;
- remains -= 4;
+ int new_used;
+
+ len = 6;
+ if (remains < (len + 3))
+ goto err_no_buffer;
+
+ rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
+ ADVANCE(w, remains, rv);
+ aggregator_convert_to_old(a->u.ptr, w, &new_used);
+ ADVANCE(w, remains, len);
+
+ if (! new_used)
+ continue;
+
+ len = 8;
+ if (remains < (len + 3))
+ goto err_no_buffer;
+
+ rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
+ ADVANCE(w, remains, rv);
+ memcpy(w, a->u.ptr->data, len);
+ ADVANCE(w, remains, len);
+
+ continue;
}
+
+ /* Standard path continues here ... */
+
+ flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
+ len = bgp_get_attr_len(a);
+
+ if (remains < len + 4)
+ goto err_no_buffer;
+
+ rv = bgp_encode_attr_hdr(w, flags, code, len);
+ ADVANCE(w, remains, rv);
+
switch (a->type & EAF_TYPE_MASK)
{
case EAF_TYPE_INT:
@@ -266,10 +454,13 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains)
default:
bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
}
- remains -= len;
- w += len;
+ ADVANCE(w, remains, len);
}
return w - start;
+
+ err_no_buffer:
+ log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes");
+ return w - start;
}
static void
@@ -547,6 +738,7 @@ bgp_rt_notify(struct proto *P, net *n, rte *new, rte *old UNUSED, ea_list *attrs
bgp_schedule_packet(p->conn, PKT_UPDATE);
}
+
static int
bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
{
@@ -559,20 +751,24 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
ea->flags = EALF_SORTED;
ea->count = 4;
- bgp_set_attr(ea->attrs, pool, BA_ORIGIN,
+ bgp_set_attr(ea->attrs, BA_ORIGIN,
((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
if (p->is_internal)
- bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 0);
+ bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
else
{
- z = bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 4);
+ z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4);
z[0] = AS_PATH_SEQUENCE;
z[1] = 1; /* 1 AS */
- put_u16(z+2, p->local_as);
+
+ if (bgp_as4_support)
+ put_u32(z+2, p->local_as);
+ else
+ put_u16(z+2, p->local_as);
}
- z = bgp_set_attr(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr));
+ z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr));
if (p->cf->next_hop_self ||
!p->is_internal ||
rta->dest != RTD_ROUTER)
@@ -585,34 +781,65 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
else
*(ip_addr *)z = e->attrs->gw;
- bgp_set_attr(ea->attrs+3, pool, BA_LOCAL_PREF, 0);
+ bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, 0);
return 0; /* Leave decision to the filters */
}
-static ea_list *
-bgp_path_prepend(struct linpool *pool, eattr *a, ea_list *old, int as)
+
+static inline int
+bgp_as_path_loopy(struct bgp_proto *p, rta *a)
{
- struct ea_list *e = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
- struct adata *olda = a->u.ptr;
+ eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ return (e && as_path_is_member(e->u.ptr, p->local_as));
+}
- e->next = old;
- e->flags = EALF_SORTED;
- e->count = 1;
- e->attrs[0].id = EA_CODE(EAP_BGP, BA_AS_PATH);
- e->attrs[0].flags = BAF_TRANSITIVE;
- e->attrs[0].type = EAF_TYPE_AS_PATH;
- e->attrs[0].u.ptr = as_path_prepend(pool, olda, as);
- return e;
+static inline int
+bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
+{
+ eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ return (e && (e->u.data == p->local_id));
+}
+
+static inline int
+bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
+{
+ eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
+}
+
+
+static inline void
+bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
+{
+ eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
+}
+
+static inline void
+bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
+{
+ eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid));
}
static int
-bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
+bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
{
eattr *a;
- if (!p->is_internal && (a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH))))
- *attrs = bgp_path_prepend(pool, a, *attrs, p->local_as);
+ if (!p->is_internal)
+ {
+ bgp_path_prepend(e, attrs, pool, p->local_as);
+
+ /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
+ * propagated to other neighboring ASes.
+ * Perhaps it would be better to undefine it.
+ */
+ a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ if (a)
+ bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
+ }
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface)))
@@ -622,7 +849,24 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
else
{
/* Need to create new one */
- *(ip_addr *) bgp_attach_attr(attrs, pool, BA_NEXT_HOP, sizeof(ip_addr)) = p->local_addr;
+ bgp_attach_attr_ip(attrs, pool, BA_NEXT_HOP, p->local_addr);
+ }
+
+ if (rr)
+ {
+ /* Handling route reflection, RFC 4456 */
+ struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto;
+
+ a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ if (!a)
+ bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
+
+ /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
+ bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
+
+ /* Two RR clients with different cluster ID, hmmm */
+ if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
+ bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
}
return 0; /* Leave decision to the filters */
@@ -639,14 +883,39 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *
return -1;
if (new_bgp)
{
+ /* We should check here for cluster list loop, because the receiving BGP instance
+ might have different cluster ID */
+ if (bgp_cluster_list_loopy(p, e->attrs))
+ return -1;
+
if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
- return -1; /* Don't redistribute internal routes with IBGP */
- return bgp_update_attrs(p, e, attrs, pool);
+ {
+ /* Redistribution of internal routes with IBGP */
+ if (p->rr_client || new_bgp->rr_client)
+ /* Route reflection, RFC 4456 */
+ return bgp_update_attrs(p, e, attrs, pool, 1);
+ else
+ return -1;
+ }
+ else
+ return bgp_update_attrs(p, e, attrs, pool, 0);
}
else
return bgp_create_attrs(p, e, attrs, pool);
}
+static inline u32
+bgp_get_neighbor(rte *r)
+{
+ eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ u32 as;
+
+ if (e && as_path_get_last(e->u.ptr, &as))
+ return as;
+ else
+ return ((struct bgp_proto *) r->attrs->proto)->remote_as;
+}
+
int
bgp_rte_better(rte *new, rte *old)
{
@@ -665,20 +934,20 @@ bgp_rte_better(rte *new, rte *old)
if (n < o)
return 0;
- /* Use AS path lengths */
+ /* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- n = x ? as_path_getlen(x->u.ptr) : 100000;
- o = y ? as_path_getlen(y->u.ptr) : 100000;
+ n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
+ o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (n < o)
return 1;
if (n > o)
return 0;
}
- /* Use origins */
+ /* RFC 4271 9.1.2.2. b) Use origins */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
n = x ? x->u.data : ORIGIN_INCOMPLETE;
@@ -688,47 +957,163 @@ bgp_rte_better(rte *new, rte *old)
if (n > o)
return 0;
- /* Compare MED's */
- x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- n = x ? x->u.data : new_bgp->cf->default_med;
- o = y ? y->u.data : old_bgp->cf->default_med;
+ /* RFC 4271 9.1.2.2. c) Compare MED's */
+
+ if (bgp_get_neighbor(new) == bgp_get_neighbor(old))
+ {
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ n = x ? x->u.data : new_bgp->cf->default_med;
+ o = y ? y->u.data : old_bgp->cf->default_med;
+ if (n < o)
+ return 1;
+ if (n > o)
+ return 0;
+ }
+
+ /* RFC 4271 9.1.2.2. d) Prefer external peers */
+ if (new_bgp->is_internal > old_bgp->is_internal)
+ return 0;
+ if (new_bgp->is_internal < old_bgp->is_internal)
+ return 1;
+
+ /* Skipping RFC 4271 9.1.2.2. e) */
+ /* We don't have interior distances */
+
+ /* RFC 4456 9. b) Compare cluster list lengths */
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ n = x ? int_set_get_size(x->u.ptr) : 0;
+ o = y ? int_set_get_size(y->u.ptr) : 0;
if (n < o)
return 1;
if (n > o)
return 0;
- /* A tie breaking procedure according to RFC 1771, section 9.1.2.1 */
- /* We don't have interior distances */
- /* We prefer external peers */
- if (new_bgp->is_internal > old_bgp->is_internal)
- return 0;
- if (new_bgp->is_internal < old_bgp->is_internal)
+ /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
+ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ n = x ? x->u.data : new_bgp->remote_id;
+ o = y ? y->u.data : old_bgp->remote_id;
+ if (n < o)
return 1;
- /* Finally we compare BGP identifiers */
- return (new_bgp->remote_id < old_bgp->remote_id);
+ if (n > o)
+ return 0;
+
+
+ /* RFC 4271 9.1.2.2. g) Compare peer IP addresses */
+ return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
}
-static int
-bgp_path_loopy(struct bgp_proto *p, eattr *a)
+static struct adata *
+bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
+{
+ struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
+ newa->length = 8;
+ aggregator_convert_to_new(old, newa->data);
+ return newa;
+}
+
+
+/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
+ * and append path old4 (in 4B format).
+ */
+static struct adata *
+bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
+{
+ byte buf[old2->length * 2];
+
+ int ol = as_path_convert_to_new(old2, buf, req_as);
+ int nl = ol + (old4 ? old4->length : 0);
+
+ struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
+ newa->length = nl;
+ memcpy(newa->data, buf, ol);
+ if (old4) memcpy(newa->data + ol, old4->data, old4->length);
+
+ return newa;
+}
+
+
+/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
+static void
+bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
+{
+ eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
+ eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
+ eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
+
+ if (a2)
+ {
+ u32 a2_as = get_u16(a2->u.ptr->data);
+
+ if (a4)
+ {
+ if (a2_as != AS_TRANS)
+ {
+ /* Routes were aggregated by an old router and therefore AS4_PATH
+ * and AS4_AGGREGATOR are invalid.
+ *
+ * Convert AS_PATH and AGGREGATOR to 4B format and finish.
+ */
+
+ a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
+ p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
+
+ return;
+ }
+ else
+ {
+ /* Common case, use AS4_AGGREGATOR attribute */
+ a2->u.ptr = a4->u.ptr;
+ }
+ }
+ else
+ {
+ /* Common case, use old AGGREGATOR attribute */
+ a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
+
+ if (a2_as == AS_TRANS)
+ log(L_WARN "BGP: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing");
+ }
+ }
+ else
+ if (a4)
+ log(L_WARN "BGP: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing");
+
+ int p2_len = as_path_getlen(p2->u.ptr);
+ int p4_len = p4 ? as_path_getlen(p4->u.ptr) : AS_PATH_MAXLEN;
+
+ if (p2_len < p4_len)
+ p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
+ else
+ p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
+
+}
+
+static void
+bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
{
- byte *path = a->u.ptr->data;
- int len = a->u.ptr->length;
- int i, n;
+ unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
+ unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
+ ea_list **el = &(a->eattrs);
- while (len > 0)
+ /* We know that ea_lists constructed in bgp_decode_attrs have one attribute per ea_list struct */
+ while (*el != NULL)
{
- n = path[1];
- len -= 2 + 2*n;
- path += 2;
- for(i=0; i<n; i++)
+ unsigned fid = (*el)->attrs[0].id;
+
+ if ((fid == id1) || (fid == id2))
{
- if (get_u16(path) == p->local_as)
- return 1;
- path += 2;
+ *el = (*el)->next;
+ if (p->as4_session)
+ log(L_WARN "BGP: Unexpected AS4_* attributes received");
}
+ else
+ el = &((*el)->next);
}
- return 0;
}
/**
@@ -883,20 +1268,34 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin
}
}
}
+
+ /* When receiving attributes from non-AS4-aware BGP speaker,
+ * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
+ */
+ if (bgp_as4_support && (! bgp->as4_session))
+ bgp_reconstruct_4b_atts(bgp, a, pool);
+
+ if (bgp_as4_support)
+ bgp_remove_as4_attrs(bgp, a);
/* If the AS path attribute contains our AS, reject the routes */
- e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- if (e && bgp_path_loopy(bgp, e))
- {
- DBG("BGP: Path loop!\n");
- return NULL;
- }
+ if (bgp_as_path_loopy(bgp, a))
+ goto loop;
+
+ /* Two checks for IBGP loops caused by route reflection, RFC 4456 */
+ if (bgp_originator_id_loopy(bgp, a) ||
+ bgp_cluster_list_loopy(bgp, a))
+ goto loop;
/* If there's no local preference, define one */
- if (!(seen[0] && (1 << BA_LOCAL_PREF)))
+ if (!(seen[0] & (1 << BA_LOCAL_PREF)))
bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, 0);
return a;
+loop:
+ DBG("BGP: Path loop!\n");
+ return NULL;
+
malformed:
bgp_error(conn, 3, 1, NULL, 0);
return NULL;
@@ -945,11 +1344,11 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
{
eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
- int origas;
+ u32 origas;
buf += bsprintf(buf, " (%d) [", e->pref);
- if (p && (origas = as_path_get_first(p->u.ptr)) >= 0)
- buf += bsprintf(buf, "AS%d", origas);
+ if (p && as_path_get_first(p->u.ptr, &origas))
+ buf += bsprintf(buf, "AS%u", origas);
if (o)
buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
strcpy(buf, "]");
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index cedd223b..0d580be1 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -76,11 +76,16 @@ static void bgp_connect(struct bgp_proto *p);
static void bgp_initiate(struct bgp_proto *p);
static void bgp_setup_listen_sk(void);
+
static void
-bgp_close(struct bgp_proto *p UNUSED)
+bgp_close(struct bgp_proto *p)
{
ASSERT(bgp_counter);
bgp_counter--;
+
+ if (p->cf->password)
+ sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL);
+
if (!bgp_counter)
{
rfree(bgp_listen_sk);
@@ -329,6 +334,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
bgp_setup_conn(p, conn);
bgp_setup_sk(p, conn, s);
s->tx_hook = bgp_connected;
+ s->password = p->cf->password;
conn->state = BS_CONNECT;
if (sk_open(s))
{
@@ -479,6 +485,13 @@ bgp_start_locked(struct object_lock *lock)
p->local_id = cf->c.global->router_id;
p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip;
p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY);
+
+ if (cf->rr_client)
+ {
+ p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id;
+ p->rr_client = cf->rr_client;
+ }
+
if (!p->neigh)
{
log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop);
@@ -505,6 +518,7 @@ bgp_start(struct proto *P)
bgp_counter++;
bgp_setup_listen_sk();
+
if (!bgp_linpool)
bgp_linpool = lp_new(&root_pool, 4080);
@@ -522,6 +536,17 @@ bgp_start(struct proto *P)
lock->hook = bgp_start_locked;
lock->data = p;
olock_acquire(lock);
+
+ /* We should create security association after we get a lock not to
+ * break existing connections.
+ */
+ if (p->cf->password)
+ {
+ int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password);
+ if (rv < 0)
+ return PS_STOP;
+ }
+
return PS_START;
}
@@ -611,6 +636,14 @@ bgp_check(struct bgp_config *c)
cf_error("Local AS number must be set");
if (!c->remote_as)
cf_error("Neighbor must be configured");
+ if (!bgp_as4_support && c->enable_as4)
+ cf_error("AS4 support disabled globbaly");
+ if (!c->enable_as4 && (c->local_as > 0xFFFF))
+ cf_error("Local AS number out of range");
+ if (!c->enable_as4 && (c->remote_as > 0xFFFF))
+ cf_error("Neighbor AS number out of range");
+ if ((c->local_as != c->remote_as) && (c->rr_client))
+ cf_error("Only internal neighbor can be RR client");
}
static void
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 6519db85..1d67e336 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -16,7 +16,7 @@ struct eattr;
struct bgp_config {
struct proto_config c;
- unsigned int local_as, remote_as;
+ u32 local_as, remote_as;
ip_addr remote_ip;
int multihop; /* Number of hops if multihop */
ip_addr multihop_via; /* Multihop: address to route to */
@@ -25,6 +25,9 @@ struct bgp_config {
int compare_path_lengths; /* Use path lengths when selecting best route */
u32 default_local_pref; /* Default value for LOCAL_PREF attribute */
u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */
+ int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */
+ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */
+ int rr_client; /* Whether neighbor is RR client of me */
unsigned connect_retry_time;
unsigned hold_time, initial_hold_time;
unsigned keepalive_time;
@@ -33,6 +36,7 @@ struct bgp_config {
unsigned error_delay_time_min; /* Time to wait after an error is detected */
unsigned error_delay_time_max;
unsigned disable_after_error; /* Disable the protocol when error is detected */
+ char *password; /* Password used for MD5 authentication */
};
struct bgp_conn {
@@ -47,16 +51,21 @@ struct bgp_conn {
byte *notify_data;
int error_flag; /* Error state, ignore all input */
int primary; /* This connection is primary */
+ u32 advertised_as; /* Temporary value for AS number received */
unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
struct bgp_proto {
struct proto p;
struct bgp_config *cf; /* Shortcut to BGP configuration */
- unsigned local_as, remote_as;
+ u32 local_as, remote_as;
int is_internal; /* Internal BGP connection (local_as == remote_as) */
+ int as4_support; /* Peer supports 4B AS numbers [RFC4893] */
+ int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
u32 local_id; /* BGP identifier of this router */
u32 remote_id; /* BGP identifier of the neighbor */
+ u32 rr_cluster_id; /* Route reflector cluster ID */
+ int rr_client; /* Whether neighbor is RR client of me */
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
@@ -100,6 +109,9 @@ struct bgp_bucket {
extern struct linpool *bgp_linpool;
+extern int bgp_as4_support;
+
+
void bgp_start_timer(struct timer *t, int value);
void bgp_check(struct bgp_config *c);
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
@@ -115,17 +127,21 @@ void bgp_close_conn(struct bgp_conn *c);
/* attrs.c */
-byte *bgp_attach_attr(struct ea_list **to, struct linpool *, unsigned attr, unsigned val);
+void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
+byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, unsigned int len, struct linpool *pool, int mandatory);
int bgp_get_attr(struct eattr *e, byte *buf);
int bgp_rte_better(struct rte *, struct rte *);
void bgp_rt_notify(struct proto *, struct network *, struct rte *, struct rte *, struct ea_list *);
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
void bgp_attr_init(struct bgp_proto *);
-unsigned int bgp_encode_attrs(byte *w, struct ea_list *attrs, int remains);
+unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
+inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
+{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; }
+
/* packets.c */
void bgp_schedule_packet(struct bgp_conn *conn, int type);
@@ -165,6 +181,8 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco
#define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */
#define BA_MP_UNREACH_NLRI 0x0f
#define BA_EXTENDED_COMM 0x10 /* draft-ramachandra-bgp-ext-communities */
+#define BA_AS4_PATH 0x11 /* [RFC4893] */
+#define BA_AS4_AGGREGATOR 0x12
/* BGP states */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 52ad731e..8524b2dd 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -18,9 +18,10 @@ CF_DECLS
CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC,
- ERROR, START, DELAY, FORGET, WAIT, DISABLE, AFTER,
+ ERROR, START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER,
BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP,
- BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS)
+ BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS,
+ PASSWORD, RR, CLIENT, CLUSTER, ID, AS4)
CF_GRAMMAR
@@ -32,27 +33,28 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->hold_time = 240;
BGP_CFG->connect_retry_time = 120;
BGP_CFG->initial_hold_time = 240;
- BGP_CFG->default_med = ~0; /* RFC 1771 doesn't specify this, draft-09 says ~0 */
+ BGP_CFG->default_med = 0;
BGP_CFG->compare_path_lengths = 1;
BGP_CFG->start_delay_time = 5;
BGP_CFG->error_amnesia_time = 300;
BGP_CFG->error_delay_time_min = 60;
BGP_CFG->error_delay_time_max = 300;
+ BGP_CFG->enable_as4 = bgp_as4_support;
}
;
bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
- | bgp_proto LOCAL AS expr ';' {
- if ($4 < 0 || $4 > 65535) cf_error("AS number out of range");
- BGP_CFG->local_as = $4;
- }
+ | bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; }
| bgp_proto NEIGHBOR ipa AS expr ';' {
- if ($5 < 0 || $5 > 65535) cf_error("AS number out of range");
+ if (ipa_nonzero(BGP_CFG->remote_ip)) cf_error("Only one neighbor per BGP instance is allowed");
+
BGP_CFG->remote_ip = $3;
BGP_CFG->remote_as = $5;
}
+ | bgp_proto RR CLUSTER ID expr ';' { BGP_CFG->rr_cluster_id = $5; }
+ | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; }
| bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; }
| bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; }
| bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; }
@@ -67,6 +69,8 @@ bgp_proto:
| bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; }
| bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; }
| bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; }
+ | bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; }
+ | bgp_proto PASSWORD TEXT ';' { BGP_CFG->password = $3; }
;
CF_ADDTO(dynamic_attr, BGP_PATH
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 2e6f0b60..c18c6e42 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -12,6 +12,7 @@
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
+#include "nest/attrs.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
#include "lib/socket.h"
@@ -30,33 +31,64 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
return buf + 2 + conn->notify_size;
}
+#ifdef IPV6
+static byte *
+bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
+{ /* Write the 6-byte Multiprotocol Extensions capability (AFI BGP_AF_IPV6, SAFI 1) at buf; return the first byte after it */
+ *buf++ = 1; /* Capability 1: Multiprotocol extensions */
+ *buf++ = 4; /* Capability data length */
+ *buf++ = 0; /* AFI, high byte */
+ *buf++ = BGP_AF_IPV6; /* AFI, low byte: we support AF IPv6 */
+ *buf++ = 0; /* RFU */
+ *buf++ = 1; /* and SAFI 1 */
+ return buf;
+}
+#endif
+
+static byte *
+bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
+{ /* Write the 6-byte 4-octet-AS capability carrying conn->bgp->local_as at buf; return the first byte after it */
+ *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
+ *buf++ = 4; /* Capability data length */
+ put_u32(buf, conn->bgp->local_as); /* Capability data: the local AS number as a 32-bit value */
+ return buf + 4; /* Step over the 4 data bytes; buf itself was not advanced by the put_u32() call */
+}
+
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
+ byte *cap; /* Write cursor for capability data */
+ int cap_len; /* Number of capability bytes written */
BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
buf[0] = BGP_VERSION;
- put_u16(buf+1, p->local_as);
+ put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS); /* 16-bit AS field: local_as itself when below 0xFFFF, AS_TRANS otherwise */
put_u16(buf+3, p->cf->hold_time);
put_u32(buf+5, p->local_id);
-#ifndef IPV6
- buf[9] = 0; /* No optional parameters */
- return buf+10;
-#else
- buf += 9;
- *buf++ = 8; /* Optional params len */
- *buf++ = 2; /* Option: Capability list */
- *buf++ = 6; /* Option length */
- *buf++ = 1; /* Capability 1: Multiprotocol extensions */
- *buf++ = 4; /* Capability data length */
- *buf++ = 0; /* We support AF IPv6 */
- *buf++ = BGP_AF_IPV6;
- *buf++ = 0; /* RFU */
- *buf++ = 1; /* and SAFI 1 */
- return buf;
+ /* Leave buf[9..11] free for the optional-parameters length and the capability option header (type, length); they are filled in below once the capability size is known */
+ cap = buf + 12;
+
+#ifdef IPV6
+ cap = bgp_put_cap_ipv6(conn, cap);
#endif
+ if (p->cf->enable_as4)
+ cap = bgp_put_cap_as4(conn, cap);
+
+ cap_len = cap - buf - 12; /* Bytes emitted by the bgp_put_cap_*() calls above */
+ if (cap_len > 0)
+ {
+ buf[9] = cap_len + 2; /* Optional params len */
+ buf[10] = 2; /* Option: Capability list */
+ buf[11] = cap_len; /* Option length */
+ return cap; /* Packet ends after the last capability */
+ }
+ else
+ {
+ buf[9] = 0; /* No optional parameters */
+ return buf + 10; /* Packet ends right after the zero length byte */
+ }
}
static unsigned int
@@ -118,7 +150,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
continue;
}
DBG("Processing bucket %p\n", buck);
- a_size = bgp_encode_attrs(w+2, buck->eattrs, 1024);
+ a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 1024);
put_u16(w, a_size);
w += a_size + 2;
r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
@@ -161,12 +193,12 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
DBG("Withdrawn routes:\n");
- tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
+ tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
*tmp++ = 0;
*tmp++ = BGP_AF_IPV6;
*tmp++ = 1;
ea->attrs[0].u.ptr->length = bgp_encode_prefixes(p, tmp, buck, remains-11);
- size = bgp_encode_attrs(w, ea, remains);
+ size = bgp_encode_attrs(p, w, ea, remains);
w += size;
remains -= size;
}
@@ -183,10 +215,10 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
continue;
}
DBG("Processing bucket %p\n", buck);
- size = bgp_encode_attrs(w, buck->eattrs, 1024);
+ size = bgp_encode_attrs(p, w, buck->eattrs, 1024);
w += size;
remains -= size;
- tstart = tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
+ tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
*tmp++ = 0;
*tmp++ = BGP_AF_IPV6;
*tmp++ = 1;
@@ -230,7 +262,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
*tmp++ = 0; /* No SNPA information */
tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
ea->attrs[0].u.ptr->length = tmp - tstart;
- w += bgp_encode_attrs(w, ea, remains);
+ w += bgp_encode_attrs(p, w, ea, remains);
break;
}
}
@@ -353,9 +385,50 @@ bgp_tx(sock *sk)
;
}
+/*
+ * Capability negotiation as per RFC 2842.
+ *
+ * bgp_parse_capabilities() walks the len bytes at opt as a sequence of
+ * entries, each made of a code byte, a length byte and that many data
+ * bytes. Only capability 65 (4-octet AS numbers) is acted upon: it sets
+ * p->as4_support, copies the configured enable_as4 flag to p->as4_session
+ * and, when that flag is set, reads conn->advertised_as from the
+ * capability data. Every other capability code is skipped.
+ *
+ * An entry that does not fit into the remaining bytes, or a capability 65
+ * whose data length is not 4, leads to bgp_error(conn, 2, 0, NULL, 0).
+ *
+ * NOTE(review): the function returns void, so a caller cannot learn from
+ * a return value that bgp_error() was raised here.
+ */
+
+void
+bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
+{
+ struct bgp_proto *p = conn->bgp;
+ int cl;
+ u32 as; /* NOTE(review): not used anywhere in this function */
+
+ while (len > 0)
+ {
+ if (len < 2 || len < 2 + opt[1]) /* Entry header or its data would run past the end */
+ goto err;
+
+ cl = opt[1];
+
+ switch (opt[0])
+ {
+ case 65:
+ if (cl != 4)
+ goto err;
+ p->as4_support = 1; /* Peer announced the capability */
+ p->as4_session = p->cf->enable_as4; /* Use it only if enabled in our configuration */
+ if (p->as4_session)
+ conn->advertised_as = get_u32(opt + 2); /* Take the AS number from the capability data */
+ break;
+
+ /* We can safely ignore all other capabilities */
+ }
+ len -= 2 + cl;
+ opt += 2 + cl;
+ }
+ return;
+
+ err:
+ bgp_error(conn, 2, 0, NULL, 0);
+ return;
+}
+
static int
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
{
+ int ol;
+
while (len > 0)
{
if (len < 2 || len < 2 + opt[1])
@@ -369,12 +442,14 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
DBG("\n");
}
#endif
+
+ ol = opt[1];
switch (opt[0])
{
case 2:
- /* Capatibility negotiation as per RFC 2842 */
- /* We can safely ignore all capabilities announced */
+ bgp_parse_capabilities(conn, opt + 2, ol);
break;
+
default:
/*
* BGP specs don't tell us to send which option
@@ -382,11 +457,11 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
* to do so. Also, capability negotiation with
* Cisco routers doesn't work without that.
*/
- bgp_error(conn, 2, 4, opt, opt[1]);
+ bgp_error(conn, 2, 4, opt, ol);
return 0;
}
- len -= 2 + opt[1];
- opt += 2 + opt[1];
+ len -= 2 + ol;
+ opt += 2 + ol;
}
return 0;
}
@@ -397,7 +472,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
struct bgp_conn *other;
struct bgp_proto *p = conn->bgp;
struct bgp_config *cf = p->cf;
- unsigned as, hold;
+ unsigned hold;
u32 id;
/* Check state */
@@ -409,20 +484,27 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
{ bgp_error(conn, 1, 2, pkt+16, 2); return; }
if (pkt[19] != BGP_VERSION)
{ bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
- as = get_u16(pkt+20);
+ conn->advertised_as = get_u16(pkt+20);
hold = get_u16(pkt+22);
id = get_u32(pkt+24);
- BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", as, hold, id);
- if (cf->remote_as && as != p->remote_as)
- { bgp_error(conn, 2, 2, pkt+20, -2); return; }
- if (hold > 0 && hold < 3)
- { bgp_error(conn, 2, 6, pkt+22, 2); return; }
- p->remote_id = id;
+ BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
+
+ p->remote_id = id; // ???
if (bgp_parse_options(conn, pkt+29, pkt[28]))
return;
+
+ if (hold > 0 && hold < 3)
+ { bgp_error(conn, 2, 6, pkt+22, 2); return; }
+
if (!id || id == 0xffffffff || id == p->local_id)
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
+
+ if (conn->advertised_as != p->remote_as)
+ {
+ bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return;
+ }
+
/* Check the other connection */
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
switch (other->state)
@@ -463,7 +545,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
else
conn->hold_time = p->cf->hold_time;
conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
- p->remote_as = as;
+ // p->remote_as = conn->advertised_as;
p->remote_id = id;
DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id);
@@ -621,7 +703,7 @@ bgp_do_rx_update(struct bgp_conn *conn,
/* Create fake NEXT_HOP attribute */
if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
goto bad;
- memcpy(bgp_attach_attr(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16);
+ bgp_attach_attr_ip(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, x[1]);
len -= *x + 2;
x += *x + 1;
@@ -720,7 +802,7 @@ static struct {
{ 2, 4, "Unsupported optional parameter" },
{ 2, 5, "Authentication failure" },
{ 2, 6, "Unacceptable hold time" },
- { 2, 7, "Required capability missing" }, /* capability negotiation draft */
+ { 2, 7, "Required capability missing" }, /* [RFC3392] */
{ 3, 0, "Invalid UPDATE message" },
{ 3, 1, "Malformed attribute list" },
{ 3, 2, "Unrecognized well-known attribute" },