From 11cb620266035ffbe17b21c4a174380cb8b6a521 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:36:08 +0100 Subject: Implementation of 4B ASN support for BGP --- nest/a-path.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++++---------- nest/attrs.h | 21 ++++-- 2 files changed, 191 insertions(+), 39 deletions(-) (limited to 'nest') diff --git a/nest/a-path.c b/nest/a-path.c index 1b08f809..f4666911 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -14,38 +14,139 @@ #include "lib/unaligned.h" #include "lib/string.h" + +/* Global AS4 support, shared by all BGP instances. + * This specifies whether BA_AS_PATH attributes contain 2 or 4 B per ASN + */ + +int bgp_as4_support = 1; + +static void +put_as(byte *data, u32 as) +{ + if (bgp_as4_support) + put_u32(data, as); + else if (as <= 0xFFFF) + put_u16(data, as); + else + bug("put_as: Try to put 32bit AS to 16bit AS Path"); +} + +static inline u32 +get_as(byte *data) +{ + return bgp_as4_support ? get_u32(data) : get_u16(data); +} + struct adata * -as_path_prepend(struct linpool *pool, struct adata *olda, int as) +as_path_prepend(struct linpool *pool, struct adata *olda, u32 as) { + int bs = bgp_as4_support ? 4 : 2; struct adata *newa; - if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && - olda->data[1] < 255) /* Starting with sequence => just prepend the AS number */ + if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && olda->data[1] < 255) + /* Starting with sequence => just prepend the AS number */ { - newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 2); - newa->length = olda->length + 2; - newa->data[0] = 2; + int nl = olda->length + bs; + newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + newa->data[0] = AS_PATH_SEQUENCE; newa->data[1] = olda->data[1] + 1; - memcpy(newa->data+4, olda->data+2, olda->length-2); + memcpy(newa->data + bs + 2, olda->data + 2, olda->length - 2); } - else /* Create new path segment */ + else /* Create new path segment */ { - newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 4); - newa->length = olda->length + 4; - newa->data[0] = 2; + int nl = olda->length + bs + 2; + newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + newa->data[0] = AS_PATH_SEQUENCE; newa->data[1] = 1; - memcpy(newa->data+4, olda->data, olda->length); + memcpy(newa->data + bs + 2, olda->data, olda->length); } - put_u16(newa->data+2, as); + put_as(newa->data + 2, as); return newa; } +int +as_path_convert_to_old(struct adata *path, byte *dst, int *new_used) +{ + byte *src = path->data; + byte *src_end = src + path->length; + byte *dst_start = dst; + u32 as; + int i, n; + *new_used = 0; + + while (src < src_end) + { + n = src[1]; + *dst++ = *src++; + *dst++ = *src++; + + for(i=0; i 0xFFFF) + { + as = AS_TRANS; + *new_used = 1; + } + put_u16(dst, as); + src += 4; + dst += 2; + } + } + + return dst - dst_start; +} + +int +as_path_convert_to_new(struct adata *path, byte *dst, int req_as) +{ + byte *src = path->data; + byte *src_end = src + path->length; + byte *dst_start = dst; + u32 as; + int i, t, n; + + + while ((src < src_end) && (req_as > 0)) + { + t = *src++; + n = *src++; + + if (t == AS_PATH_SEQUENCE) + { + if (n > req_as) + n = req_as; + + req_as -= n; + } + else // t == AS_PATH_SET + req_as--; + + *dst++ = t; + *dst++ = n; + + for(i=0; idata; byte *e = p + path->length; - byte *end = buf + size - 8; + byte *end = buf + size - 16; int sp = 1; int l, isset; @@ -69,8 +170,8 @@ as_path_format(struct adata *path, byte *buf, unsigned int size) { if (!sp) *buf++ = ' '; - buf += bsprintf(buf, "%d", get_u16(p)); - p += 2; + buf += bsprintf(buf, "%u", get_as(p)); + p += bs; sp = 0; } if (isset) @@ -86,6 +187,7 @@ as_path_format(struct adata *path, byte *buf, unsigned int size) int as_path_getlen(struct adata *path) { + int bs = bgp_as4_support ? 4 : 2; int res = 0; u8 *p = path->data; u8 *q = p+path->length; @@ -95,8 +197,8 @@ as_path_getlen(struct adata *path) { switch (*p++) { - case AS_PATH_SET: len = *p++; res++; p += 2*len; break; - case AS_PATH_SEQUENCE: len = *p++; res+=len; p += 2*len; break; + case AS_PATH_SET: len = *p++; res++; p += bs * len; break; + case AS_PATH_SEQUENCE: len = *p++; res += len; p += bs * len; break; default: bug("as_path_getlen: Invalid path segment"); } } @@ -104,9 +206,11 @@ as_path_getlen(struct adata *path) } int -as_path_get_first(struct adata *path) +as_path_get_first(struct adata *path, u32 *orig_as) { - int res = -1; + int bs = bgp_as4_support ? 4 : 2; + int found = 0; + u32 res = 0; u8 *p = path->data; u8 *q = p+path->length; int len; @@ -117,36 +221,70 @@ as_path_get_first(struct adata *path) { case AS_PATH_SET: if (len = *p++) - res = get_u16(p); - p += 2*len; + { + found = 1; + res = get_as(p); + p += bs * len; + } break; case AS_PATH_SEQUENCE: if (len = *p++) - res = get_u16(p+2*(len-1)); - p += 2*len; + { + found = 1; + res = get_as(p + bs * (len - 1)); + p += bs * len; + } break; default: bug("as_path_get_first: Invalid path segment"); } } - return res; + + *orig_as = res; + return found; } +int +as_path_is_member(struct adata *path, u32 as) +{ + int bs = bgp_as4_support ? 4 : 2; + u8 *p = path->data; + u8 *q = p+path->length; + int i, n; + + while (pnext; if (!mask) return next == q; \ - asterisk = (mask->val == PM_ANY); \ + asterisk = mask->any; \ if (asterisk) { mask = mask->next; if (!mask) { return 1; } } \ } while(0) int as_path_match(struct adata *path, struct f_path_mask *mask) { + int bs = bgp_as4_support ? 4 : 2; int i; int asterisk = 0; u8 *p = path->data; u8 *q = p+path->length; int len; u8 *next; + u32 as; - asterisk = (mask->val == PM_ANY); + asterisk = mask->any; if (asterisk) { mask = mask->next; if (!mask) return 1; } @@ -156,20 +294,21 @@ as_path_match(struct adata *path, struct f_path_mask *mask) len = *p++; { u8 *p_save = p; - next = p_save + 2*len; + next = p_save + bs * len; retry: p = p_save; for (i=0; ival)) { + as = get_as(p); + if (asterisk && (as == mask->val)) { MASK_PLUS; goto retry; } - if (!asterisk && (get_u16(p) == mask->val)) { + if (!asterisk && (as == mask->val)) { p = next; MASK_PLUS; goto okay; } - p+=2; + p += bs; } if (!asterisk) return 0; @@ -180,15 +319,15 @@ as_path_match(struct adata *path, struct f_path_mask *mask) case AS_PATH_SEQUENCE: len = *p++; for (i=0; ival)) + as = get_as(p); + if (asterisk && (as == mask->val)) MASK_PLUS; else if (!asterisk) { - if (get_u16(p) != mask->val) + if (as != mask->val) return 0; MASK_PLUS; } - p+=2; + p += bs; } break; diff --git a/nest/attrs.h b/nest/attrs.h index abd6b9e9..f63f2e45 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -14,16 +14,29 @@ #define AS_PATH_SET 1 /* Types of path segments */ #define AS_PATH_SEQUENCE 2 -struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, int as); +#define AS_PATH_MAXLEN 10000 + +#define AS_TRANS 23456 +/* AS_TRANS is used when we need to store 32bit ASN larger than 0xFFFF + * to 16bit slot (like in 16bit AS_PATH). See RFC 4893 for details + */ + +struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, u32 as); +int as_path_convert_to_old(struct adata *path, byte *dst, int *new_used); +int as_path_convert_to_new(struct adata *path, byte *dst, int req_as); void as_path_format(struct adata *path, byte *buf, unsigned int size); int as_path_getlen(struct adata *path); -int as_path_get_first(struct adata *path); +int as_path_get_first(struct adata *path, u32 *orig_as); +int as_path_is_member(struct adata *path, u32 as); + struct f_path_mask { struct f_path_mask *next; - int val; + u32 val; + int any; }; -#define PM_ANY -1 + +// #define PM_ANY -1 int as_path_match(struct adata *path, struct f_path_mask *mask); -- cgit v1.2.3 From 4847a894bf7d4852325c3f1ea4bb4890054a1f66 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:45:09 +0100 Subject: Implementation of route reflection for BGP --- filter/filter.h | 1 + nest/a-set.c | 8 +- nest/attrs.h | 3 + proto/bgp/attrs.c | 223 +++++++++++++++++++++++++++++++++++++--------------- proto/bgp/bgp.c | 9 +++ proto/bgp/bgp.h | 10 ++- proto/bgp/config.Y | 6 +- proto/bgp/packets.c | 6 +- 8 files changed, 195 insertions(+), 71 deletions(-) (limited to 'nest') diff --git a/filter/filter.h b/filter/filter.h index 04a26236..f71e54d3 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -11,6 +11,7 @@ #include "lib/resource.h" #include "lib/ip.h" +#include "nest/route.h" #include "nest/attrs.h" struct f_inst { /* Instruction */ diff --git a/nest/a-set.c b/nest/a-set.c index 44407141..69c090b7 100644 --- a/nest/a-set.c +++ b/nest/a-set.c @@ -40,10 +40,12 @@ int_set_format(struct adata *set, byte *buf, unsigned int size) struct adata * int_set_add(struct linpool *pool, struct adata *list, u32 val) { - struct adata *res = lp_alloc(pool, list->length + sizeof(struct adata) + 4); - res->length = list->length+4; + int len = list ? list->length : 0; + struct adata *res = lp_alloc(pool, len + sizeof(struct adata) + 4); + res->length = len + 4; * (u32 *) res->data = val; - memcpy((char *) res->data + 4, list->data, list->length); + if (list) + memcpy((char *) res->data + 4, list->data, list->length); return res; } diff --git a/nest/attrs.h b/nest/attrs.h index f63f2e45..aaa5f4a2 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -47,4 +47,7 @@ struct adata *int_set_add(struct linpool *pool, struct adata *list, u32 val); int int_set_contains(struct adata *list, u32 val); struct adata *int_set_del(struct linpool *pool, struct adata *list, u32 val); +static inline int int_set_get_size(struct adata *list) +{ return list->length / 4; } + #endif diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 48cb9dd5..a42a4880 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -104,13 +104,19 @@ bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len) } static int -bgp_check_aggregator(struct bgp_proto *p UNUSED, UNUSED byte *a, int len) +bgp_check_aggregator(struct bgp_proto *p, UNUSED byte *a, int len) { int exp_len = (bgp_as4_support && p->as4_support) ? 8 : 6; return (len == exp_len) ? 0 : 5; } +static int +bgp_check_cluster_list(struct bgp_proto *p UNUSED, UNUSED byte *a, int len) +{ + return ((len % 4) == 0) ? 0 : 5; +} + static int bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) { @@ -150,8 +156,10 @@ static struct attr_desc bgp_attr_table[] = { bgp_check_aggregator, NULL }, { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */ NULL, NULL }, - { NULL, }, /* BA_ORIGINATOR_ID */ - { NULL, }, /* BA_CLUSTER_LIST */ + { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_ORIGINATOR_ID */ + NULL, NULL }, + { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */ + bgp_check_cluster_list, NULL }, { NULL, }, /* BA_DPA */ { NULL, }, /* BA_ADVERTISER */ { NULL, }, /* BA_RCID_PATH */ @@ -173,35 +181,52 @@ static struct attr_desc bgp_attr_table[] = { #define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name) -static byte * -bgp_set_attr(eattr *e, struct linpool *pool, unsigned attr, unsigned val) +static inline struct adata * +bgp_alloc_adata(struct linpool *pool, unsigned len) +{ + struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); + ad->length = len; + return ad; +} + +static void +bgp_set_attr(eattr *e, unsigned attr, uintptr_t val) { ASSERT(ATTR_KNOWN(attr)); e->id = EA_CODE(EAP_BGP, attr); e->type = bgp_attr_table[attr].type; e->flags = bgp_attr_table[attr].expected_flags; if (e->type & EAF_EMBEDDED) - { - e->u.data = val; - return NULL; - } + e->u.data = val; else - { - e->u.ptr = lp_alloc(pool, sizeof(struct adata) + val); - e->u.ptr->length = val; - return e->u.ptr->data; - } + e->u.ptr = (struct adata *) val; } -byte * -bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val) +static byte * +bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len) +{ + struct adata *ad = bgp_alloc_adata(pool, len); + bgp_set_attr(e, attr, (uintptr_t) ad); + return ad->data; +} + +void +bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val) { ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); a->next = *to; *to = a; a->flags = EALF_SORTED; a->count = 1; - return bgp_set_attr(a->attrs, pool, attr, val); + bgp_set_attr(a->attrs, attr, val); +} + +byte * +bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len) +{ + struct adata *ad = bgp_alloc_adata(pool, len); + bgp_attach_attr(to, pool, attr, (uintptr_t) ad); + return ad->data; } static int @@ -713,6 +738,7 @@ bgp_rt_notify(struct proto *P, net *n, rte *new, rte *old UNUSED, ea_list *attrs bgp_schedule_packet(p->conn, PKT_UPDATE); } + static int bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) { @@ -725,14 +751,14 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p ea->flags = EALF_SORTED; ea->count = 4; - bgp_set_attr(ea->attrs, pool, BA_ORIGIN, + bgp_set_attr(ea->attrs, BA_ORIGIN, ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP); if (p->is_internal) - bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 0); + bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0); else { - z = bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4); + z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4); z[0] = AS_PATH_SEQUENCE; z[1] = 1; /* 1 AS */ @@ -742,7 +768,7 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p put_u16(z+2, p->local_as); } - z = bgp_set_attr(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr)); + z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr)); if (p->cf->next_hop_self || !p->is_internal || rta->dest != RTD_ROUTER) @@ -755,34 +781,55 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p else *(ip_addr *)z = e->attrs->gw; - bgp_set_attr(ea->attrs+3, pool, BA_LOCAL_PREF, 0); + bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, 0); return 0; /* Leave decision to the filters */ } -static ea_list * -bgp_path_prepend(struct linpool *pool, eattr *a, ea_list *old, int as) + +static inline int +bgp_as_path_loopy(struct bgp_proto *p, rta *a) +{ + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + return (e && as_path_is_member(e->u.ptr, p->local_as)); +} + +static inline int +bgp_originator_id_loopy(struct bgp_proto *p, rta *a) { - struct ea_list *e = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - struct adata *olda = a->u.ptr; - - e->next = old; - e->flags = EALF_SORTED; - e->count = 1; - e->attrs[0].id = EA_CODE(EAP_BGP, BA_AS_PATH); - e->attrs[0].flags = BAF_TRANSITIVE; - e->attrs[0].type = EAF_TYPE_AS_PATH; - e->attrs[0].u.ptr = as_path_prepend(pool, olda, as); - return e; + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + return (e && (e->u.data == p->local_id)); +} + +static inline int +bgp_cluster_list_loopy(struct bgp_proto *p, rta *a) +{ + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id)); +} + + +static inline void +bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as) +{ + eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as)); +} + +static inline void +bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid) +{ + eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid)); } static int -bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) +bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr) { eattr *a; - if (!p->is_internal && (a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)))) - *attrs = bgp_path_prepend(pool, a, *attrs, p->local_as); + if (!p->is_internal) + bgp_path_prepend(e, attrs, pool, p->local_as); a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface))) @@ -792,7 +839,24 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p else { /* Need to create new one */ - *(ip_addr *) bgp_attach_attr(attrs, pool, BA_NEXT_HOP, sizeof(ip_addr)) = p->local_addr; + bgp_attach_attr_ip(attrs, pool, BA_NEXT_HOP, p->local_addr); + } + + if (rr) + { + /* Handling route reflection, RFC 4456 */ + struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto; + + a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + if (!a) + bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id); + + /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */ + bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id); + + /* Two RR clients with different cluster ID, hmmm */ + if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id)) + bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id); } return 0; /* Leave decision to the filters */ @@ -809,9 +873,22 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool * return -1; if (new_bgp) { + /* We should check here for cluster list loop, because the receiving BGP instance + might have different cluster ID */ + if (bgp_cluster_list_loopy(p, e->attrs)) + return -1; + if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal) - return -1; /* Don't redistribute internal routes with IBGP */ - return bgp_update_attrs(p, e, attrs, pool); + { + /* Redistribution of internal routes with IBGP */ + if (p->rr_client || new_bgp->rr_client) + /* Route reflection, RFC 4456 */ + return bgp_update_attrs(p, e, attrs, pool, 1); + else + return -1; + } + else + return bgp_update_attrs(p, e, attrs, pool, 0); } else return bgp_create_attrs(p, e, attrs, pool); @@ -835,7 +912,7 @@ bgp_rte_better(rte *new, rte *old) if (n < o) return 0; - /* Use AS path lengths */ + /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); @@ -848,7 +925,7 @@ bgp_rte_better(rte *new, rte *old) return 0; } - /* Use origins */ + /* RFC 4271 9.1.2.2. b) Use origins */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; @@ -858,7 +935,7 @@ bgp_rte_better(rte *new, rte *old) if (n > o) return 0; - /* Compare MED's */ + /* RFC 4271 9.1.2.2. c) Compare MED's */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); n = x ? x->u.data : new_bgp->cf->default_med; @@ -868,24 +945,41 @@ bgp_rte_better(rte *new, rte *old) if (n > o) return 0; - /* A tie breaking procedure according to RFC 1771, section 9.1.2.1 */ - /* We don't have interior distances */ - /* We prefer external peers */ + /* RFC 4271 9.1.2.2. d) Prefer external peers */ if (new_bgp->is_internal > old_bgp->is_internal) return 0; if (new_bgp->is_internal < old_bgp->is_internal) return 1; - /* Finally we compare BGP identifiers */ - return (new_bgp->remote_id < old_bgp->remote_id); -} -static int -bgp_path_loopy(struct bgp_proto *p, eattr *a) -{ - return as_path_is_member(a->u.ptr, p->local_as); -} + /* Skipping RFC 4271 9.1.2.2. e) */ + /* We don't have interior distances */ + + /* RFC 4456 9. b) Compare cluster list lengths */ + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + n = x ? int_set_get_size(x->u.ptr) : 0; + o = y ? int_set_get_size(y->u.ptr) : 0; + if (n < o) + return 1; + if (n > o) + return 0; + + /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ + /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */ + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + n = x ? x->u.data : new_bgp->remote_id; + o = y ? y->u.data : old_bgp->remote_id; + if (n < o) + return 1; + if (n > o) + return 0; + /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */ + return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); +} + static struct adata * bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) { @@ -916,7 +1010,7 @@ bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct li } -/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC4893 4.2.3 */ +/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */ static void bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) { @@ -1159,18 +1253,23 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin bgp_remove_as4_attrs(bgp, a); /* If the AS path attribute contains our AS, reject the routes */ - e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - if (e && bgp_path_loopy(bgp, e)) - { - DBG("BGP: Path loop!\n"); - return NULL; - } + if (bgp_as_path_loopy(bgp, a)) + goto loop; + + /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ + if (bgp_originator_id_loopy(bgp, a) || + bgp_cluster_list_loopy(bgp, a)) + goto loop; /* If there's no local preference, define one */ if (!(seen[0] && (1 << BA_LOCAL_PREF))) bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, 0); return a; +loop: + DBG("BGP: Path loop!\n"); + return NULL; + malformed: bgp_error(conn, 3, 1, NULL, 0); return NULL; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index e1f5ec02..5fa12492 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -485,6 +485,13 @@ bgp_start_locked(struct object_lock *lock) p->local_id = cf->c.global->router_id; p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip; p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY); + + if (cf->rr_client) + { + p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id; + p->rr_client = cf->rr_client; + } + if (!p->neigh) { log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop); @@ -633,6 +640,8 @@ bgp_check(struct bgp_config *c) cf_error("Local AS number out of range"); if (!bgp_as4_support && (c->remote_as > 0xFFFF)) cf_error("Neighbor AS number out of range"); + if ((c->local_as != c->remote_as) && (c->rr_client)) + cf_error("Only internal neighbor can be RR client"); } static void diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 93383244..af3c5c5a 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -25,6 +25,8 @@ struct bgp_config { int compare_path_lengths; /* Use path lengths when selecting best route */ u32 default_local_pref; /* Default value for LOCAL_PREF attribute */ u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */ + u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ + int rr_client; /* Whether neighbor is RR client of me */ unsigned connect_retry_time; unsigned hold_time, initial_hold_time; unsigned keepalive_time; @@ -60,6 +62,8 @@ struct bgp_proto { int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ + u32 rr_cluster_id; /* Route reflector cluster ID */ + int rr_client; /* Whether neighbor is RR client of me */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ @@ -121,7 +125,8 @@ void bgp_close_conn(struct bgp_conn *c); /* attrs.c */ -byte *bgp_attach_attr(struct ea_list **to, struct linpool *, unsigned attr, unsigned val); +void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val); +byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len); struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, unsigned int len, struct linpool *pool, int mandatory); int bgp_get_attr(struct eattr *e, byte *buf); int bgp_rte_better(struct rte *, struct rte *); @@ -132,6 +137,9 @@ unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); +inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a) +{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; } + /* packets.c */ void bgp_schedule_packet(struct bgp_conn *conn, int type); diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 580c008f..b23b66cf 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -20,7 +20,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, START, DELAY, FORGET, WAIT, DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, - BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, PASSWORD) + BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, + PASSWORD, RR, CLIENT, CLUSTER, ID) CF_GRAMMAR @@ -38,7 +39,6 @@ bgp_proto_start: proto_start BGP { BGP_CFG->error_amnesia_time = 300; BGP_CFG->error_delay_time_min = 60; BGP_CFG->error_delay_time_max = 300; - BGP_CFG->password = NULL; } ; @@ -52,6 +52,8 @@ bgp_proto: BGP_CFG->remote_ip = $3; BGP_CFG->remote_as = $5; } + | bgp_proto RR CLUSTER ID expr ';' { BGP_CFG->rr_cluster_id = $5; } + | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; } | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; } | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 0dd920e4..8a352c68 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -193,7 +193,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { DBG("Withdrawn routes:\n"); - tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); + tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); *tmp++ = 0; *tmp++ = BGP_AF_IPV6; *tmp++ = 1; @@ -218,7 +218,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) size = bgp_encode_attrs(p, w, buck->eattrs, 1024); w += size; remains -= size; - tstart = tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); + tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); *tmp++ = 0; *tmp++ = BGP_AF_IPV6; *tmp++ = 1; @@ -702,7 +702,7 @@ bgp_do_rx_update(struct bgp_conn *conn, /* Create fake NEXT_HOP attribute */ if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2) goto bad; - memcpy(bgp_attach_attr(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16); + bgp_attach_attr_ip(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, x[1]); len -= *x + 2; x += *x + 1; -- cgit v1.2.3 From b6bf284a905412cfe107b4967e55649e6194187e Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:59:21 +0100 Subject: Bugfixes in MULIT_EXIT_DISC attribute handling. - Old MED handling was completely different from behavior specified in RFCs - for example they havn't been propagated to neighboring areas. - Update tie-breaking according to RFC 4271. - Change default value for 'default bgp_med' configuration option according to RFC 4271. --- doc/bird.sgml | 16 +++++++++++----- nest/a-path.c | 14 ++++++++++++++ nest/attrs.h | 1 + proto/bgp/attrs.c | 46 ++++++++++++++++++++++++++++++++++++---------- proto/bgp/config.Y | 2 +- 5 files changed, 63 insertions(+), 16 deletions(-) (limited to 'nest') diff --git a/doc/bird.sgml b/doc/bird.sgml index 5ee9562f..8fa55f85 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -784,7 +784,7 @@ for each neighbor using the following configuration parameters: default bgp_med Value of the Multiple Exit Discriminator to be used during route selection when the MED attribute - is missing. Default: infinite. + is missing. Default: 0. default bgp_local_pref Value of the Local Preference to be used during route selection when the Local Preference attribute @@ -806,10 +806,16 @@ with `int The Multiple Exit Discriminator of the route - is an optional attribute which is often used within the local AS to - reflect interior distances to various boundary routers. See the route selection - rules above for exact semantics. + int The Multiple Exit Discriminator of the route + is an optional attribute which is used on on external (inter-AS) links to + convey to an adjacent AS the optimal entry point into the local AS. + The received attribute may be also propagated over internal BGP links + (and this is default behavior). The attribute value is zeroed when a route + is exported from a routing table to a BGP instance to ensure that the attribute + received from a neighboring AS is not propagated to other neighboring ASes. + A new value might be set in the export filter of a BGP instance. + See RFC 4451 + for further discussion of BGP MED attribute. enum Origin of the route: either data; + + if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) + return 0; + else + { + *last_as = get_as(p+2); + return 1; + } +} + int as_path_is_member(struct adata *path, u32 as) { diff --git a/nest/attrs.h b/nest/attrs.h index aaa5f4a2..fee2c2c8 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -27,6 +27,7 @@ int as_path_convert_to_new(struct adata *path, byte *dst, int req_as); void as_path_format(struct adata *path, byte *buf, unsigned int size); int as_path_getlen(struct adata *path); int as_path_get_first(struct adata *path, u32 *orig_as); +int as_path_get_last(struct adata *path, u32 *last_as); int as_path_is_member(struct adata *path, u32 as); diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index b5d8fba7..2210cbe7 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -146,7 +146,7 @@ static struct attr_desc bgp_attr_table[] = { bgp_check_as_path, NULL }, { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */ bgp_check_next_hop, NULL }, - { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_MULTI_EXIT_DISC */ + { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */ NULL, NULL }, { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0, /* BA_LOCAL_PREF */ NULL, NULL }, @@ -829,7 +829,17 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p eattr *a; if (!p->is_internal) - bgp_path_prepend(e, attrs, pool, p->local_as); + { + bgp_path_prepend(e, attrs, pool, p->local_as); + + /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be + * propagated to other neighboring ASes. + * Perhaps it would be better to undefine it. + */ + a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + if (a) + bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0); + } a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface))) @@ -894,6 +904,18 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool * return bgp_create_attrs(p, e, attrs, pool); } +static inline u32 +bgp_get_neighbor(rte *r) +{ + eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + u32 as; + + if (e && as_path_get_last(e->u.ptr, &as)) + return as; + else + return ((struct bgp_proto *) r->attrs->proto)->remote_as; +} + int bgp_rte_better(rte *new, rte *old) { @@ -936,14 +958,18 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. c) Compare MED's */ - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - n = x ? x->u.data : new_bgp->cf->default_med; - o = y ? y->u.data : old_bgp->cf->default_med; - if (n < o) - return 1; - if (n > o) - return 0; + + if (bgp_get_neighbor(new) == bgp_get_neighbor(old)) + { + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + n = x ? x->u.data : new_bgp->cf->default_med; + o = y ? y->u.data : old_bgp->cf->default_med; + if (n < o) + return 1; + if (n > o) + return 0; + } /* RFC 4271 9.1.2.2. d) Prefer external peers */ if (new_bgp->is_internal > old_bgp->is_internal) diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index d7bba575..8524b2dd 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -33,7 +33,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->hold_time = 240; BGP_CFG->connect_retry_time = 120; BGP_CFG->initial_hold_time = 240; - BGP_CFG->default_med = ~0; /* RFC 1771 doesn't specify this, draft-09 says ~0 */ + BGP_CFG->default_med = 0; BGP_CFG->compare_path_lengths = 1; BGP_CFG->start_delay_time = 5; BGP_CFG->error_amnesia_time = 300; -- cgit v1.2.3