From 11cb620266035ffbe17b21c4a174380cb8b6a521 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:36:08 +0100 Subject: Implementation of 4B ASN support for BGP --- filter/config.Y | 11 +- filter/filter.c | 26 +++- nest/a-path.c | 209 ++++++++++++++++++++++----- nest/attrs.h | 21 ++- proto/bgp/attrs.c | 402 +++++++++++++++++++++++++++++++++++++++++++--------- proto/bgp/bgp.c | 5 + proto/bgp/bgp.h | 13 +- proto/bgp/config.Y | 8 +- proto/bgp/packets.c | 151 +++++++++++++++----- 9 files changed, 692 insertions(+), 154 deletions(-) diff --git a/filter/config.Y b/filter/config.Y index d4bf44cc..fdfb2e74 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -39,7 +39,6 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, %type set_atom fprefix fprefix_s fipa %type decls declsn one_decl function_params %type bgp_path -%type bgp_one CF_GRAMMAR @@ -273,14 +272,12 @@ switch_body: /* EMPTY */ { $$ = NULL; } /* CONST '(' expr ')' { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_INT; $$->a2.i = $3; } */ -bgp_one: - NUM { $$ = $1; } - | '?' { $$ = PM_ANY; } - ; bgp_path: - bgp_one { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = $1; } - | bgp_one bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = $1; } + NUM { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = $1; $$->any = 0; } + | '?' { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = 0; $$->any = 1; } + | NUM bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = $1; $$->any = 0; } + | '?' 
bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = 0; $$->any = 1; } ; constant: diff --git a/filter/filter.c b/filter/filter.c index 9cde3d96..7893d9ae 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -69,6 +69,30 @@ pm_path_compare(struct f_path_mask *m1, struct f_path_mask *m2) } } +static void +pm_format(struct f_path_mask *p, byte *buf, unsigned int size) +{ + byte *end = buf + size - 16; + + while (p) + { + if (buf > end) + { + strcpy(buf, " ..."); + return; + } + + if (p->any) + buf += bsprintf(buf, "? "); + else + buf += bsprintf(buf, "%u ", p->val); + + p = p->next; + } + + *buf = 0; +} + /** * val_compare - compare two values * @v1: first value @@ -224,7 +248,7 @@ val_print(struct f_val v) case T_ENUM: PRINTF( "(enum %x)%d", v.type, v.val.i ); break; case T_PATH: as_path_format(v.val.ad, buf2, 1020); PRINTF( "(path %s)", buf2 ); break; case T_CLIST: int_set_format(v.val.ad, buf2, 1020); PRINTF( "(clist %s)", buf2 ); break; - case T_PATH_MASK: debug( "(pathmask " ); { struct f_path_mask *p = v.val.path_mask; while (p) { debug("%d ", p->val); p=p->next; } debug(")" ); } break; + case T_PATH_MASK: pm_format(v.val.path_mask, buf2, 1020); PRINTF( "(pathmask %s)", buf2 ); break; default: PRINTF( "[unknown type %x]", v.type ); #undef PRINTF } diff --git a/nest/a-path.c b/nest/a-path.c index 1b08f809..f4666911 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -14,38 +14,139 @@ #include "lib/unaligned.h" #include "lib/string.h" + +/* Global AS4 support, shared by all BGP instances. + * This specifies whether BA_AS_PATH attributes contain 2 or 4 B per ASN + */ + +int bgp_as4_support = 1; + +static void +put_as(byte *data, u32 as) +{ + if (bgp_as4_support) + put_u32(data, as); + else if (as <= 0xFFFF) + put_u16(data, as); + else + bug("put_as: Try to put 32bit AS to 16bit AS Path"); +} + +static inline u32 +get_as(byte *data) +{ + return bgp_as4_support ? 
get_u32(data) : get_u16(data); +} + struct adata * -as_path_prepend(struct linpool *pool, struct adata *olda, int as) +as_path_prepend(struct linpool *pool, struct adata *olda, u32 as) { + int bs = bgp_as4_support ? 4 : 2; struct adata *newa; - if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && - olda->data[1] < 255) /* Starting with sequence => just prepend the AS number */ + if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && olda->data[1] < 255) + /* Starting with sequence => just prepend the AS number */ { - newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 2); - newa->length = olda->length + 2; - newa->data[0] = 2; + int nl = olda->length + bs; + newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + newa->data[0] = AS_PATH_SEQUENCE; newa->data[1] = olda->data[1] + 1; - memcpy(newa->data+4, olda->data+2, olda->length-2); + memcpy(newa->data + bs + 2, olda->data + 2, olda->length - 2); } - else /* Create new path segment */ + else /* Create new path segment */ { - newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 4); - newa->length = olda->length + 4; - newa->data[0] = 2; + int nl = olda->length + bs + 2; + newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + newa->data[0] = AS_PATH_SEQUENCE; newa->data[1] = 1; - memcpy(newa->data+4, olda->data, olda->length); + memcpy(newa->data + bs + 2, olda->data, olda->length); } - put_u16(newa->data+2, as); + put_as(newa->data + 2, as); return newa; } +int +as_path_convert_to_old(struct adata *path, byte *dst, int *new_used) +{ + byte *src = path->data; + byte *src_end = src + path->length; + byte *dst_start = dst; + u32 as; + int i, n; + *new_used = 0; + + while (src < src_end) + { + n = src[1]; + *dst++ = *src++; + *dst++ = *src++; + + for(i=0; i 0xFFFF) + { + as = AS_TRANS; + *new_used = 1; + } + put_u16(dst, as); + src += 4; + dst += 2; + } + } + + return dst - dst_start; +} + +int +as_path_convert_to_new(struct adata *path, byte *dst, int req_as) +{ 
+ byte *src = path->data; + byte *src_end = src + path->length; + byte *dst_start = dst; + u32 as; + int i, t, n; + + + while ((src < src_end) && (req_as > 0)) + { + t = *src++; + n = *src++; + + if (t == AS_PATH_SEQUENCE) + { + if (n > req_as) + n = req_as; + + req_as -= n; + } + else // t == AS_PATH_SET + req_as--; + + *dst++ = t; + *dst++ = n; + + for(i=0; idata; byte *e = p + path->length; - byte *end = buf + size - 8; + byte *end = buf + size - 16; int sp = 1; int l, isset; @@ -69,8 +170,8 @@ as_path_format(struct adata *path, byte *buf, unsigned int size) { if (!sp) *buf++ = ' '; - buf += bsprintf(buf, "%d", get_u16(p)); - p += 2; + buf += bsprintf(buf, "%u", get_as(p)); + p += bs; sp = 0; } if (isset) @@ -86,6 +187,7 @@ as_path_format(struct adata *path, byte *buf, unsigned int size) int as_path_getlen(struct adata *path) { + int bs = bgp_as4_support ? 4 : 2; int res = 0; u8 *p = path->data; u8 *q = p+path->length; @@ -95,8 +197,8 @@ as_path_getlen(struct adata *path) { switch (*p++) { - case AS_PATH_SET: len = *p++; res++; p += 2*len; break; - case AS_PATH_SEQUENCE: len = *p++; res+=len; p += 2*len; break; + case AS_PATH_SET: len = *p++; res++; p += bs * len; break; + case AS_PATH_SEQUENCE: len = *p++; res += len; p += bs * len; break; default: bug("as_path_getlen: Invalid path segment"); } } @@ -104,9 +206,11 @@ as_path_getlen(struct adata *path) } int -as_path_get_first(struct adata *path) +as_path_get_first(struct adata *path, u32 *orig_as) { - int res = -1; + int bs = bgp_as4_support ? 
4 : 2; + int found = 0; + u32 res = 0; u8 *p = path->data; u8 *q = p+path->length; int len; @@ -117,36 +221,70 @@ as_path_get_first(struct adata *path) { case AS_PATH_SET: if (len = *p++) - res = get_u16(p); - p += 2*len; + { + found = 1; + res = get_as(p); + p += bs * len; + } break; case AS_PATH_SEQUENCE: if (len = *p++) - res = get_u16(p+2*(len-1)); - p += 2*len; + { + found = 1; + res = get_as(p + bs * (len - 1)); + p += bs * len; + } break; default: bug("as_path_get_first: Invalid path segment"); } } - return res; + + *orig_as = res; + return found; } +int +as_path_is_member(struct adata *path, u32 as) +{ + int bs = bgp_as4_support ? 4 : 2; + u8 *p = path->data; + u8 *q = p+path->length; + int i, n; + + while (pnext; if (!mask) return next == q; \ - asterisk = (mask->val == PM_ANY); \ + asterisk = mask->any; \ if (asterisk) { mask = mask->next; if (!mask) { return 1; } } \ } while(0) int as_path_match(struct adata *path, struct f_path_mask *mask) { + int bs = bgp_as4_support ? 4 : 2; int i; int asterisk = 0; u8 *p = path->data; u8 *q = p+path->length; int len; u8 *next; + u32 as; - asterisk = (mask->val == PM_ANY); + asterisk = mask->any; if (asterisk) { mask = mask->next; if (!mask) return 1; } @@ -156,20 +294,21 @@ as_path_match(struct adata *path, struct f_path_mask *mask) len = *p++; { u8 *p_save = p; - next = p_save + 2*len; + next = p_save + bs * len; retry: p = p_save; for (i=0; ival)) { + as = get_as(p); + if (asterisk && (as == mask->val)) { MASK_PLUS; goto retry; } - if (!asterisk && (get_u16(p) == mask->val)) { + if (!asterisk && (as == mask->val)) { p = next; MASK_PLUS; goto okay; } - p+=2; + p += bs; } if (!asterisk) return 0; @@ -180,15 +319,15 @@ as_path_match(struct adata *path, struct f_path_mask *mask) case AS_PATH_SEQUENCE: len = *p++; for (i=0; ival)) + as = get_as(p); + if (asterisk && (as == mask->val)) MASK_PLUS; else if (!asterisk) { - if (get_u16(p) != mask->val) + if (as != mask->val) return 0; MASK_PLUS; } - p+=2; + p += bs; } break; 
diff --git a/nest/attrs.h b/nest/attrs.h index abd6b9e9..f63f2e45 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -14,16 +14,29 @@ #define AS_PATH_SET 1 /* Types of path segments */ #define AS_PATH_SEQUENCE 2 -struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, int as); +#define AS_PATH_MAXLEN 10000 + +#define AS_TRANS 23456 +/* AS_TRANS is used when we need to store 32bit ASN larger than 0xFFFF + * to 16bit slot (like in 16bit AS_PATH). See RFC 4893 for details + */ + +struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, u32 as); +int as_path_convert_to_old(struct adata *path, byte *dst, int *new_used); +int as_path_convert_to_new(struct adata *path, byte *dst, int req_as); void as_path_format(struct adata *path, byte *buf, unsigned int size); int as_path_getlen(struct adata *path); -int as_path_get_first(struct adata *path); +int as_path_get_first(struct adata *path, u32 *orig_as); +int as_path_is_member(struct adata *path, u32 as); + struct f_path_mask { struct f_path_mask *next; - int val; + u32 val; + int any; }; -#define PM_ANY -1 + +// #define PM_ANY -1 int as_path_match(struct adata *path, struct f_path_mask *mask); diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 30699f84..48cb9dd5 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -55,21 +55,37 @@ bgp_format_origin(eattr *a, byte *buf) } static int -bgp_check_path(struct bgp_proto *p UNUSED, byte *a, int len) +bgp_check_path(byte *a, int len, int bs, int errcode) { while (len) { DBG("Path segment %02x %02x\n", a[0], a[1]); if (len < 2 || - a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE || - 2*a[1] + 2 > len) - return 11; - len -= 2*a[1] + 2; - a += 2*a[1] + 2; + (a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE) || + bs * a[1] + 2 > len) + return errcode; + len -= bs * a[1] + 2; + a += bs * a[1] + 2; } return 0; } +static int +bgp_check_as_path(struct bgp_proto *p, byte *a, int len) +{ + return bgp_check_path(a, len, (bgp_as4_support && p->as4_support) 
? 4 : 2, 11); +} + +static int +bgp_check_as4_path(struct bgp_proto *p, byte *a, int len) +{ + if (bgp_as4_support && (! p->as4_support)) + return bgp_check_path(a, len, 4, 9); + else + return 0; +} + + static int bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len) { @@ -87,6 +103,14 @@ bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len) #endif } +static int +bgp_check_aggregator(struct bgp_proto *p UNUSED, UNUSED byte *a, int len) +{ + int exp_len = (bgp_as4_support && p->as4_support) ? 8 : 6; + + return (len == exp_len) ? 0 : 5; +} + static int bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) { @@ -113,7 +137,7 @@ static struct attr_desc bgp_attr_table[] = { { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */ bgp_check_origin, bgp_format_origin }, { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */ - bgp_check_path, NULL }, + bgp_check_as_path, NULL }, { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */ bgp_check_next_hop, NULL }, { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_MULTI_EXIT_DISC */ @@ -122,8 +146,8 @@ static struct attr_desc bgp_attr_table[] = { NULL, NULL }, { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */ NULL, NULL }, - { "aggregator", 6, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */ - NULL, NULL }, + { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */ + bgp_check_aggregator, NULL }, { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */ NULL, NULL }, { NULL, }, /* BA_ORIGINATOR_ID */ @@ -135,8 +159,18 @@ static struct attr_desc bgp_attr_table[] = { bgp_check_reach_nlri, NULL }, { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */ bgp_check_unreach_nlri, NULL }, + { NULL, }, /* BA_EXTENDED_COMM */ + { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* 
BA_AS4_PATH */ + bgp_check_as4_path, NULL }, + { "as4_aggregator", 8, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */ + NULL, NULL } }; +/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH because + * EAF_TYPE_AS_PATH is supposed to have different format (2 or 4 B for each ASN) + * depending on bgp_as4_support variable. + */ + #define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name) static byte * @@ -170,8 +204,90 @@ bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val) return bgp_set_attr(a->attrs, pool, attr, val); } +static int +bgp_encode_attr_hdr(byte *dst, unsigned int flags, unsigned code, int len) +{ + int wlen; + + DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags); + + if (len < 256) + { + *dst++ = flags; + *dst++ = code; + *dst++ = len; + wlen = 3; + } + else + { + *dst++ = flags | BAF_EXT_LEN; + *dst++ = code; + put_u16(dst, len); + wlen = 4; + } + + return wlen; +} + +static void +aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used) +{ + byte *src = aggr->data; + *new_used = 0; + + u32 as = get_u32(src); + if (as > 0xFFFF) + { + as = AS_TRANS; + *new_used = 1; + } + put_u16(dst, as); + + /* Copy IPv4 address */ + memcpy(dst + 2, src + 4, 4); +} + +static void +aggregator_convert_to_new(struct adata *aggr, byte *dst) +{ + byte *src = aggr->data; + + u32 as = get_u16(src); + put_u32(dst, as); + + /* Copy IPv4 address */ + memcpy(dst + 4, src + 2, 4); +} + +static int +bgp_get_attr_len(eattr *a) +{ + int len; + if (ATTR_KNOWN(EA_ID(a->id))) + { + int code = EA_ID(a->id); + struct attr_desc *desc = &bgp_attr_table[code]; + len = desc->expected_length; + if (len < 0) + { + ASSERT(!(a->type & EAF_EMBEDDED)); + len = a->u.ptr->length; + } + } + else + { + ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE); + len = a->u.ptr->length; + } + + return len; +} + +#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0) + /** * 
bgp_encode_attrs - encode BGP attributes + * @p: BGP instance * @w: buffer * @attrs: a list of extended attributes * @remains: remaining space in the buffer @@ -182,11 +298,11 @@ bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val) * Result: Length of the attribute block generated. */ unsigned int -bgp_encode_attrs(byte *w, ea_list *attrs, int remains) +bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) { unsigned int i, code, flags; byte *start = w; - int len; + int len, rv; for(i=0; icount; i++) { @@ -198,43 +314,90 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains) if (code == BA_NEXT_HOP) continue; #endif - flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL); - if (ATTR_KNOWN(code)) - { - struct attr_desc *desc = &bgp_attr_table[code]; - len = desc->expected_length; - if (len < 0) - { - ASSERT(!(a->type & EAF_EMBEDDED)); - len = a->u.ptr->length; - } - } - else + + /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker, + * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH + * as optional AS4_PATH attribute. + */ + if ((code == BA_AS_PATH) && bgp_as4_support && (! p->as4_support)) { - ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE); len = a->u.ptr->length; + + if (remains < (len + 4)) + goto err_no_buffer; + + /* Using temporary buffer because don't know a length of created attr + * and therefore a length of a header. Perhaps i should better always + * use BAF_EXT_LEN. */ + + byte buf[len]; + int new_used; + int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used); + + rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl); + ADVANCE(w, remains, rv); + memcpy(w, buf, nl); + ADVANCE(w, remains, nl); + + if (! 
new_used) + continue; + + if (remains < (len + 4)) + goto err_no_buffer; + + /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments + * here but we don't support confederations and such paths we already + * discarded in bgp_check_as_path(). + */ + + rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len); + ADVANCE(w, remains, rv); + memcpy(w, a->u.ptr->data, len); + ADVANCE(w, remains, len); + + continue; } - DBG("\tAttribute %02x (type %02x, %d bytes, flags %02x)\n", code, a->type, len, flags); - if (remains < len + 4) - { - log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes"); - break; - } - if (len < 256) - { - *w++ = flags; - *w++ = code; - *w++ = len; - remains -= 3; - } - else + + /* The same issue with AGGREGATOR attribute */ + if ((code == BA_AGGREGATOR) && bgp_as4_support && (! p->as4_support)) { - *w++ = flags | BAF_EXT_LEN; - *w++ = code; - put_u16(w, len); - w += 2; - remains -= 4; + int new_used; + + len = 6; + if (remains < (len + 3)) + goto err_no_buffer; + + rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len); + ADVANCE(w, remains, rv); + aggregator_convert_to_old(a->u.ptr, w, &new_used); + ADVANCE(w, remains, len); + + if (! new_used) + continue; + + len = 8; + if (remains < (len + 3)) + goto err_no_buffer; + + rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len); + ADVANCE(w, remains, rv); + memcpy(w, a->u.ptr->data, len); + ADVANCE(w, remains, len); + + continue; } + + /* Standard path continues here ... 
*/ + + flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL); + len = bgp_get_attr_len(a); + + if (remains < len + 4) + goto err_no_buffer; + + rv = bgp_encode_attr_hdr(w, flags, code, len); + ADVANCE(w, remains, rv); + switch (a->type & EAF_TYPE_MASK) { case EAF_TYPE_INT: @@ -266,10 +429,13 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains) default: bug("bgp_encode_attrs: unknown attribute type %02x", a->type); } - remains -= len; - w += len; + ADVANCE(w, remains, len); } return w - start; + + err_no_buffer: + log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes"); + return w - start; } static void @@ -566,10 +732,14 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 0); else { - z = bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 4); + z = bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4); z[0] = AS_PATH_SEQUENCE; z[1] = 1; /* 1 AS */ - put_u16(z+2, p->local_as); + + if (bgp_as4_support) + put_u32(z+2, p->local_as); + else + put_u16(z+2, p->local_as); } z = bgp_set_attr(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr)); @@ -670,8 +840,8 @@ bgp_rte_better(rte *new, rte *old) { x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - n = x ? as_path_getlen(x->u.ptr) : 100000; - o = y ? as_path_getlen(y->u.ptr) : 100000; + n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; + o = y ? 
as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; if (n < o) return 1; if (n > o) @@ -712,23 +882,118 @@ bgp_rte_better(rte *new, rte *old) static int bgp_path_loopy(struct bgp_proto *p, eattr *a) { - byte *path = a->u.ptr->data; - int len = a->u.ptr->length; - int i, n; + return as_path_is_member(a->u.ptr, p->local_as); +} + + +static struct adata * +bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) +{ + struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8); + newa->length = 8; + aggregator_convert_to_new(old, newa->data); + return newa; +} + + +/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format + * and append path old4 (in 4B format). + */ +static struct adata * +bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool) +{ + byte buf[old2->length * 2]; + + int ol = as_path_convert_to_new(old2, buf, req_as); + int nl = ol + (old4 ? old4->length : 0); - while (len > 0) + struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl); + newa->length = nl; + memcpy(newa->data, buf, ol); + if (old4) memcpy(newa->data + ol, old4->data, old4->length); + + return newa; +} + + +/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC4893 4.2.3 */ +static void +bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) +{ + eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH)); + eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR)); + eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR)); + + if (a2) { - n = path[1]; - len -= 2 + 2*n; - path += 2; - for(i=0; iu.ptr->data); + + if (a4) { - if (get_u16(path) == p->local_as) - return 1; - path += 2; + if (a2_as != AS_TRANS) + { + /* Routes were aggregated by old router and therefore AS4_PATH + * and AS4_AGGREGATOR is invalid + * + * Convert AS_PATH and AGGREGATOR to 4B format and finish. 
+ */ + + a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); + p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); + + return; + } + else + { + /* Common case, use AS4_AGGREGATOR attribute */ + a2->u.ptr = a4->u.ptr; + } + } + else + { + /* Common case, use old AGGREGATOR attribute */ + a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool); + + if (a2_as == AS_TRANS) + log(L_WARN "BGP: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing"); } } - return 0; + else + if (a4) + log(L_WARN "BGP: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing"); + + int p2_len = as_path_getlen(p2->u.ptr); + int p4_len = p4 ? as_path_getlen(p4->u.ptr) : AS_PATH_MAXLEN; + + if (p2_len < p4_len) + p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool); + else + p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool); + +} + +static void +bgp_remove_as4_attrs(struct bgp_proto *p, rta *a) +{ + unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH); + unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR); + ea_list **el = &(a->eattrs); + + /* We know that ea_lists constructed in bgp_decode_attrs have one attribute per ea_list struct */ + while (*el != NULL) + { + unsigned fid = (*el)->attrs[0].id; + + if ((fid == id1) || (fid == id2)) + { + *el = (*el)->next; + if (p->as4_support) + log(L_WARN "BGP: Unexpected AS4_* attributes received"); + } + else + el = &((*el)->next); + } } /** @@ -883,6 +1148,15 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin } } } + + /* When receiving attributes from non-AS4-aware BGP speaker, + * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes + */ + if (bgp_as4_support && (! 
bgp->as4_support)) + bgp_reconstruct_4b_atts(bgp, a, pool); + + if (bgp_as4_support) + bgp_remove_as4_attrs(bgp, a); /* If the AS path attribute contains our AS, reject the routes */ e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); @@ -945,11 +1219,11 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs) { eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH)); eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN)); - int origas; + u32 origas; buf += bsprintf(buf, " (%d) [", e->pref); - if (p && (origas = as_path_get_first(p->u.ptr)) >= 0) - buf += bsprintf(buf, "AS%d", origas); + if (p && as_path_get_first(p->u.ptr, &origas)) + buf += bsprintf(buf, "AS%u", origas); if (o) buf += bsprintf(buf, "%c", "ie?"[o->u.data]); strcpy(buf, "]"); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index cedd223b..ed2524c8 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -76,6 +76,7 @@ static void bgp_connect(struct bgp_proto *p); static void bgp_initiate(struct bgp_proto *p); static void bgp_setup_listen_sk(void); + static void bgp_close(struct bgp_proto *p UNUSED) { @@ -611,6 +612,10 @@ bgp_check(struct bgp_config *c) cf_error("Local AS number must be set"); if (!c->remote_as) cf_error("Neighbor must be configured"); + if (!bgp_as4_support && (c->local_as > 0xFFFF)) + cf_error("Local AS number out of range"); + if (!bgp_as4_support && (c->remote_as > 0xFFFF)) + cf_error("Neighbor AS number out of range"); } static void diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 6519db85..aa1bd107 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -16,7 +16,7 @@ struct eattr; struct bgp_config { struct proto_config c; - unsigned int local_as, remote_as; + u32 local_as, remote_as; ip_addr remote_ip; int multihop; /* Number of hops if multihop */ ip_addr multihop_via; /* Multihop: address to route to */ @@ -47,14 +47,16 @@ struct bgp_conn { byte *notify_data; int error_flag; /* Error state, ignore all input */ int primary; /* This connection is primary */ + u32 
advertised_as; /* Temporary value for AS number received */ unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */ }; struct bgp_proto { struct proto p; struct bgp_config *cf; /* Shortcut to BGP configuration */ - unsigned local_as, remote_as; + u32 local_as, remote_as; int is_internal; /* Internal BGP connection (local_as == remote_as) */ + int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ struct bgp_conn *conn; /* Connection we have established */ @@ -100,6 +102,9 @@ struct bgp_bucket { extern struct linpool *bgp_linpool; +extern int bgp_as4_support; + + void bgp_start_timer(struct timer *t, int value); void bgp_check(struct bgp_config *c); void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len); @@ -122,7 +127,7 @@ int bgp_rte_better(struct rte *, struct rte *); void bgp_rt_notify(struct proto *, struct network *, struct rte *, struct rte *, struct ea_list *); int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *); void bgp_attr_init(struct bgp_proto *); -unsigned int bgp_encode_attrs(byte *w, struct ea_list *attrs, int remains); +unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains); void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); @@ -165,6 +170,8 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco #define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */ #define BA_MP_UNREACH_NLRI 0x0f #define BA_EXTENDED_COMM 0x10 /* draft-ramachandra-bgp-ext-communities */ +#define BA_AS4_PATH 0x11 /* [RFC4893] */ +#define BA_AS4_AGGREGATOR 0x12 /* BGP states */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 52ad731e..63dfb615 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -44,12 +44,10 
@@ bgp_proto_start: proto_start BGP { bgp_proto: bgp_proto_start proto_name '{' | bgp_proto proto_item ';' - | bgp_proto LOCAL AS expr ';' { - if ($4 < 0 || $4 > 65535) cf_error("AS number out of range"); - BGP_CFG->local_as = $4; - } + | bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; } | bgp_proto NEIGHBOR ipa AS expr ';' { - if ($5 < 0 || $5 > 65535) cf_error("AS number out of range"); + if (ipa_nonzero(BGP_CFG->remote_ip)) cf_error("Only one neighbor per BGP instance is allowed"); + BGP_CFG->remote_ip = $3; BGP_CFG->remote_as = $5; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 2e6f0b60..0dd920e4 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -12,6 +12,7 @@ #include "nest/iface.h" #include "nest/protocol.h" #include "nest/route.h" +#include "nest/attrs.h" #include "conf/conf.h" #include "lib/unaligned.h" #include "lib/socket.h" @@ -30,33 +31,64 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf) return buf + 2 + conn->notify_size; } +#ifdef IPV6 +static byte * +bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf) +{ + *buf++ = 1; /* Capability 1: Multiprotocol extensions */ + *buf++ = 4; /* Capability data length */ + *buf++ = 0; /* We support AF IPv6 */ + *buf++ = BGP_AF_IPV6; + *buf++ = 0; /* RFU */ + *buf++ = 1; /* and SAFI 1 */ + return buf; +} +#endif + +static byte * +bgp_put_cap_as4(struct bgp_conn *conn, byte *buf) +{ + *buf++ = 65; /* Capability 65: Support for 4-octet AS number */ + *buf++ = 4; /* Capability data length */ + put_u32(buf, conn->bgp->local_as); + return buf + 4; +} + static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; + byte *cap; + int cap_len; BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)", BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id); buf[0] = BGP_VERSION; - put_u16(buf+1, p->local_as); + put_u16(buf+1, (p->local_as < 0xFFFF) ? 
p->local_as : AS_TRANS); put_u16(buf+3, p->cf->hold_time); put_u32(buf+5, p->local_id); -#ifndef IPV6 - buf[9] = 0; /* No optional parameters */ - return buf+10; -#else - buf += 9; - *buf++ = 8; /* Optional params len */ - *buf++ = 2; /* Option: Capability list */ - *buf++ = 6; /* Option length */ - *buf++ = 1; /* Capability 1: Multiprotocol extensions */ - *buf++ = 4; /* Capability data length */ - *buf++ = 0; /* We support AF IPv6 */ - *buf++ = BGP_AF_IPV6; - *buf++ = 0; /* RFU */ - *buf++ = 1; /* and SAFI 1 */ - return buf; + /* Skipped 3 B for length field and Capabilities parameter header */ + cap = buf + 12; + +#ifdef IPV6 + cap = bgp_put_cap_ipv6(conn, cap); #endif + if (bgp_as4_support) + cap = bgp_put_cap_as4(conn, cap); + + cap_len = cap - buf - 12; + if (cap_len > 0) + { + buf[9] = cap_len + 2; /* Optional params len */ + buf[10] = 2; /* Option: Capability list */ + buf[11] = cap_len; /* Option length */ + return cap; + } + else + { + buf[9] = 0; /* No optional parameters */ + return buf + 10; + } } static unsigned int @@ -118,7 +150,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) continue; } DBG("Processing bucket %p\n", buck); - a_size = bgp_encode_attrs(w+2, buck->eattrs, 1024); + a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 1024); put_u16(w, a_size); w += a_size + 2; r_size = bgp_encode_prefixes(p, w, buck, remains - a_size); @@ -166,7 +198,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) *tmp++ = BGP_AF_IPV6; *tmp++ = 1; ea->attrs[0].u.ptr->length = bgp_encode_prefixes(p, tmp, buck, remains-11); - size = bgp_encode_attrs(w, ea, remains); + size = bgp_encode_attrs(p, w, ea, remains); w += size; remains -= size; } @@ -183,7 +215,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) continue; } DBG("Processing bucket %p\n", buck); - size = bgp_encode_attrs(w, buck->eattrs, 1024); + size = bgp_encode_attrs(p, w, buck->eattrs, 1024); w += size; remains -= size; tstart = tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_REACH_NLRI, 
remains-8); @@ -230,7 +262,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) *tmp++ = 0; /* No SNPA information */ tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1)); ea->attrs[0].u.ptr->length = tmp - tstart; - w += bgp_encode_attrs(w, ea, remains); + w += bgp_encode_attrs(p, w, ea, remains); break; } } @@ -353,9 +385,49 @@ bgp_tx(sock *sk) ; } +/* Capatibility negotiation as per RFC 2842 */ + +void +bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) +{ + struct bgp_proto *p = conn->bgp; + int cl; + u32 as; + + while (len > 0) + { + if (len < 2 || len < 2 + opt[1]) + goto err; + + cl = opt[1]; + + switch (opt[0]) + { + case 65: + if (cl != 4) + goto err; + p->as4_support = 1; + if (bgp_as4_support) + conn->advertised_as = get_u32(opt + 2); + break; + + /* We can safely ignore all other capabilities */ + } + len -= 2 + cl; + opt += 2 + cl; + } + return; + + err: + bgp_error(conn, 2, 0, NULL, 0); + return; +} + static int bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) { + int ol; + while (len > 0) { if (len < 2 || len < 2 + opt[1]) @@ -369,12 +441,14 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) DBG("\n"); } #endif + + ol = opt[1]; switch (opt[0]) { case 2: - /* Capatibility negotiation as per RFC 2842 */ - /* We can safely ignore all capabilities announced */ + bgp_parse_capabilities(conn, opt + 2, ol); break; + default: /* * BGP specs don't tell us to send which option @@ -382,11 +456,11 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len) * to do so. Also, capability negotiation with * Cisco routers doesn't work without that. 
*/ - bgp_error(conn, 2, 4, opt, opt[1]); + bgp_error(conn, 2, 4, opt, ol); return 0; } - len -= 2 + opt[1]; - opt += 2 + opt[1]; + len -= 2 + ol; + opt += 2 + ol; } return 0; } @@ -397,7 +471,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) struct bgp_conn *other; struct bgp_proto *p = conn->bgp; struct bgp_config *cf = p->cf; - unsigned as, hold; + unsigned hold; u32 id; /* Check state */ @@ -409,20 +483,27 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) { bgp_error(conn, 1, 2, pkt+16, 2); return; } if (pkt[19] != BGP_VERSION) { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */ - as = get_u16(pkt+20); + conn->advertised_as = get_u16(pkt+20); hold = get_u16(pkt+22); id = get_u32(pkt+24); - BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", as, hold, id); - if (cf->remote_as && as != p->remote_as) - { bgp_error(conn, 2, 2, pkt+20, -2); return; } - if (hold > 0 && hold < 3) - { bgp_error(conn, 2, 6, pkt+22, 2); return; } - p->remote_id = id; + BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id); + + p->remote_id = id; // ??? if (bgp_parse_options(conn, pkt+29, pkt[28])) return; + + if (hold > 0 && hold < 3) + { bgp_error(conn, 2, 6, pkt+22, 2); return; } + if (!id || id == 0xffffffff || id == p->local_id) { bgp_error(conn, 2, 3, pkt+24, -4); return; } + + if (conn->advertised_as != p->remote_as) + { + bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return; + } + /* Check the other connection */ other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn; switch (other->state) @@ -463,7 +544,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len) else conn->hold_time = p->cf->hold_time; conn->keepalive_time = p->cf->keepalive_time ? 
: conn->hold_time / 3; - p->remote_as = as; + // p->remote_as = conn->advertised_as; p->remote_id = id; DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id); @@ -720,7 +801,7 @@ static struct { { 2, 4, "Unsupported optional parameter" }, { 2, 5, "Authentication failure" }, { 2, 6, "Unacceptable hold time" }, - { 2, 7, "Required capability missing" }, /* capability negotiation draft */ + { 2, 7, "Required capability missing" }, /* [RFC3392] */ { 3, 0, "Invalid UPDATE message" }, { 3, 1, "Malformed attribute list" }, { 3, 2, "Unrecognized well-known attribute" }, -- cgit v1.2.3 From d51aa2819005a03e4cfb6f62333be6ccadfb3c06 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:42:39 +0100 Subject: Implementation of MD5 authentication of BGP sessions. --- lib/socket.h | 2 ++ proto/bgp/bgp.c | 19 ++++++++++++- proto/bgp/bgp.h | 1 + proto/bgp/config.Y | 4 ++- sysdep/linux/sysio.h | 21 +++++++++++++++ sysdep/unix/io.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 120 insertions(+), 2 deletions(-) diff --git a/lib/socket.h b/lib/socket.h index ab932b31..4aa521db 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -39,6 +39,7 @@ typedef struct birdsock { int fd; /* System-dependent data */ node n; void *rbuf_alloc, *tbuf_alloc; + char *password; /* Password for MD5 authentication */ } sock; sock *sk_new(pool *); /* Allocate new socket */ @@ -47,6 +48,7 @@ int sk_send(sock *, unsigned len); /* Send data, <0=err, >0=ok, 0=sleep */ int sk_send_to(sock *, unsigned len, ip_addr to, unsigned port); /* sk_send to given destination */ void sk_reallocate(sock *); /* Free and allocate tbuf & rbuf */ void sk_dump_all(void); +int sk_set_md5_auth(sock *s, ip_addr a, char *passwd); /* Add or remove security associations for given passive socket */ static inline int sk_send_buffer_empty(sock *sk) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 
ed2524c8..e1f5ec02 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -78,10 +78,14 @@ static void bgp_setup_listen_sk(void); static void -bgp_close(struct bgp_proto *p UNUSED) +bgp_close(struct bgp_proto *p) { ASSERT(bgp_counter); bgp_counter--; + + if (p->cf->password) + sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL); + if (!bgp_counter) { rfree(bgp_listen_sk); @@ -330,6 +334,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c bgp_setup_conn(p, conn); bgp_setup_sk(p, conn, s); s->tx_hook = bgp_connected; + s->password = p->cf->password; conn->state = BS_CONNECT; if (sk_open(s)) { @@ -506,6 +511,7 @@ bgp_start(struct proto *P) bgp_counter++; bgp_setup_listen_sk(); + if (!bgp_linpool) bgp_linpool = lp_new(&root_pool, 4080); @@ -523,6 +529,17 @@ bgp_start(struct proto *P) lock->hook = bgp_start_locked; lock->data = p; olock_acquire(lock); + + /* We should create security association after we get a lock not to + * break existing connections. + */ + if (p->cf->password) + { + int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password); + if (rv < 0) + return PS_STOP; + } + return PS_START; } diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index aa1bd107..93383244 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -33,6 +33,7 @@ struct bgp_config { unsigned error_delay_time_min; /* Time to wait after an error is detected */ unsigned error_delay_time_max; unsigned disable_after_error; /* Disable the protocol when error is detected */ + char *password; /* Password used for MD5 authentication */ }; struct bgp_conn { diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 63dfb615..580c008f 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -20,7 +20,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, START, DELAY, FORGET, WAIT, DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, 
BGP_NEXT_HOP, - BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS) + BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, PASSWORD) CF_GRAMMAR @@ -38,6 +38,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->error_amnesia_time = 300; BGP_CFG->error_delay_time_min = 60; BGP_CFG->error_delay_time_max = 300; + BGP_CFG->password = NULL; } ; @@ -65,6 +66,7 @@ bgp_proto: | bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; } | bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; } | bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; } + | bgp_proto PASSWORD TEXT ';' { BGP_CFG->password = $3; } ; CF_ADDTO(dynamic_attr, BGP_PATH diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index 3a29cdc9..b0aff71f 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -139,3 +139,24 @@ static inline char *sysio_mcast_join(sock *s) #endif #endif + +#include +#include + +/* For the case that we have older kernel headers */ +/* Copied from Linux kernel file include/linux/tcp.h */ + +#ifndef TCP_MD5SIG + +#define TCP_MD5SIG 14 +#define TCP_MD5SIG_MAXKEYLEN 80 + +struct tcp_md5sig { + struct __kernel_sockaddr_storage tcpm_addr; /* address associated */ + __u16 __tcpm_pad1; /* zero */ + __u16 tcpm_keylen; /* key length */ + __u32 __tcpm_pad2; /* zero */ + __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */ +}; + +#endif diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 7dcca21a..6faa176b 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -546,6 +546,7 @@ sk_new(pool *p) s->err_hook = NULL; s->fd = -1; s->rbuf_alloc = s->tbuf_alloc = NULL; + s->password = NULL; return s; } @@ -642,6 +643,71 @@ bad: return err; } + +/* FIXME: check portability */ + +static int +sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd) +{ + struct tcp_md5sig md5; + + memset(&md5, 0, sizeof(md5)); + memcpy(&md5.tcpm_addr, (struct sockaddr *) sa, 
sizeof(*sa)); + + if (passwd) + { + int len = strlen(passwd); + + if (len > TCP_MD5SIG_MAXKEYLEN) + { + log(L_ERR "MD5 password too long"); + return -1; + } + + md5.tcpm_keylen = len; + memcpy(&md5.tcpm_key, passwd, len); + } + + int rv = setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)); + + if (rv < 0) + { + if (errno == ENOPROTOOPT) + log(L_ERR "Kernel does not support TCP MD5 signatures"); + else + log(L_ERR "sk_set_md5_auth_int: setsockopt: %m"); + } + + return rv; +} + +/** + * sk_set_md5_auth - add / remove MD5 security association for given socket. + * @s: socket + * @a: IP address of the other side + * @passwd: password used for MD5 authentication + * + * In TCP MD5 handling code in kernel, there is a set of pairs + * (address, password) used to choose password according to + * address of the other side. This function is useful for + * listening socket, for active sockets it is enough to set + * s->password field. + * + * When called with passwd != NULL, the new pair is added, + * When called with passwd == NULL, the existing pair is removed. + * + * Result: 0 for success, -1 for an error. 
+ */ + +int +sk_set_md5_auth(sock *s, ip_addr a, char *passwd) +{ + sockaddr sa; + fill_in_sockaddr(&sa, a, 0); + return sk_set_md5_auth_int(s, &sa, passwd); +} + + static void sk_tcp_connected(sock *s) { @@ -805,6 +871,14 @@ sk_open(sock *s) ERR("bind"); } fill_in_sockaddr(&sa, s->daddr, s->dport); + + if (s->password) + { + int rv = sk_set_md5_auth_int(s, &sa, s->password); + if (rv < 0) + goto bad_no_log; + } + switch (type) { case SK_TCP_ACTIVE: @@ -846,6 +920,7 @@ sk_open(sock *s) bad: log(L_ERR "sk_open: %s: %m", err); +bad_no_log: close(fd); s->fd = -1; return -1; -- cgit v1.2.3 From 4847a894bf7d4852325c3f1ea4bb4890054a1f66 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:45:09 +0100 Subject: Implementation of route reflection for BGP --- filter/filter.h | 1 + nest/a-set.c | 8 +- nest/attrs.h | 3 + proto/bgp/attrs.c | 223 +++++++++++++++++++++++++++++++++++++--------------- proto/bgp/bgp.c | 9 +++ proto/bgp/bgp.h | 10 ++- proto/bgp/config.Y | 6 +- proto/bgp/packets.c | 6 +- 8 files changed, 195 insertions(+), 71 deletions(-) diff --git a/filter/filter.h b/filter/filter.h index 04a26236..f71e54d3 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -11,6 +11,7 @@ #include "lib/resource.h" #include "lib/ip.h" +#include "nest/route.h" #include "nest/attrs.h" struct f_inst { /* Instruction */ diff --git a/nest/a-set.c b/nest/a-set.c index 44407141..69c090b7 100644 --- a/nest/a-set.c +++ b/nest/a-set.c @@ -40,10 +40,12 @@ int_set_format(struct adata *set, byte *buf, unsigned int size) struct adata * int_set_add(struct linpool *pool, struct adata *list, u32 val) { - struct adata *res = lp_alloc(pool, list->length + sizeof(struct adata) + 4); - res->length = list->length+4; + int len = list ? 
list->length : 0; + struct adata *res = lp_alloc(pool, len + sizeof(struct adata) + 4); + res->length = len + 4; * (u32 *) res->data = val; - memcpy((char *) res->data + 4, list->data, list->length); + if (list) + memcpy((char *) res->data + 4, list->data, list->length); return res; } diff --git a/nest/attrs.h b/nest/attrs.h index f63f2e45..aaa5f4a2 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -47,4 +47,7 @@ struct adata *int_set_add(struct linpool *pool, struct adata *list, u32 val); int int_set_contains(struct adata *list, u32 val); struct adata *int_set_del(struct linpool *pool, struct adata *list, u32 val); +static inline int int_set_get_size(struct adata *list) +{ return list->length / 4; } + #endif diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 48cb9dd5..a42a4880 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -104,13 +104,19 @@ bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len) } static int -bgp_check_aggregator(struct bgp_proto *p UNUSED, UNUSED byte *a, int len) +bgp_check_aggregator(struct bgp_proto *p, UNUSED byte *a, int len) { int exp_len = (bgp_as4_support && p->as4_support) ? 8 : 6; return (len == exp_len) ? 0 : 5; } +static int +bgp_check_cluster_list(struct bgp_proto *p UNUSED, UNUSED byte *a, int len) +{ + return ((len % 4) == 0) ? 
0 : 5; +} + static int bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED) { @@ -150,8 +156,10 @@ static struct attr_desc bgp_attr_table[] = { bgp_check_aggregator, NULL }, { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */ NULL, NULL }, - { NULL, }, /* BA_ORIGINATOR_ID */ - { NULL, }, /* BA_CLUSTER_LIST */ + { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_ORIGINATOR_ID */ + NULL, NULL }, + { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */ + bgp_check_cluster_list, NULL }, { NULL, }, /* BA_DPA */ { NULL, }, /* BA_ADVERTISER */ { NULL, }, /* BA_RCID_PATH */ @@ -173,35 +181,52 @@ static struct attr_desc bgp_attr_table[] = { #define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name) -static byte * -bgp_set_attr(eattr *e, struct linpool *pool, unsigned attr, unsigned val) +static inline struct adata * +bgp_alloc_adata(struct linpool *pool, unsigned len) +{ + struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); + ad->length = len; + return ad; +} + +static void +bgp_set_attr(eattr *e, unsigned attr, uintptr_t val) { ASSERT(ATTR_KNOWN(attr)); e->id = EA_CODE(EAP_BGP, attr); e->type = bgp_attr_table[attr].type; e->flags = bgp_attr_table[attr].expected_flags; if (e->type & EAF_EMBEDDED) - { - e->u.data = val; - return NULL; - } + e->u.data = val; else - { - e->u.ptr = lp_alloc(pool, sizeof(struct adata) + val); - e->u.ptr->length = val; - return e->u.ptr->data; - } + e->u.ptr = (struct adata *) val; } -byte * -bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val) +static byte * +bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len) +{ + struct adata *ad = bgp_alloc_adata(pool, len); + bgp_set_attr(e, attr, (uintptr_t) ad); + return ad->data; +} + +void +bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val) { ea_list *a = lp_alloc(pool, 
sizeof(ea_list) + sizeof(eattr)); a->next = *to; *to = a; a->flags = EALF_SORTED; a->count = 1; - return bgp_set_attr(a->attrs, pool, attr, val); + bgp_set_attr(a->attrs, attr, val); +} + +byte * +bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len) +{ + struct adata *ad = bgp_alloc_adata(pool, len); + bgp_attach_attr(to, pool, attr, (uintptr_t) ad); + return ad->data; } static int @@ -713,6 +738,7 @@ bgp_rt_notify(struct proto *P, net *n, rte *new, rte *old UNUSED, ea_list *attrs bgp_schedule_packet(p->conn, PKT_UPDATE); } + static int bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) { @@ -725,14 +751,14 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p ea->flags = EALF_SORTED; ea->count = 4; - bgp_set_attr(ea->attrs, pool, BA_ORIGIN, + bgp_set_attr(ea->attrs, BA_ORIGIN, ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP); if (p->is_internal) - bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 0); + bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0); else { - z = bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4); + z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 
6 : 4); z[0] = AS_PATH_SEQUENCE; z[1] = 1; /* 1 AS */ @@ -742,7 +768,7 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p put_u16(z+2, p->local_as); } - z = bgp_set_attr(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr)); + z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr)); if (p->cf->next_hop_self || !p->is_internal || rta->dest != RTD_ROUTER) @@ -755,34 +781,55 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p else *(ip_addr *)z = e->attrs->gw; - bgp_set_attr(ea->attrs+3, pool, BA_LOCAL_PREF, 0); + bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, 0); return 0; /* Leave decision to the filters */ } -static ea_list * -bgp_path_prepend(struct linpool *pool, eattr *a, ea_list *old, int as) + +static inline int +bgp_as_path_loopy(struct bgp_proto *p, rta *a) +{ + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + return (e && as_path_is_member(e->u.ptr, p->local_as)); +} + +static inline int +bgp_originator_id_loopy(struct bgp_proto *p, rta *a) { - struct ea_list *e = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - struct adata *olda = a->u.ptr; - - e->next = old; - e->flags = EALF_SORTED; - e->count = 1; - e->attrs[0].id = EA_CODE(EAP_BGP, BA_AS_PATH); - e->attrs[0].flags = BAF_TRANSITIVE; - e->attrs[0].type = EAF_TYPE_AS_PATH; - e->attrs[0].u.ptr = as_path_prepend(pool, olda, as); - return e; + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + return (e && (e->u.data == p->local_id)); +} + +static inline int +bgp_cluster_list_loopy(struct bgp_proto *p, rta *a) +{ + eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id)); +} + + +static inline void +bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as) +{ + eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, 
a->u.ptr, as)); +} + +static inline void +bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid) +{ + eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid)); } static int -bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) +bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr) { eattr *a; - if (!p->is_internal && (a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)))) - *attrs = bgp_path_prepend(pool, a, *attrs, p->local_as); + if (!p->is_internal) + bgp_path_prepend(e, attrs, pool, p->local_as); a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface))) @@ -792,7 +839,24 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p else { /* Need to create new one */ - *(ip_addr *) bgp_attach_attr(attrs, pool, BA_NEXT_HOP, sizeof(ip_addr)) = p->local_addr; + bgp_attach_attr_ip(attrs, pool, BA_NEXT_HOP, p->local_addr); + } + + if (rr) + { + /* Handling route reflection, RFC 4456 */ + struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto; + + a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + if (!a) + bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id); + + /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */ + bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? 
src->rr_cluster_id : p->rr_cluster_id); + + /* Two RR clients with different cluster ID, hmmm */ + if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id)) + bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id); } return 0; /* Leave decision to the filters */ @@ -809,9 +873,22 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool * return -1; if (new_bgp) { + /* We should check here for cluster list loop, because the receiving BGP instance + might have different cluster ID */ + if (bgp_cluster_list_loopy(p, e->attrs)) + return -1; + if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal) - return -1; /* Don't redistribute internal routes with IBGP */ - return bgp_update_attrs(p, e, attrs, pool); + { + /* Redistribution of internal routes with IBGP */ + if (p->rr_client || new_bgp->rr_client) + /* Route reflection, RFC 4456 */ + return bgp_update_attrs(p, e, attrs, pool, 1); + else + return -1; + } + else + return bgp_update_attrs(p, e, attrs, pool, 0); } else return bgp_create_attrs(p, e, attrs, pool); @@ -835,7 +912,7 @@ bgp_rte_better(rte *new, rte *old) if (n < o) return 0; - /* Use AS path lengths */ + /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); @@ -848,7 +925,7 @@ bgp_rte_better(rte *new, rte *old) return 0; } - /* Use origins */ + /* RFC 4271 9.1.2.2. b) Use origins */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; @@ -858,7 +935,7 @@ bgp_rte_better(rte *new, rte *old) if (n > o) return 0; - /* Compare MED's */ + /* RFC 4271 9.1.2.2. c) Compare MED's */ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); n = x ? 
x->u.data : new_bgp->cf->default_med; @@ -868,24 +945,41 @@ bgp_rte_better(rte *new, rte *old) if (n > o) return 0; - /* A tie breaking procedure according to RFC 1771, section 9.1.2.1 */ - /* We don't have interior distances */ - /* We prefer external peers */ + /* RFC 4271 9.1.2.2. d) Prefer external peers */ if (new_bgp->is_internal > old_bgp->is_internal) return 0; if (new_bgp->is_internal < old_bgp->is_internal) return 1; - /* Finally we compare BGP identifiers */ - return (new_bgp->remote_id < old_bgp->remote_id); -} -static int -bgp_path_loopy(struct bgp_proto *p, eattr *a) -{ - return as_path_is_member(a->u.ptr, p->local_as); -} + /* Skipping RFC 4271 9.1.2.2. e) */ + /* We don't have interior distances */ + + /* RFC 4456 9. b) Compare cluster list lengths */ + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST)); + n = x ? int_set_get_size(x->u.ptr) : 0; + o = y ? int_set_get_size(y->u.ptr) : 0; + if (n < o) + return 1; + if (n > o) + return 0; + + /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ + /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */ + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID)); + n = x ? x->u.data : new_bgp->remote_id; + o = y ? y->u.data : old_bgp->remote_id; + if (n < o) + return 1; + if (n > o) + return 0; + /* RFC 4271 9.1.2.2. 
g) Compare peer IP adresses */ + return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0); +} + static struct adata * bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool) { @@ -916,7 +1010,7 @@ bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct li } -/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC4893 4.2.3 */ +/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */ static void bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool) { @@ -1159,18 +1253,23 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin bgp_remove_as4_attrs(bgp, a); /* If the AS path attribute contains our AS, reject the routes */ - e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - if (e && bgp_path_loopy(bgp, e)) - { - DBG("BGP: Path loop!\n"); - return NULL; - } + if (bgp_as_path_loopy(bgp, a)) + goto loop; + + /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ + if (bgp_originator_id_loopy(bgp, a) || + bgp_cluster_list_loopy(bgp, a)) + goto loop; /* If there's no local preference, define one */ if (!(seen[0] && (1 << BA_LOCAL_PREF))) bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, 0); return a; +loop: + DBG("BGP: Path loop!\n"); + return NULL; + malformed: bgp_error(conn, 3, 1, NULL, 0); return NULL; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index e1f5ec02..5fa12492 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -485,6 +485,13 @@ bgp_start_locked(struct object_lock *lock) p->local_id = cf->c.global->router_id; p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip; p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY); + + if (cf->rr_client) + { + p->rr_cluster_id = cf->rr_cluster_id ? 
cf->rr_cluster_id : p->local_id; + p->rr_client = cf->rr_client; + } + if (!p->neigh) { log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop); @@ -633,6 +640,8 @@ bgp_check(struct bgp_config *c) cf_error("Local AS number out of range"); if (!bgp_as4_support && (c->remote_as > 0xFFFF)) cf_error("Neighbor AS number out of range"); + if ((c->local_as != c->remote_as) && (c->rr_client)) + cf_error("Only internal neighbor can be RR client"); } static void diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 93383244..af3c5c5a 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -25,6 +25,8 @@ struct bgp_config { int compare_path_lengths; /* Use path lengths when selecting best route */ u32 default_local_pref; /* Default value for LOCAL_PREF attribute */ u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */ + u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ + int rr_client; /* Whether neighbor is RR client of me */ unsigned connect_retry_time; unsigned hold_time, initial_hold_time; unsigned keepalive_time; @@ -60,6 +62,8 @@ struct bgp_proto { int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ + u32 rr_cluster_id; /* Route reflector cluster ID */ + int rr_client; /* Whether neighbor is RR client of me */ struct bgp_conn *conn; /* Connection we have established */ struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */ struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ @@ -121,7 +125,8 @@ void bgp_close_conn(struct bgp_conn *c); /* attrs.c */ -byte *bgp_attach_attr(struct ea_list **to, struct linpool *, unsigned attr, unsigned val); +void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val); +byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len); struct rta 
*bgp_decode_attrs(struct bgp_conn *conn, byte *a, unsigned int len, struct linpool *pool, int mandatory); int bgp_get_attr(struct eattr *e, byte *buf); int bgp_rte_better(struct rte *, struct rte *); @@ -132,6 +137,9 @@ unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck); void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs); +inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a) +{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; } + /* packets.c */ void bgp_schedule_packet(struct bgp_conn *conn, int type); diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 580c008f..b23b66cf 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -20,7 +20,8 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, START, DELAY, FORGET, WAIT, DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, - BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, PASSWORD) + BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, + PASSWORD, RR, CLIENT, CLUSTER, ID) CF_GRAMMAR @@ -38,7 +39,6 @@ bgp_proto_start: proto_start BGP { BGP_CFG->error_amnesia_time = 300; BGP_CFG->error_delay_time_min = 60; BGP_CFG->error_delay_time_max = 300; - BGP_CFG->password = NULL; } ; @@ -52,6 +52,8 @@ bgp_proto: BGP_CFG->remote_ip = $3; BGP_CFG->remote_as = $5; } + | bgp_proto RR CLUSTER ID expr ';' { BGP_CFG->rr_cluster_id = $5; } + | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; } | bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; } | bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; } | bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 0dd920e4..8a352c68 100644 
--- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -193,7 +193,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { DBG("Withdrawn routes:\n"); - tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); + tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8); *tmp++ = 0; *tmp++ = BGP_AF_IPV6; *tmp++ = 1; @@ -218,7 +218,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf) size = bgp_encode_attrs(p, w, buck->eattrs, 1024); w += size; remains -= size; - tstart = tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); + tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8); *tmp++ = 0; *tmp++ = BGP_AF_IPV6; *tmp++ = 1; @@ -702,7 +702,7 @@ bgp_do_rx_update(struct bgp_conn *conn, /* Create fake NEXT_HOP attribute */ if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2) goto bad; - memcpy(bgp_attach_attr(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16); + bgp_attach_attr_ip(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, x[1]); len -= *x + 2; x += *x + 1; -- cgit v1.2.3 From ba5ed6f3e4eb4b2899cdad08e2edb99063bfbcee Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:48:02 +0100 Subject: Implementation of an option for disabling AS4 support per BGP instance. --- proto/bgp/attrs.c | 14 +++++++------- proto/bgp/bgp.c | 6 ++++-- proto/bgp/bgp.h | 2 ++ proto/bgp/config.Y | 6 ++++-- proto/bgp/packets.c | 5 +++-- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index a42a4880..0fcd1ce1 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -73,13 +73,13 @@ bgp_check_path(byte *a, int len, int bs, int errcode) static int bgp_check_as_path(struct bgp_proto *p, byte *a, int len) { - return bgp_check_path(a, len, (bgp_as4_support && p->as4_support) ? 4 : 2, 11); + return bgp_check_path(a, len, p->as4_session ? 
4 : 2, 11); } static int bgp_check_as4_path(struct bgp_proto *p, byte *a, int len) { - if (bgp_as4_support && (! p->as4_support)) + if (bgp_as4_support && (! p->as4_session)) return bgp_check_path(a, len, 4, 9); else return 0; @@ -106,7 +106,7 @@ bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len) static int bgp_check_aggregator(struct bgp_proto *p, UNUSED byte *a, int len) { - int exp_len = (bgp_as4_support && p->as4_support) ? 8 : 6; + int exp_len = p->as4_session ? 8 : 6; return (len == exp_len) ? 0 : 5; } @@ -344,7 +344,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH * as optional AS4_PATH attribute. */ - if ((code == BA_AS_PATH) && bgp_as4_support && (! p->as4_support)) + if ((code == BA_AS_PATH) && bgp_as4_support && (! p->as4_session)) { len = a->u.ptr->length; @@ -384,7 +384,7 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains) } /* The same issue with AGGREGATOR attribute */ - if ((code == BA_AGGREGATOR) && bgp_as4_support && (! p->as4_support)) + if ((code == BA_AGGREGATOR) && bgp_as4_support && (! p->as4_session)) { int new_used; @@ -1082,7 +1082,7 @@ bgp_remove_as4_attrs(struct bgp_proto *p, rta *a) if ((fid == id1) || (fid == id2)) { *el = (*el)->next; - if (p->as4_support) + if (p->as4_session) log(L_WARN "BGP: Unexpected AS4_* attributes received"); } else @@ -1246,7 +1246,7 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin /* When receiving attributes from non-AS4-aware BGP speaker, * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes */ - if (bgp_as4_support && (! bgp->as4_support)) + if (bgp_as4_support && (! 
bgp->as4_session)) bgp_reconstruct_4b_atts(bgp, a, pool); if (bgp_as4_support) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 5fa12492..0d580be1 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -636,9 +636,11 @@ bgp_check(struct bgp_config *c) cf_error("Local AS number must be set"); if (!c->remote_as) cf_error("Neighbor must be configured"); - if (!bgp_as4_support && (c->local_as > 0xFFFF)) + if (!bgp_as4_support && c->enable_as4) + cf_error("AS4 support disabled globbaly"); + if (!c->enable_as4 && (c->local_as > 0xFFFF)) cf_error("Local AS number out of range"); - if (!bgp_as4_support && (c->remote_as > 0xFFFF)) + if (!c->enable_as4 && (c->remote_as > 0xFFFF)) cf_error("Neighbor AS number out of range"); if ((c->local_as != c->remote_as) && (c->rr_client)) cf_error("Only internal neighbor can be RR client"); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index af3c5c5a..1d67e336 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -25,6 +25,7 @@ struct bgp_config { int compare_path_lengths; /* Use path lengths when selecting best route */ u32 default_local_pref; /* Default value for LOCAL_PREF attribute */ u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */ + int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */ int rr_client; /* Whether neighbor is RR client of me */ unsigned connect_retry_time; @@ -60,6 +61,7 @@ struct bgp_proto { u32 local_as, remote_as; int is_internal; /* Internal BGP connection (local_as == remote_as) */ int as4_support; /* Peer supports 4B AS numbers [RFC4893] */ + int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */ u32 local_id; /* BGP identifier of this router */ u32 remote_id; /* BGP identifier of the neighbor */ u32 rr_cluster_id; /* Route reflector cluster ID */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index b23b66cf..d7bba575 100644 --- a/proto/bgp/config.Y +++ 
b/proto/bgp/config.Y @@ -18,10 +18,10 @@ CF_DECLS CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, - ERROR, START, DELAY, FORGET, WAIT, DISABLE, AFTER, + ERROR, START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS, - PASSWORD, RR, CLIENT, CLUSTER, ID) + PASSWORD, RR, CLIENT, CLUSTER, ID, AS4) CF_GRAMMAR @@ -39,6 +39,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->error_amnesia_time = 300; BGP_CFG->error_delay_time_min = 60; BGP_CFG->error_delay_time_max = 300; + BGP_CFG->enable_as4 = bgp_as4_support; } ; @@ -68,6 +69,7 @@ bgp_proto: | bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; } | bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; } | bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; } + | bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; } | bgp_proto PASSWORD TEXT ';' { BGP_CFG->password = $3; } ; diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 8a352c68..c18c6e42 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -73,7 +73,7 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) #ifdef IPV6 cap = bgp_put_cap_ipv6(conn, cap); #endif - if (bgp_as4_support) + if (p->cf->enable_as4) cap = bgp_put_cap_as4(conn, cap); cap_len = cap - buf - 12; @@ -407,7 +407,8 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) if (cl != 4) goto err; p->as4_support = 1; - if (bgp_as4_support) + p->as4_session = p->cf->enable_as4; + if (p->as4_session) conn->advertised_as = get_u32(opt + 2); break; -- cgit v1.2.3 From 1adc17b4b57267e301fcd67309494bbbddbfa718 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:52:21 +0100 Subject: Update of a documentation - new options for AS4, MD5 auth 
and route reflection. --- doc/bird.conf.example | 3 +++ doc/bird.sgml | 41 ++++++++++++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/doc/bird.conf.example b/doc/bird.conf.example index 05259d5b..22221d43 100644 --- a/doc/bird.conf.example +++ b/doc/bird.conf.example @@ -179,6 +179,9 @@ protocol static { # default bgp_med 0; # MED value we use for comparison when none is defined # default bgp_local_pref 0; # The same for local preference # source address 62.168.0.14; # What local address we use for the TCP connection +# password "secret"; # Password used for MD5 authentication +# rr client; # I am a route reflector and the neighbor is my client +# rr cluster id 1.0.0.1; # Use this value for cluster id instead of my router id # export where source=RTS_STATIC; # export filter { # if source = RTS_STATIC then { diff --git a/doc/bird.sgml b/doc/bird.sgml index 87113fa9..5ee9562f 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -655,13 +655,19 @@ routing table it wishes to export along with complete path information route) in order to avoid routing loops.

BIRD supports all requirements of the BGP4 standard as defined in -RFC 1771 -including several enhancements from the -latest draft. -It also supports the community attributes as per -RFC 1997, -capability negotiation defined in -RFC 2842. +RFC 4271. +It also supports the community attributes +(RFC 1997), +capability negotiation +(RFC 3392), +MD5 password authentication +(RFC 2385), +route reflectors +(RFC 4456), +and 4B AS numbers +(RFC 4893). + + For IPv6, it uses the standard multiprotocol extensions defined in RFC 2283 including changes described in the @@ -721,6 +727,27 @@ for each neighbor using the following configuration parameters: for next hop calculation. Default: the address of the local end of the interface our neighbor is connected to. + password Use this password for MD5 authentication + of BGP sessions. Default: no authentication. + + rr client Be a route reflector and treat the neighbor as + a route reflection client. Default: disabled. + + rr cluster id Route reflectors use cluster id + to avoid route reflection loops. When there is one route reflector in a cluster + it usually uses its router id as a cluster id, but when there are more route + reflectors in a cluster, these need to be configured (using this option) to + use a common cluster id. Clients in a cluster need not know their cluster + id and this option is not allowed for them. Default: the same as the router id. + + enable as4 The BGP protocol was designed to use 2B AS numbers + and was extended later to allow 4B AS numbers. BIRD supports the 4B AS extension, + but by disabling this option it can be persuaded not to advertise it and + to maintain old-style sessions with its neighbors. This might be useful for + circumventing bugs in a neighbor's implementation of the 4B AS extension. + Even when disabled (off), BIRD behaves internally as an AS4-aware BGP router. + Default: on.
+ disable after error When an error is encountered (either locally or by the other side), disable the instance automatically and wait for an administrator to fix the problem manually. Default: off. -- cgit v1.2.3 From 4819c3e17ac22c6810ee80261ac3bffb5127e39d Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:54:23 +0100 Subject: Bugfix in LOCAL_PREF attribute handling. --- proto/bgp/attrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 0fcd1ce1..b5d8fba7 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1262,7 +1262,7 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin goto loop; /* If there's no local preference, define one */ - if (!(seen[0] && (1 << BA_LOCAL_PREF))) + if (!(seen[0] & (1 << BA_LOCAL_PREF))) bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, 0); return a; -- cgit v1.2.3 From b6bf284a905412cfe107b4967e55649e6194187e Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 22:59:21 +0100 Subject: Bugfixes in MULTI_EXIT_DISC attribute handling. - Old MED handling was completely different from behavior specified in RFCs - for example, MED values haven't been propagated to neighboring areas. - Update tie-breaking according to RFC 4271. - Change default value for 'default bgp_med' configuration option according to RFC 4271. --- doc/bird.sgml | 16 +++++++++++----- nest/a-path.c | 14 ++++++++++++++ nest/attrs.h | 1 + proto/bgp/attrs.c | 46 ++++++++++++++++++++++++++++++++++++---------- proto/bgp/config.Y | 2 +- 5 files changed, 63 insertions(+), 16 deletions(-) diff --git a/doc/bird.sgml b/doc/bird.sgml index 5ee9562f..8fa55f85 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -784,7 +784,7 @@ for each neighbor using the following configuration parameters: default bgp_med Value of the Multiple Exit Discriminator to be used during route selection when the MED attribute - is missing. Default: infinite. + is missing. Default: 0.
default bgp_local_pref Value of the Local Preference to be used during route selection when the Local Preference attribute @@ -806,10 +806,16 @@ with `int The Multiple Exit Discriminator of the route - is an optional attribute which is often used within the local AS to - reflect interior distances to various boundary routers. See the route selection - rules above for exact semantics. + int The Multiple Exit Discriminator of the route + is an optional attribute which is used on external (inter-AS) links to + convey to an adjacent AS the optimal entry point into the local AS. + The received attribute may also be propagated over internal BGP links + (this is the default behavior). The attribute value is zeroed when a route + is exported from a routing table to a BGP instance to ensure that the attribute + received from a neighboring AS is not propagated to other neighboring ASes. + A new value might be set in the export filter of a BGP instance. + See RFC 4451 + for further discussion of the BGP MED attribute.
enum Origin of the route: either data; + + if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0)) + return 0; + else + { + *last_as = get_as(p+2); + return 1; + } +} + int as_path_is_member(struct adata *path, u32 as) { diff --git a/nest/attrs.h b/nest/attrs.h index aaa5f4a2..fee2c2c8 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -27,6 +27,7 @@ int as_path_convert_to_new(struct adata *path, byte *dst, int req_as); void as_path_format(struct adata *path, byte *buf, unsigned int size); int as_path_getlen(struct adata *path); int as_path_get_first(struct adata *path, u32 *orig_as); +int as_path_get_last(struct adata *path, u32 *last_as); int as_path_is_member(struct adata *path, u32 as); diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index b5d8fba7..2210cbe7 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -146,7 +146,7 @@ static struct attr_desc bgp_attr_table[] = { bgp_check_as_path, NULL }, { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */ bgp_check_next_hop, NULL }, - { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_MULTI_EXIT_DISC */ + { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */ NULL, NULL }, { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0, /* BA_LOCAL_PREF */ NULL, NULL }, @@ -829,7 +829,17 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p eattr *a; if (!p->is_internal) - bgp_path_prepend(e, attrs, pool, p->local_as); + { + bgp_path_prepend(e, attrs, pool, p->local_as); + + /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be + * propagated to other neighboring ASes. + * Perhaps it would be better to undefine it. 
+ */ + a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + if (a) + bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0); + } a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface))) @@ -894,6 +904,18 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool * return bgp_create_attrs(p, e, attrs, pool); } +static inline u32 +bgp_get_neighbor(rte *r) +{ + eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); + u32 as; + + if (e && as_path_get_last(e->u.ptr, &as)) + return as; + else + return ((struct bgp_proto *) r->attrs->proto)->remote_as; +} + int bgp_rte_better(rte *new, rte *old) { @@ -936,14 +958,18 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4271 9.1.2.2. c) Compare MED's */ - x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); - n = x ? x->u.data : new_bgp->cf->default_med; - o = y ? y->u.data : old_bgp->cf->default_med; - if (n < o) - return 1; - if (n > o) - return 0; + + if (bgp_get_neighbor(new) == bgp_get_neighbor(old)) + { + x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC)); + n = x ? x->u.data : new_bgp->cf->default_med; + o = y ? y->u.data : old_bgp->cf->default_med; + if (n < o) + return 1; + if (n > o) + return 0; + } /* RFC 4271 9.1.2.2. 
d) Prefer external peers */ if (new_bgp->is_internal > old_bgp->is_internal) diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index d7bba575..8524b2dd 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -33,7 +33,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->hold_time = 240; BGP_CFG->connect_retry_time = 120; BGP_CFG->initial_hold_time = 240; - BGP_CFG->default_med = ~0; /* RFC 1771 doesn't specify this, draft-09 says ~0 */ + BGP_CFG->default_med = 0; BGP_CFG->compare_path_lengths = 1; BGP_CFG->start_delay_time = 5; BGP_CFG->error_amnesia_time = 300; -- cgit v1.2.3 From 1567edea8d3da7da08092eef15bb3bd4544c6464 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sun, 26 Oct 2008 23:09:46 +0100 Subject: Bugfix for routing table breaking bug. Here is a patch fixing a bug that causes breakage of a local routing table during shutdown of Bird. The problem was caused by shutdown of 'device' protocol before shutdown of 'kernel' protocol. When 'device' protocol went down, the route (with local network prefix) From different protocol (BGP or OSPF) became preferred and installed to the kernel routing table. Such routes were broken (like 192.168.1.0/24 via 192.168.1.2). I think it is also the cause of problem reported by Martin Kraus. The patch disables updating of kernel routing table during shutdown of Bird. I am not sure whether this is the best way to fix it, I would prefer to forbid 'kernel' protocol to overwrite routes with 'proto kernel'. The patch also fixes a problem that during shutdown sometimes routes created by Bird remained in the kernel routing table. 
--- conf/conf.c | 5 +++-- sysdep/linux/netlink/netlink.c | 9 ++++----- sysdep/unix/krt-set.c | 2 ++ sysdep/unix/krt.c | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/conf/conf.c b/conf/conf.c index a744dcaa..fefcac51 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -266,7 +266,7 @@ config_commit(struct config *c) } if (old_config) /* Reconfiguration already in progress */ { - if (shutting_down) + if (shutting_down == 2) { log(L_INFO "New configuration discarded due to shutdown"); config_free(c); @@ -314,8 +314,9 @@ order_shutdown(void) init_list(&c->protos); init_list(&c->tables); c->shutdown = 1; - config_commit(c); shutting_down = 1; + config_commit(c); + shutting_down = 2; } /** diff --git a/sysdep/linux/netlink/netlink.c b/sysdep/linux/netlink/netlink.c index a70428ef..98c63f02 100644 --- a/sysdep/linux/netlink/netlink.c +++ b/sysdep/linux/netlink/netlink.c @@ -498,6 +498,8 @@ nl_send_route(struct krt_proto *p, rte *e, int new) nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw); break; case RTD_DEVICE: + if (!a->iface) + return; r.r.rtm_type = RTN_UNICAST; nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index); break; @@ -531,11 +533,8 @@ krt_set_notify(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) else { if (old) - { - if (!old->attrs->iface || (old->attrs->iface->flags & IF_UP)) - nl_send_route(p, old, 0); - /* else the kernel has already flushed it */ - } + nl_send_route(p, old, 0); + if (new) nl_send_route(p, new, 1); } diff --git a/sysdep/unix/krt-set.c b/sysdep/unix/krt-set.c index bd564486..23cbe5c5 100644 --- a/sysdep/unix/krt-set.c +++ b/sysdep/unix/krt-set.c @@ -61,6 +61,8 @@ krt_ioctl(int ioc, rte *e, char *name) re.rt_flags |= RTF_GATEWAY; break; case RTD_DEVICE: + if (!a->iface) + return; re.rt_dev = a->iface->name; break; #ifdef RTF_REJECT diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index a6d17279..5269eb71 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -684,7 +684,7 @@ krt_notify(struct proto 
*P, net *net, rte *new, rte *old, struct ea_list *attrs { struct krt_proto *p = (struct krt_proto *) P; - if (shutting_down && KRT_CF->persist) + if (shutting_down) return; if (new && (!krt_capable(new) || new->attrs->source == RTS_INHERIT)) new = NULL; -- cgit v1.2.3