summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2008-10-27 00:20:22 +0100
committerOndrej Zajicek <santiago@crfreenet.org>2008-10-27 00:20:22 +0100
commita98fbf0f12b5e83e25afa0f585ca6a4d4ac5f6bf (patch)
treef6c215cd05ec9278696fae7b8814b5071a4b3c6a
parenta3b70dc499b64f41aa776b5b4afee5c7bfb8dfa6 (diff)
parent1567edea8d3da7da08092eef15bb3bd4544c6464 (diff)
Merge branch 'dev' into out
-rw-r--r--conf/conf.c5
-rw-r--r--doc/bird.conf.example3
-rw-r--r--doc/bird.sgml57
-rw-r--r--filter/config.Y11
-rw-r--r--filter/filter.c26
-rw-r--r--filter/filter.h1
-rw-r--r--lib/socket.h2
-rw-r--r--nest/a-path.c223
-rw-r--r--nest/a-set.c8
-rw-r--r--nest/attrs.h25
-rw-r--r--proto/bgp/attrs.c653
-rw-r--r--proto/bgp/bgp.c35
-rw-r--r--proto/bgp/bgp.h26
-rw-r--r--proto/bgp/config.Y20
-rw-r--r--proto/bgp/packets.c158
-rw-r--r--sysdep/linux/netlink/netlink.c9
-rw-r--r--sysdep/linux/sysio.h21
-rw-r--r--sysdep/unix/io.c75
-rw-r--r--sysdep/unix/krt-set.c2
-rw-r--r--sysdep/unix/krt.c2
20 files changed, 1114 insertions, 248 deletions
diff --git a/conf/conf.c b/conf/conf.c
index a744dcaa..fefcac51 100644
--- a/conf/conf.c
+++ b/conf/conf.c
@@ -266,7 +266,7 @@ config_commit(struct config *c)
}
if (old_config) /* Reconfiguration already in progress */
{
- if (shutting_down)
+ if (shutting_down == 2)
{
log(L_INFO "New configuration discarded due to shutdown");
config_free(c);
@@ -314,8 +314,9 @@ order_shutdown(void)
init_list(&c->protos);
init_list(&c->tables);
c->shutdown = 1;
- config_commit(c);
shutting_down = 1;
+ config_commit(c);
+ shutting_down = 2;
}
/**
diff --git a/doc/bird.conf.example b/doc/bird.conf.example
index 05259d5b..22221d43 100644
--- a/doc/bird.conf.example
+++ b/doc/bird.conf.example
@@ -179,6 +179,9 @@ protocol static {
# default bgp_med 0; # MED value we use for comparison when none is defined
# default bgp_local_pref 0; # The same for local preference
# source address 62.168.0.14; # What local address we use for the TCP connection
+# password "secret"; # Password used for MD5 authentication
+# rr client; # I am a route reflector and the neighbor is my client
+# rr cluster id 1.0.0.1; # Use this value for cluster id instead of my router id
# export where source=RTS_STATIC;
# export filter {
# if source = RTS_STATIC then {
diff --git a/doc/bird.sgml b/doc/bird.sgml
index 87113fa9..8fa55f85 100644
--- a/doc/bird.sgml
+++ b/doc/bird.sgml
@@ -655,13 +655,19 @@ routing table it wishes to export along with complete path information
route) in order to avoid routing loops.
<p>BIRD supports all requirements of the BGP4 standard as defined in
-RFC 1771<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc1771.txt">
-including several enhancements from the
-latest draft<htmlurl url="ftp://ftp.rfc-editor.org/internet-drafts/draft-ietf-idr-bgp4-09.txt">.
-It also supports the community attributes as per
-RFC 1997<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc1997.txt">,
-capability negotiation defined in
-RFC 2842<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2842.txt">.
+RFC 4271<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4271.txt">.
+It also supports the community attributes
+(RFC 1997<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc1997.txt">),
+capability negotiation
+(RFC 3392<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc3392.txt">),
+MD5 password authentication
+(RFC 2385<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2385.txt">),
+route reflectors
+(RFC 4456<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4456.txt">),
+and 4B AS numbers
+(RFC 4893<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4893.txt">).
+
+
For IPv6, it uses the standard multiprotocol extensions defined in
RFC 2283<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2283.txt">
including changes described in the
@@ -721,6 +727,27 @@ for each neighbor using the following configuration parameters:
for next hop calculation. Default: the address of the local end
of the interface our neighbor is connected to.
+ <tag>password <m/string/</tag> Use this password for MD5 authentication
+ of BGP sessions. Default: no authentication.
+
+ <tag>rr client</tag> Be a route reflector and treat neighbor as
+ route reflection client. Default: disabled.
+
+ <tag>rr cluster id <m/IPv4 address/</tag> Route reflectors use cluster id
+ to avoid route reflection loops. When there is one route reflector in a cluster
+ it usually uses its router id as a cluster id, but when there are more route
+ reflectors in a cluster, these need to be configured (using this option) to
+ use a common cluster id. Clients in a cluster need not know their cluster
+ id and this option is not allowed for them. Default: the same as router id.
+
+ <tag>enable as4 <m/switch/</tag> BGP protocol was designed to use 2B AS numbers
+ and was extended later to allow 4B AS numbers. BIRD supports 4B AS extension,
+ but by disabling this option it can be persuaded not to advertise it and
+ to maintain old-style sessions with its neighbors. This might be useful for
+ circumventing bugs in neighbor's implementation of 4B AS extension.
+ Even when disabled (off), BIRD behaves internally as AS4-aware BGP router.
+ Default: on.
+
<tag>disable after error <m/switch/</tag> When an error is encountered (either
locally or by the other side), disable the instance automatically
and wait for an administrator to fix the problem manually. Default: off.
@@ -757,7 +784,7 @@ for each neighbor using the following configuration parameters:
<tag>default bgp_med <m/number/</tag> Value of the Multiple Exit
Discriminator to be used during route selection when the MED attribute
- is missing. Default: infinite.
+ is missing. Default: 0.
<tag>default bgp_local_pref <m/number/</tag> Value of the Local Preference
to be used during route selection when the Local Preference attribute
@@ -779,10 +806,16 @@ with `<tt/O/') are optional.
selection among multiple BGP routes (see the selection rules above). It's
used as an additional metric which is propagated through the whole local AS.
- <tag>int <cf/bgp_med/ [IO]</tag> The Multiple Exit Discriminator of the route
- is an optional attribute which is often used within the local AS to
- reflect interior distances to various boundary routers. See the route selection
- rules above for exact semantics.
+ <tag>int <cf/bgp_med/ [O]</tag> The Multiple Exit Discriminator of the route
+ is an optional attribute which is used on external (inter-AS) links to
+ convey to an adjacent AS the optimal entry point into the local AS.
+ The received attribute may be also propagated over internal BGP links
+ (and this is default behavior). The attribute value is zeroed when a route
+ is exported from a routing table to a BGP instance to ensure that the attribute
+ received from a neighboring AS is not propagated to other neighboring ASes.
+ A new value might be set in the export filter of a BGP instance.
+ See RFC 4451<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4451.txt">
+ for further discussion of BGP MED attribute.
<tag>enum <cf/bgp_origin/</tag> Origin of the route: either <cf/ORIGIN_IGP/
if the route has originated in an interior routing protocol or
diff --git a/filter/config.Y b/filter/config.Y
index d4bf44cc..fdfb2e74 100644
--- a/filter/config.Y
+++ b/filter/config.Y
@@ -39,7 +39,6 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN,
%type <v> set_atom fprefix fprefix_s fipa
%type <s> decls declsn one_decl function_params
%type <h> bgp_path
-%type <i> bgp_one
CF_GRAMMAR
@@ -273,14 +272,12 @@ switch_body: /* EMPTY */ { $$ = NULL; }
/* CONST '(' expr ')' { $$ = f_new_inst(); $$->code = 'c'; $$->aux = T_INT; $$->a2.i = $3; } */
-bgp_one:
- NUM { $$ = $1; }
- | '?' { $$ = PM_ANY; }
- ;
bgp_path:
- bgp_one { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = $1; }
- | bgp_one bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = $1; }
+ NUM { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = $1; $$->any = 0; }
+ | '?' { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = NULL; $$->val = 0; $$->any = 1; }
+ | NUM bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = $1; $$->any = 0; }
+ | '?' bgp_path { $$ = cfg_alloc(sizeof(struct f_path_mask)); $$->next = $2; $$->val = 0; $$->any = 1; }
;
constant:
diff --git a/filter/filter.c b/filter/filter.c
index 9cde3d96..7893d9ae 100644
--- a/filter/filter.c
+++ b/filter/filter.c
@@ -69,6 +69,30 @@ pm_path_compare(struct f_path_mask *m1, struct f_path_mask *m2)
}
}
+static void
+pm_format(struct f_path_mask *p, byte *buf, unsigned int size)
+{
+ byte *end = buf + size - 16;
+
+ while (p)
+ {
+ if (buf > end)
+ {
+ strcpy(buf, " ...");
+ return;
+ }
+
+ if (p->any)
+ buf += bsprintf(buf, "? ");
+ else
+ buf += bsprintf(buf, "%u ", p->val);
+
+ p = p->next;
+ }
+
+ *buf = 0;
+}
+
/**
* val_compare - compare two values
* @v1: first value
@@ -224,7 +248,7 @@ val_print(struct f_val v)
case T_ENUM: PRINTF( "(enum %x)%d", v.type, v.val.i ); break;
case T_PATH: as_path_format(v.val.ad, buf2, 1020); PRINTF( "(path %s)", buf2 ); break;
case T_CLIST: int_set_format(v.val.ad, buf2, 1020); PRINTF( "(clist %s)", buf2 ); break;
- case T_PATH_MASK: debug( "(pathmask " ); { struct f_path_mask *p = v.val.path_mask; while (p) { debug("%d ", p->val); p=p->next; } debug(")" ); } break;
+ case T_PATH_MASK: pm_format(v.val.path_mask, buf2, 1020); PRINTF( "(pathmask %s)", buf2 ); break;
default: PRINTF( "[unknown type %x]", v.type );
#undef PRINTF
}
diff --git a/filter/filter.h b/filter/filter.h
index 04a26236..f71e54d3 100644
--- a/filter/filter.h
+++ b/filter/filter.h
@@ -11,6 +11,7 @@
#include "lib/resource.h"
#include "lib/ip.h"
+#include "nest/route.h"
#include "nest/attrs.h"
struct f_inst { /* Instruction */
diff --git a/lib/socket.h b/lib/socket.h
index ab932b31..4aa521db 100644
--- a/lib/socket.h
+++ b/lib/socket.h
@@ -39,6 +39,7 @@ typedef struct birdsock {
int fd; /* System-dependent data */
node n;
void *rbuf_alloc, *tbuf_alloc;
+ char *password; /* Password for MD5 authentication */
} sock;
sock *sk_new(pool *); /* Allocate new socket */
@@ -47,6 +48,7 @@ int sk_send(sock *, unsigned len); /* Send data, <0=err, >0=ok, 0=sleep */
int sk_send_to(sock *, unsigned len, ip_addr to, unsigned port); /* sk_send to given destination */
void sk_reallocate(sock *); /* Free and allocate tbuf & rbuf */
void sk_dump_all(void);
+int sk_set_md5_auth(sock *s, ip_addr a, char *passwd); /* Add or remove security associations for given passive socket */
static inline int
sk_send_buffer_empty(sock *sk)
diff --git a/nest/a-path.c b/nest/a-path.c
index 1b08f809..5e3ecfd4 100644
--- a/nest/a-path.c
+++ b/nest/a-path.c
@@ -14,38 +14,139 @@
#include "lib/unaligned.h"
#include "lib/string.h"
+
+/* Global AS4 support, shared by all BGP instances.
+ * This specifies whether BA_AS_PATH attributes contain 2 or 4 B per ASN
+ */
+
+int bgp_as4_support = 1;
+
+static void
+put_as(byte *data, u32 as)
+{
+ if (bgp_as4_support)
+ put_u32(data, as);
+ else if (as <= 0xFFFF)
+ put_u16(data, as);
+ else
+ bug("put_as: Try to put 32bit AS to 16bit AS Path");
+}
+
+static inline u32
+get_as(byte *data)
+{
+ return bgp_as4_support ? get_u32(data) : get_u16(data);
+}
+
struct adata *
-as_path_prepend(struct linpool *pool, struct adata *olda, int as)
+as_path_prepend(struct linpool *pool, struct adata *olda, u32 as)
{
+ int bs = bgp_as4_support ? 4 : 2;
struct adata *newa;
- if (olda->length && olda->data[0] == AS_PATH_SEQUENCE &&
- olda->data[1] < 255) /* Starting with sequence => just prepend the AS number */
+ if (olda->length && olda->data[0] == AS_PATH_SEQUENCE && olda->data[1] < 255)
+ /* Starting with sequence => just prepend the AS number */
{
- newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 2);
- newa->length = olda->length + 2;
- newa->data[0] = 2;
+ int nl = olda->length + bs;
+ newa = lp_alloc(pool, sizeof(struct adata) + nl);
+ newa->length = nl;
+ newa->data[0] = AS_PATH_SEQUENCE;
newa->data[1] = olda->data[1] + 1;
- memcpy(newa->data+4, olda->data+2, olda->length-2);
+ memcpy(newa->data + bs + 2, olda->data + 2, olda->length - 2);
}
- else /* Create new path segment */
+ else /* Create new path segment */
{
- newa = lp_alloc(pool, sizeof(struct adata) + olda->length + 4);
- newa->length = olda->length + 4;
- newa->data[0] = 2;
+ int nl = olda->length + bs + 2;
+ newa = lp_alloc(pool, sizeof(struct adata) + nl);
+ newa->length = nl;
+ newa->data[0] = AS_PATH_SEQUENCE;
newa->data[1] = 1;
- memcpy(newa->data+4, olda->data, olda->length);
+ memcpy(newa->data + bs + 2, olda->data, olda->length);
}
- put_u16(newa->data+2, as);
+ put_as(newa->data + 2, as);
return newa;
}
+int
+as_path_convert_to_old(struct adata *path, byte *dst, int *new_used)
+{
+ byte *src = path->data;
+ byte *src_end = src + path->length;
+ byte *dst_start = dst;
+ u32 as;
+ int i, n;
+ *new_used = 0;
+
+ while (src < src_end)
+ {
+ n = src[1];
+ *dst++ = *src++;
+ *dst++ = *src++;
+
+ for(i=0; i<n; i++)
+ {
+ as = get_u32(src);
+ if (as > 0xFFFF)
+ {
+ as = AS_TRANS;
+ *new_used = 1;
+ }
+ put_u16(dst, as);
+ src += 4;
+ dst += 2;
+ }
+ }
+
+ return dst - dst_start;
+}
+
+int
+as_path_convert_to_new(struct adata *path, byte *dst, int req_as)
+{
+ byte *src = path->data;
+ byte *src_end = src + path->length;
+ byte *dst_start = dst;
+ u32 as;
+ int i, t, n;
+
+
+ while ((src < src_end) && (req_as > 0))
+ {
+ t = *src++;
+ n = *src++;
+
+ if (t == AS_PATH_SEQUENCE)
+ {
+ if (n > req_as)
+ n = req_as;
+
+ req_as -= n;
+ }
+ else // t == AS_PATH_SET
+ req_as--;
+
+ *dst++ = t;
+ *dst++ = n;
+
+ for(i=0; i<n; i++)
+ {
+ as = get_u16(src);
+ put_u32(dst, as);
+ src += 2;
+ dst += 4;
+ }
+ }
+
+ return dst - dst_start;
+}
+
void
as_path_format(struct adata *path, byte *buf, unsigned int size)
{
+ int bs = bgp_as4_support ? 4 : 2;
byte *p = path->data;
byte *e = p + path->length;
- byte *end = buf + size - 8;
+ byte *end = buf + size - 16;
int sp = 1;
int l, isset;
@@ -69,8 +170,8 @@ as_path_format(struct adata *path, byte *buf, unsigned int size)
{
if (!sp)
*buf++ = ' ';
- buf += bsprintf(buf, "%d", get_u16(p));
- p += 2;
+ buf += bsprintf(buf, "%u", get_as(p));
+ p += bs;
sp = 0;
}
if (isset)
@@ -86,6 +187,7 @@ as_path_format(struct adata *path, byte *buf, unsigned int size)
int
as_path_getlen(struct adata *path)
{
+ int bs = bgp_as4_support ? 4 : 2;
int res = 0;
u8 *p = path->data;
u8 *q = p+path->length;
@@ -95,8 +197,8 @@ as_path_getlen(struct adata *path)
{
switch (*p++)
{
- case AS_PATH_SET: len = *p++; res++; p += 2*len; break;
- case AS_PATH_SEQUENCE: len = *p++; res+=len; p += 2*len; break;
+ case AS_PATH_SET: len = *p++; res++; p += bs * len; break;
+ case AS_PATH_SEQUENCE: len = *p++; res += len; p += bs * len; break;
default: bug("as_path_getlen: Invalid path segment");
}
}
@@ -104,9 +206,11 @@ as_path_getlen(struct adata *path)
}
int
-as_path_get_first(struct adata *path)
+as_path_get_first(struct adata *path, u32 *orig_as)
{
- int res = -1;
+ int bs = bgp_as4_support ? 4 : 2;
+ int found = 0;
+ u32 res = 0;
u8 *p = path->data;
u8 *q = p+path->length;
int len;
@@ -117,36 +221,84 @@ as_path_get_first(struct adata *path)
{
case AS_PATH_SET:
if (len = *p++)
- res = get_u16(p);
- p += 2*len;
+ {
+ found = 1;
+ res = get_as(p);
+ p += bs * len;
+ }
break;
case AS_PATH_SEQUENCE:
if (len = *p++)
- res = get_u16(p+2*(len-1));
- p += 2*len;
+ {
+ found = 1;
+ res = get_as(p + bs * (len - 1));
+ p += bs * len;
+ }
break;
default: bug("as_path_get_first: Invalid path segment");
}
}
- return res;
+
+ *orig_as = res;
+ return found;
+}
+
+int
+as_path_get_last(struct adata *path, u32 *last_as)
+{
+ u8 *p = path->data;
+
+ if ((path->length == 0) || (p[0] != AS_PATH_SEQUENCE) || (p[1] == 0))
+ return 0;
+ else
+ {
+ *last_as = get_as(p+2);
+ return 1;
+ }
}
+int
+as_path_is_member(struct adata *path, u32 as)
+{
+ int bs = bgp_as4_support ? 4 : 2;
+ u8 *p = path->data;
+ u8 *q = p+path->length;
+ int i, n;
+
+ while (p<q)
+ {
+ n = p[1];
+ p += 2;
+ for(i=0; i<n; i++)
+ {
+ if (get_as(p) == as)
+ return 1;
+ p += bs;
+ }
+ }
+ return 0;
+}
+
+
+
#define MASK_PLUS do { mask = mask->next; if (!mask) return next == q; \
- asterisk = (mask->val == PM_ANY); \
+ asterisk = mask->any; \
if (asterisk) { mask = mask->next; if (!mask) { return 1; } } \
} while(0)
int
as_path_match(struct adata *path, struct f_path_mask *mask)
{
+ int bs = bgp_as4_support ? 4 : 2;
int i;
int asterisk = 0;
u8 *p = path->data;
u8 *q = p+path->length;
int len;
u8 *next;
+ u32 as;
- asterisk = (mask->val == PM_ANY);
+ asterisk = mask->any;
if (asterisk)
{ mask = mask->next; if (!mask) return 1; }
@@ -156,20 +308,21 @@ as_path_match(struct adata *path, struct f_path_mask *mask)
len = *p++;
{
u8 *p_save = p;
- next = p_save + 2*len;
+ next = p_save + bs * len;
retry:
p = p_save;
for (i=0; i<len; i++) {
- if (asterisk && (get_u16(p) == mask->val)) {
+ as = get_as(p);
+ if (asterisk && (as == mask->val)) {
MASK_PLUS;
goto retry;
}
- if (!asterisk && (get_u16(p) == mask->val)) {
+ if (!asterisk && (as == mask->val)) {
p = next;
MASK_PLUS;
goto okay;
}
- p+=2;
+ p += bs;
}
if (!asterisk)
return 0;
@@ -180,15 +333,15 @@ as_path_match(struct adata *path, struct f_path_mask *mask)
case AS_PATH_SEQUENCE:
len = *p++;
for (i=0; i<len; i++) {
- next = p+2;
- if (asterisk && (get_u16(p) == mask->val))
+ as = get_as(p);
+ if (asterisk && (as == mask->val))
MASK_PLUS;
else if (!asterisk) {
- if (get_u16(p) != mask->val)
+ if (as != mask->val)
return 0;
MASK_PLUS;
}
- p+=2;
+ p += bs;
}
break;
diff --git a/nest/a-set.c b/nest/a-set.c
index 44407141..69c090b7 100644
--- a/nest/a-set.c
+++ b/nest/a-set.c
@@ -40,10 +40,12 @@ int_set_format(struct adata *set, byte *buf, unsigned int size)
struct adata *
int_set_add(struct linpool *pool, struct adata *list, u32 val)
{
- struct adata *res = lp_alloc(pool, list->length + sizeof(struct adata) + 4);
- res->length = list->length+4;
+ int len = list ? list->length : 0;
+ struct adata *res = lp_alloc(pool, len + sizeof(struct adata) + 4);
+ res->length = len + 4;
* (u32 *) res->data = val;
- memcpy((char *) res->data + 4, list->data, list->length);
+ if (list)
+ memcpy((char *) res->data + 4, list->data, list->length);
return res;
}
diff --git a/nest/attrs.h b/nest/attrs.h
index abd6b9e9..fee2c2c8 100644
--- a/nest/attrs.h
+++ b/nest/attrs.h
@@ -14,16 +14,30 @@
#define AS_PATH_SET 1 /* Types of path segments */
#define AS_PATH_SEQUENCE 2
-struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, int as);
+#define AS_PATH_MAXLEN 10000
+
+#define AS_TRANS 23456
+/* AS_TRANS is used when we need to store 32bit ASN larger than 0xFFFF
+ * to 16bit slot (like in 16bit AS_PATH). See RFC 4893 for details
+ */
+
+struct adata *as_path_prepend(struct linpool *pool, struct adata *olda, u32 as);
+int as_path_convert_to_old(struct adata *path, byte *dst, int *new_used);
+int as_path_convert_to_new(struct adata *path, byte *dst, int req_as);
void as_path_format(struct adata *path, byte *buf, unsigned int size);
int as_path_getlen(struct adata *path);
-int as_path_get_first(struct adata *path);
+int as_path_get_first(struct adata *path, u32 *orig_as);
+int as_path_get_last(struct adata *path, u32 *last_as);
+int as_path_is_member(struct adata *path, u32 as);
+
struct f_path_mask {
struct f_path_mask *next;
- int val;
+ u32 val;
+ int any;
};
-#define PM_ANY -1
+
+// #define PM_ANY -1
int as_path_match(struct adata *path, struct f_path_mask *mask);
@@ -34,4 +48,7 @@ struct adata *int_set_add(struct linpool *pool, struct adata *list, u32 val);
int int_set_contains(struct adata *list, u32 val);
struct adata *int_set_del(struct linpool *pool, struct adata *list, u32 val);
+static inline int int_set_get_size(struct adata *list)
+{ return list->length / 4; }
+
#endif
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 30699f84..2210cbe7 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -55,22 +55,38 @@ bgp_format_origin(eattr *a, byte *buf)
}
static int
-bgp_check_path(struct bgp_proto *p UNUSED, byte *a, int len)
+bgp_check_path(byte *a, int len, int bs, int errcode)
{
while (len)
{
DBG("Path segment %02x %02x\n", a[0], a[1]);
if (len < 2 ||
- a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE ||
- 2*a[1] + 2 > len)
- return 11;
- len -= 2*a[1] + 2;
- a += 2*a[1] + 2;
+ (a[0] != AS_PATH_SET && a[0] != AS_PATH_SEQUENCE) ||
+ bs * a[1] + 2 > len)
+ return errcode;
+ len -= bs * a[1] + 2;
+ a += bs * a[1] + 2;
}
return 0;
}
static int
+bgp_check_as_path(struct bgp_proto *p, byte *a, int len)
+{
+ return bgp_check_path(a, len, p->as4_session ? 4 : 2, 11);
+}
+
+static int
+bgp_check_as4_path(struct bgp_proto *p, byte *a, int len)
+{
+ if (bgp_as4_support && (! p->as4_session))
+ return bgp_check_path(a, len, 4, 9);
+ else
+ return 0;
+}
+
+
+static int
bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len)
{
#ifdef IPV6
@@ -88,6 +104,20 @@ bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a, int len)
}
static int
+bgp_check_aggregator(struct bgp_proto *p, UNUSED byte *a, int len)
+{
+ int exp_len = p->as4_session ? 8 : 6;
+
+ return (len == exp_len) ? 0 : 5;
+}
+
+static int
+bgp_check_cluster_list(struct bgp_proto *p UNUSED, UNUSED byte *a, int len)
+{
+ return ((len % 4) == 0) ? 0 : 5;
+}
+
+static int
bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
{
#ifdef IPV6
@@ -113,21 +143,23 @@ static struct attr_desc bgp_attr_table[] = {
{ "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1, /* BA_ORIGIN */
bgp_check_origin, bgp_format_origin },
{ "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1, /* BA_AS_PATH */
- bgp_check_path, NULL },
+ bgp_check_as_path, NULL },
{ "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1, /* BA_NEXT_HOP */
bgp_check_next_hop, NULL },
- { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_MULTI_EXIT_DISC */
+ { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1, /* BA_MULTI_EXIT_DISC */
NULL, NULL },
{ "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 0, /* BA_LOCAL_PREF */
NULL, NULL },
{ "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_ATOMIC_AGGR */
NULL, NULL },
- { "aggregator", 6, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */
- NULL, NULL },
+ { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AGGREGATOR */
+ bgp_check_aggregator, NULL },
{ "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1, /* BA_COMMUNITY */
NULL, NULL },
- { NULL, }, /* BA_ORIGINATOR_ID */
- { NULL, }, /* BA_CLUSTER_LIST */
+ { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_INT, 0, /* BA_ORIGINATOR_ID */
+ NULL, NULL },
+ { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0, /* BA_CLUSTER_LIST */
+ bgp_check_cluster_list, NULL },
{ NULL, }, /* BA_DPA */
{ NULL, }, /* BA_ADVERTISER */
{ NULL, }, /* BA_RCID_PATH */
@@ -135,43 +167,152 @@ static struct attr_desc bgp_attr_table[] = {
bgp_check_reach_nlri, NULL },
{ "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1, /* BA_MP_UNREACH_NLRI */
bgp_check_unreach_nlri, NULL },
+ { NULL, }, /* BA_EXTENDED_COMM */
+ { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_PATH */
+ bgp_check_as4_path, NULL },
+ { "as4_aggregator", 8, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1, /* BA_AS4_AGGREGATOR */
+ NULL, NULL }
};
+/* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH because
+ * EAF_TYPE_AS_PATH is supposed to have different format (2 or 4 B for each ASN)
+ * depending on bgp_as4_support variable.
+ */
+
#define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name)
-static byte *
-bgp_set_attr(eattr *e, struct linpool *pool, unsigned attr, unsigned val)
+static inline struct adata *
+bgp_alloc_adata(struct linpool *pool, unsigned len)
+{
+ struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
+ ad->length = len;
+ return ad;
+}
+
+static void
+bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
{
ASSERT(ATTR_KNOWN(attr));
e->id = EA_CODE(EAP_BGP, attr);
e->type = bgp_attr_table[attr].type;
e->flags = bgp_attr_table[attr].expected_flags;
if (e->type & EAF_EMBEDDED)
- {
- e->u.data = val;
- return NULL;
- }
+ e->u.data = val;
else
- {
- e->u.ptr = lp_alloc(pool, sizeof(struct adata) + val);
- e->u.ptr->length = val;
- return e->u.ptr->data;
- }
+ e->u.ptr = (struct adata *) val;
}
-byte *
-bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val)
+static byte *
+bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
+{
+ struct adata *ad = bgp_alloc_adata(pool, len);
+ bgp_set_attr(e, attr, (uintptr_t) ad);
+ return ad->data;
+}
+
+void
+bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
{
ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
a->next = *to;
*to = a;
a->flags = EALF_SORTED;
a->count = 1;
- return bgp_set_attr(a->attrs, pool, attr, val);
+ bgp_set_attr(a->attrs, attr, val);
}
+byte *
+bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
+{
+ struct adata *ad = bgp_alloc_adata(pool, len);
+ bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
+ return ad->data;
+}
+
+static int
+bgp_encode_attr_hdr(byte *dst, unsigned int flags, unsigned code, int len)
+{
+ int wlen;
+
+ DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
+
+ if (len < 256)
+ {
+ *dst++ = flags;
+ *dst++ = code;
+ *dst++ = len;
+ wlen = 3;
+ }
+ else
+ {
+ *dst++ = flags | BAF_EXT_LEN;
+ *dst++ = code;
+ put_u16(dst, len);
+ wlen = 4;
+ }
+
+ return wlen;
+}
+
+static void
+aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
+{
+ byte *src = aggr->data;
+ *new_used = 0;
+
+ u32 as = get_u32(src);
+ if (as > 0xFFFF)
+ {
+ as = AS_TRANS;
+ *new_used = 1;
+ }
+ put_u16(dst, as);
+
+ /* Copy IPv4 address */
+ memcpy(dst + 2, src + 4, 4);
+}
+
+static void
+aggregator_convert_to_new(struct adata *aggr, byte *dst)
+{
+ byte *src = aggr->data;
+
+ u32 as = get_u16(src);
+ put_u32(dst, as);
+
+ /* Copy IPv4 address */
+ memcpy(dst + 4, src + 2, 4);
+}
+
+static int
+bgp_get_attr_len(eattr *a)
+{
+ int len;
+ if (ATTR_KNOWN(EA_ID(a->id)))
+ {
+ int code = EA_ID(a->id);
+ struct attr_desc *desc = &bgp_attr_table[code];
+ len = desc->expected_length;
+ if (len < 0)
+ {
+ ASSERT(!(a->type & EAF_EMBEDDED));
+ len = a->u.ptr->length;
+ }
+ }
+ else
+ {
+ ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
+ len = a->u.ptr->length;
+ }
+
+ return len;
+}
+
+#define ADVANCE(w, r, l) do { r -= l; w += l; } while (0)
+
/**
* bgp_encode_attrs - encode BGP attributes
+ * @p: BGP instance
* @w: buffer
* @attrs: a list of extended attributes
* @remains: remaining space in the buffer
@@ -182,11 +323,11 @@ bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, unsigned val)
* Result: Length of the attribute block generated.
*/
unsigned int
-bgp_encode_attrs(byte *w, ea_list *attrs, int remains)
+bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
{
unsigned int i, code, flags;
byte *start = w;
- int len;
+ int len, rv;
for(i=0; i<attrs->count; i++)
{
@@ -198,43 +339,90 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains)
if (code == BA_NEXT_HOP)
continue;
#endif
- flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
- if (ATTR_KNOWN(code))
- {
- struct attr_desc *desc = &bgp_attr_table[code];
- len = desc->expected_length;
- if (len < 0)
- {
- ASSERT(!(a->type & EAF_EMBEDDED));
- len = a->u.ptr->length;
- }
- }
- else
+
+ /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
+ * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH
+ * as optional AS4_PATH attribute.
+ */
+ if ((code == BA_AS_PATH) && bgp_as4_support && (! p->as4_session))
{
- ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
len = a->u.ptr->length;
+
+ if (remains < (len + 4))
+ goto err_no_buffer;
+
+ /* Use a temporary buffer because we do not know the length of the
+ * converted attribute, and therefore the length of its header, in advance.
+ * Perhaps it would be better to always use BAF_EXT_LEN. */
+
+ byte buf[len];
+ int new_used;
+ int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
+
+ rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
+ ADVANCE(w, remains, rv);
+ memcpy(w, buf, nl);
+ ADVANCE(w, remains, nl);
+
+ if (! new_used)
+ continue;
+
+ if (remains < (len + 4))
+ goto err_no_buffer;
+
+ /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments
+ * here but we don't support confederations and such paths we already
+ * discarded in bgp_check_as_path().
+ */
+
+ rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
+ ADVANCE(w, remains, rv);
+ memcpy(w, a->u.ptr->data, len);
+ ADVANCE(w, remains, len);
+
+ continue;
}
- DBG("\tAttribute %02x (type %02x, %d bytes, flags %02x)\n", code, a->type, len, flags);
- if (remains < len + 4)
- {
- log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes");
- break;
- }
- if (len < 256)
- {
- *w++ = flags;
- *w++ = code;
- *w++ = len;
- remains -= 3;
- }
- else
+
+ /* The same issue with AGGREGATOR attribute */
+ if ((code == BA_AGGREGATOR) && bgp_as4_support && (! p->as4_session))
{
- *w++ = flags | BAF_EXT_LEN;
- *w++ = code;
- put_u16(w, len);
- w += 2;
- remains -= 4;
+ int new_used;
+
+ len = 6;
+ if (remains < (len + 3))
+ goto err_no_buffer;
+
+ rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
+ ADVANCE(w, remains, rv);
+ aggregator_convert_to_old(a->u.ptr, w, &new_used);
+ ADVANCE(w, remains, len);
+
+ if (! new_used)
+ continue;
+
+ len = 8;
+ if (remains < (len + 3))
+ goto err_no_buffer;
+
+ rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
+ ADVANCE(w, remains, rv);
+ memcpy(w, a->u.ptr->data, len);
+ ADVANCE(w, remains, len);
+
+ continue;
}
+
+ /* Standard path continues here ... */
+
+ flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
+ len = bgp_get_attr_len(a);
+
+ if (remains < len + 4)
+ goto err_no_buffer;
+
+ rv = bgp_encode_attr_hdr(w, flags, code, len);
+ ADVANCE(w, remains, rv);
+
switch (a->type & EAF_TYPE_MASK)
{
case EAF_TYPE_INT:
@@ -266,10 +454,13 @@ bgp_encode_attrs(byte *w, ea_list *attrs, int remains)
default:
bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
}
- remains -= len;
- w += len;
+ ADVANCE(w, remains, len);
}
return w - start;
+
+ err_no_buffer:
+ log(L_ERR "BGP: attribute list too long, ignoring the remaining attributes");
+ return w - start;
}
static void
@@ -547,6 +738,7 @@ bgp_rt_notify(struct proto *P, net *n, rte *new, rte *old UNUSED, ea_list *attrs
bgp_schedule_packet(p->conn, PKT_UPDATE);
}
+
static int
bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
{
@@ -559,20 +751,24 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
ea->flags = EALF_SORTED;
ea->count = 4;
- bgp_set_attr(ea->attrs, pool, BA_ORIGIN,
+ bgp_set_attr(ea->attrs, BA_ORIGIN,
((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
if (p->is_internal)
- bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 0);
+ bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
else
{
- z = bgp_set_attr(ea->attrs+1, pool, BA_AS_PATH, 4);
+ z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, bgp_as4_support ? 6 : 4);
z[0] = AS_PATH_SEQUENCE;
z[1] = 1; /* 1 AS */
- put_u16(z+2, p->local_as);
+
+ if (bgp_as4_support)
+ put_u32(z+2, p->local_as);
+ else
+ put_u16(z+2, p->local_as);
}
- z = bgp_set_attr(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr));
+ z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, sizeof(ip_addr));
if (p->cf->next_hop_self ||
!p->is_internal ||
rta->dest != RTD_ROUTER)
@@ -585,34 +781,65 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
else
*(ip_addr *)z = e->attrs->gw;
- bgp_set_attr(ea->attrs+3, pool, BA_LOCAL_PREF, 0);
+ bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, 0);
return 0; /* Leave decision to the filters */
}
-static ea_list *
-bgp_path_prepend(struct linpool *pool, eattr *a, ea_list *old, int as)
+
+static inline int
+bgp_as_path_loopy(struct bgp_proto *p, rta *a)
{
- struct ea_list *e = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
- struct adata *olda = a->u.ptr;
+ eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ return (e && as_path_is_member(e->u.ptr, p->local_as));
+}
- e->next = old;
- e->flags = EALF_SORTED;
- e->count = 1;
- e->attrs[0].id = EA_CODE(EAP_BGP, BA_AS_PATH);
- e->attrs[0].flags = BAF_TRANSITIVE;
- e->attrs[0].type = EAF_TYPE_AS_PATH;
- e->attrs[0].u.ptr = as_path_prepend(pool, olda, as);
- return e;
+static inline int
+bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
+{
+ eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ return (e && (e->u.data == p->local_id));
+}
+
+static inline int
+bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
+{
+ eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
+}
+
+
+static inline void
+bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
+{
+ eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
+}
+
+static inline void
+bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
+{
+ eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_add(pool, a ? a->u.ptr : NULL, cid));
}
static int
-bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
+bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
{
eattr *a;
- if (!p->is_internal && (a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH))))
- *attrs = bgp_path_prepend(pool, a, *attrs, p->local_as);
+ if (!p->is_internal)
+ {
+ bgp_path_prepend(e, attrs, pool, p->local_as);
+
+ /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
+ * propagated to other neighboring ASes.
+ * Perhaps it would be better to undefine it.
+ */
+ a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ if (a)
+ bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
+ }
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
if (a && (p->is_internal || (!p->is_internal && e->attrs->iface == p->neigh->iface)))
@@ -622,7 +849,24 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
else
{
/* Need to create new one */
- *(ip_addr *) bgp_attach_attr(attrs, pool, BA_NEXT_HOP, sizeof(ip_addr)) = p->local_addr;
+ bgp_attach_attr_ip(attrs, pool, BA_NEXT_HOP, p->local_addr);
+ }
+
+ if (rr)
+ {
+ /* Handling route reflection, RFC 4456 */
+ struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto;
+
+ a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ if (!a)
+ bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
+
+ /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
+ bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
+
+ /* Two RR clients with different cluster ID, hmmm */
+ if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
+ bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
}
return 0; /* Leave decision to the filters */
@@ -639,14 +883,39 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *
return -1;
if (new_bgp)
{
+ /* We should check here for cluster list loop, because the receiving BGP instance
+ might have different cluster ID */
+ if (bgp_cluster_list_loopy(p, e->attrs))
+ return -1;
+
if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
- return -1; /* Don't redistribute internal routes with IBGP */
- return bgp_update_attrs(p, e, attrs, pool);
+ {
+ /* Redistribution of internal routes with IBGP */
+ if (p->rr_client || new_bgp->rr_client)
+ /* Route reflection, RFC 4456 */
+ return bgp_update_attrs(p, e, attrs, pool, 1);
+ else
+ return -1;
+ }
+ else
+ return bgp_update_attrs(p, e, attrs, pool, 0);
}
else
return bgp_create_attrs(p, e, attrs, pool);
}
+static inline u32
+bgp_get_neighbor(rte *r)
+{
+ eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ u32 as;
+
+ if (e && as_path_get_last(e->u.ptr, &as))
+ return as;
+ else
+ return ((struct bgp_proto *) r->attrs->proto)->remote_as;
+}
+
int
bgp_rte_better(rte *new, rte *old)
{
@@ -665,20 +934,20 @@ bgp_rte_better(rte *new, rte *old)
if (n < o)
return 0;
- /* Use AS path lengths */
+ /* RFC 4271 9.1.2.2. a) Use AS path lengths */
if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
{
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- n = x ? as_path_getlen(x->u.ptr) : 100000;
- o = y ? as_path_getlen(y->u.ptr) : 100000;
+ n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
+ o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
if (n < o)
return 1;
if (n > o)
return 0;
}
- /* Use origins */
+ /* RFC 4271 9.1.2.2. b) Use origins */
x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
n = x ? x->u.data : ORIGIN_INCOMPLETE;
@@ -688,47 +957,163 @@ bgp_rte_better(rte *new, rte *old)
if (n > o)
return 0;
- /* Compare MED's */
- x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
- n = x ? x->u.data : new_bgp->cf->default_med;
- o = y ? y->u.data : old_bgp->cf->default_med;
+ /* RFC 4271 9.1.2.2. c) Compare MED's */
+
+ if (bgp_get_neighbor(new) == bgp_get_neighbor(old))
+ {
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+ n = x ? x->u.data : new_bgp->cf->default_med;
+ o = y ? y->u.data : old_bgp->cf->default_med;
+ if (n < o)
+ return 1;
+ if (n > o)
+ return 0;
+ }
+
+ /* RFC 4271 9.1.2.2. d) Prefer external peers */
+ if (new_bgp->is_internal > old_bgp->is_internal)
+ return 0;
+ if (new_bgp->is_internal < old_bgp->is_internal)
+ return 1;
+
+ /* Skipping RFC 4271 9.1.2.2. e) */
+ /* We don't have interior distances */
+
+ /* RFC 4456 9. b) Compare cluster list lengths */
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+ n = x ? int_set_get_size(x->u.ptr) : 0;
+ o = y ? int_set_get_size(y->u.ptr) : 0;
if (n < o)
return 1;
if (n > o)
return 0;
- /* A tie breaking procedure according to RFC 1771, section 9.1.2.1 */
- /* We don't have interior distances */
- /* We prefer external peers */
- if (new_bgp->is_internal > old_bgp->is_internal)
- return 0;
- if (new_bgp->is_internal < old_bgp->is_internal)
+ /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
+ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
+ x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+ n = x ? x->u.data : new_bgp->remote_id;
+ o = y ? y->u.data : old_bgp->remote_id;
+ if (n < o)
return 1;
- /* Finally we compare BGP identifiers */
- return (new_bgp->remote_id < old_bgp->remote_id);
+ if (n > o)
+ return 0;
+
+
+ /* RFC 4271 9.1.2.2. g) Compare peer IP addresses */
+ return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
}
-static int
-bgp_path_loopy(struct bgp_proto *p, eattr *a)
+static struct adata *
+bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
+{
+ struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
+ newa->length = 8;
+ aggregator_convert_to_new(old, newa->data);
+ return newa;
+}
+
+
+/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
+ * and append path old4 (in 4B format).
+ */
+static struct adata *
+bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
+{
+ byte buf[old2->length * 2];
+
+ int ol = as_path_convert_to_new(old2, buf, req_as);
+ int nl = ol + (old4 ? old4->length : 0);
+
+ struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
+ newa->length = nl;
+ memcpy(newa->data, buf, ol);
+ if (old4) memcpy(newa->data + ol, old4->data, old4->length);
+
+ return newa;
+}
+
+
+/* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
+static void
+bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
+{
+ eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+ eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
+ eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
+ eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
+
+ if (a2)
+ {
+ u32 a2_as = get_u16(a2->u.ptr->data);
+
+ if (a4)
+ {
+ if (a2_as != AS_TRANS)
+ {
+ /* Routes were aggregated by old router and therefore AS4_PATH
+ * and AS4_AGGREGATOR are invalid
+ *
+ * Convert AS_PATH and AGGREGATOR to 4B format and finish.
+ */
+
+ a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
+ p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
+
+ return;
+ }
+ else
+ {
+ /* Common case, use AS4_AGGREGATOR attribute */
+ a2->u.ptr = a4->u.ptr;
+ }
+ }
+ else
+ {
+ /* Common case, use old AGGREGATOR attribute */
+ a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
+
+ if (a2_as == AS_TRANS)
+ log(L_WARN "BGP: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing");
+ }
+ }
+ else
+ if (a4)
+ log(L_WARN "BGP: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing");
+
+ int p2_len = as_path_getlen(p2->u.ptr);
+ int p4_len = p4 ? as_path_getlen(p4->u.ptr) : AS_PATH_MAXLEN;
+
+ if (p2_len < p4_len)
+ p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
+ else
+ p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
+
+}
+
+static void
+bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
{
- byte *path = a->u.ptr->data;
- int len = a->u.ptr->length;
- int i, n;
+ unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
+ unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
+ ea_list **el = &(a->eattrs);
- while (len > 0)
+ /* We know that ea_lists constructed in bgp_decode_attrs have one attribute per ea_list struct */
+ while (*el != NULL)
{
- n = path[1];
- len -= 2 + 2*n;
- path += 2;
- for(i=0; i<n; i++)
+ unsigned fid = (*el)->attrs[0].id;
+
+ if ((fid == id1) || (fid == id2))
{
- if (get_u16(path) == p->local_as)
- return 1;
- path += 2;
+ *el = (*el)->next;
+ if (p->as4_session)
+ log(L_WARN "BGP: Unexpected AS4_* attributes received");
}
+ else
+ el = &((*el)->next);
}
- return 0;
}
/**
@@ -883,20 +1268,34 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin
}
}
}
+
+ /* When receiving attributes from non-AS4-aware BGP speaker,
+ * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
+ */
+ if (bgp_as4_support && (! bgp->as4_session))
+ bgp_reconstruct_4b_atts(bgp, a, pool);
+
+ if (bgp_as4_support)
+ bgp_remove_as4_attrs(bgp, a);
/* If the AS path attribute contains our AS, reject the routes */
- e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
- if (e && bgp_path_loopy(bgp, e))
- {
- DBG("BGP: Path loop!\n");
- return NULL;
- }
+ if (bgp_as_path_loopy(bgp, a))
+ goto loop;
+
+ /* Two checks for IBGP loops caused by route reflection, RFC 4456 */
+ if (bgp_originator_id_loopy(bgp, a) ||
+ bgp_cluster_list_loopy(bgp, a))
+ goto loop;
/* If there's no local preference, define one */
- if (!(seen[0] && (1 << BA_LOCAL_PREF)))
+ if (!(seen[0] & (1 << BA_LOCAL_PREF)))
bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, 0);
return a;
+loop:
+ DBG("BGP: Path loop!\n");
+ return NULL;
+
malformed:
bgp_error(conn, 3, 1, NULL, 0);
return NULL;
@@ -945,11 +1344,11 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
{
eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
- int origas;
+ u32 origas;
buf += bsprintf(buf, " (%d) [", e->pref);
- if (p && (origas = as_path_get_first(p->u.ptr)) >= 0)
- buf += bsprintf(buf, "AS%d", origas);
+ if (p && as_path_get_first(p->u.ptr, &origas))
+ buf += bsprintf(buf, "AS%u", origas);
if (o)
buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
strcpy(buf, "]");
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index cedd223b..0d580be1 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -76,11 +76,16 @@ static void bgp_connect(struct bgp_proto *p);
static void bgp_initiate(struct bgp_proto *p);
static void bgp_setup_listen_sk(void);
+
static void
-bgp_close(struct bgp_proto *p UNUSED)
+bgp_close(struct bgp_proto *p)
{
ASSERT(bgp_counter);
bgp_counter--;
+
+ if (p->cf->password)
+ sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, NULL);
+
if (!bgp_counter)
{
rfree(bgp_listen_sk);
@@ -329,6 +334,7 @@ bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing c
bgp_setup_conn(p, conn);
bgp_setup_sk(p, conn, s);
s->tx_hook = bgp_connected;
+ s->password = p->cf->password;
conn->state = BS_CONNECT;
if (sk_open(s))
{
@@ -479,6 +485,13 @@ bgp_start_locked(struct object_lock *lock)
p->local_id = cf->c.global->router_id;
p->next_hop = cf->multihop ? cf->multihop_via : cf->remote_ip;
p->neigh = neigh_find(&p->p, &p->next_hop, NEF_STICKY);
+
+ if (cf->rr_client)
+ {
+ p->rr_cluster_id = cf->rr_cluster_id ? cf->rr_cluster_id : p->local_id;
+ p->rr_client = cf->rr_client;
+ }
+
if (!p->neigh)
{
log(L_ERR "%s: Invalid next hop %I", p->p.name, p->next_hop);
@@ -505,6 +518,7 @@ bgp_start(struct proto *P)
bgp_counter++;
bgp_setup_listen_sk();
+
if (!bgp_linpool)
bgp_linpool = lp_new(&root_pool, 4080);
@@ -522,6 +536,17 @@ bgp_start(struct proto *P)
lock->hook = bgp_start_locked;
lock->data = p;
olock_acquire(lock);
+
+ /* We should create security association after we get a lock not to
+ * break existing connections.
+ */
+ if (p->cf->password)
+ {
+ int rv = sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->password);
+ if (rv < 0)
+ return PS_STOP;
+ }
+
return PS_START;
}
@@ -611,6 +636,14 @@ bgp_check(struct bgp_config *c)
cf_error("Local AS number must be set");
if (!c->remote_as)
cf_error("Neighbor must be configured");
+ if (!bgp_as4_support && c->enable_as4)
+ cf_error("AS4 support disabled globbaly");
+ if (!c->enable_as4 && (c->local_as > 0xFFFF))
+ cf_error("Local AS number out of range");
+ if (!c->enable_as4 && (c->remote_as > 0xFFFF))
+ cf_error("Neighbor AS number out of range");
+ if ((c->local_as != c->remote_as) && (c->rr_client))
+ cf_error("Only internal neighbor can be RR client");
}
static void
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 6519db85..1d67e336 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -16,7 +16,7 @@ struct eattr;
struct bgp_config {
struct proto_config c;
- unsigned int local_as, remote_as;
+ u32 local_as, remote_as;
ip_addr remote_ip;
int multihop; /* Number of hops if multihop */
ip_addr multihop_via; /* Multihop: address to route to */
@@ -25,6 +25,9 @@ struct bgp_config {
int compare_path_lengths; /* Use path lengths when selecting best route */
u32 default_local_pref; /* Default value for LOCAL_PREF attribute */
u32 default_med; /* Default value for MULTI_EXIT_DISC attribute */
+ int enable_as4; /* Enable local support for 4B AS numbers [RFC4893] */
+ u32 rr_cluster_id; /* Route reflector cluster ID, if different from local ID */
+ int rr_client; /* Whether neighbor is RR client of me */
unsigned connect_retry_time;
unsigned hold_time, initial_hold_time;
unsigned keepalive_time;
@@ -33,6 +36,7 @@ struct bgp_config {
unsigned error_delay_time_min; /* Time to wait after an error is detected */
unsigned error_delay_time_max;
unsigned disable_after_error; /* Disable the protocol when error is detected */
+ char *password; /* Password used for MD5 authentication */
};
struct bgp_conn {
@@ -47,16 +51,21 @@ struct bgp_conn {
byte *notify_data;
int error_flag; /* Error state, ignore all input */
int primary; /* This connection is primary */
+ u32 advertised_as; /* Temporary value for AS number received */
unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
struct bgp_proto {
struct proto p;
struct bgp_config *cf; /* Shortcut to BGP configuration */
- unsigned local_as, remote_as;
+ u32 local_as, remote_as;
int is_internal; /* Internal BGP connection (local_as == remote_as) */
+ int as4_support; /* Peer supports 4B AS numbers [RFC4893] */
+ int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
u32 local_id; /* BGP identifier of this router */
u32 remote_id; /* BGP identifier of the neighbor */
+ u32 rr_cluster_id; /* Route reflector cluster ID */
+ int rr_client; /* Whether neighbor is RR client of me */
struct bgp_conn *conn; /* Connection we have established */
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
@@ -100,6 +109,9 @@ struct bgp_bucket {
extern struct linpool *bgp_linpool;
+extern int bgp_as4_support;
+
+
void bgp_start_timer(struct timer *t, int value);
void bgp_check(struct bgp_config *c);
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
@@ -115,17 +127,21 @@ void bgp_close_conn(struct bgp_conn *c);
/* attrs.c */
-byte *bgp_attach_attr(struct ea_list **to, struct linpool *, unsigned attr, unsigned val);
+void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
+byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, unsigned int len, struct linpool *pool, int mandatory);
int bgp_get_attr(struct eattr *e, byte *buf);
int bgp_rte_better(struct rte *, struct rte *);
void bgp_rt_notify(struct proto *, struct network *, struct rte *, struct rte *, struct ea_list *);
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
void bgp_attr_init(struct bgp_proto *);
-unsigned int bgp_encode_attrs(byte *w, struct ea_list *attrs, int remains);
+unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
+inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
+{ *(ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)) = a; }
+
/* packets.c */
void bgp_schedule_packet(struct bgp_conn *conn, int type);
@@ -165,6 +181,8 @@ void bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subco
#define BA_MP_REACH_NLRI 0x0e /* [RFC2283] */
#define BA_MP_UNREACH_NLRI 0x0f
#define BA_EXTENDED_COMM 0x10 /* draft-ramachandra-bgp-ext-communities */
+#define BA_AS4_PATH 0x11 /* [RFC4893] */
+#define BA_AS4_AGGREGATOR 0x12
/* BGP states */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 52ad731e..8524b2dd 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -18,9 +18,10 @@ CF_DECLS
CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC,
- ERROR, START, DELAY, FORGET, WAIT, DISABLE, AFTER,
+ ERROR, START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER,
BGP_PATH, BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP,
- BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS)
+ BGP_ATOMIC_AGGR, BGP_AGGREGATOR, BGP_COMMUNITY, SOURCE, ADDRESS,
+ PASSWORD, RR, CLIENT, CLUSTER, ID, AS4)
CF_GRAMMAR
@@ -32,27 +33,28 @@ bgp_proto_start: proto_start BGP {
BGP_CFG->hold_time = 240;
BGP_CFG->connect_retry_time = 120;
BGP_CFG->initial_hold_time = 240;
- BGP_CFG->default_med = ~0; /* RFC 1771 doesn't specify this, draft-09 says ~0 */
+ BGP_CFG->default_med = 0;
BGP_CFG->compare_path_lengths = 1;
BGP_CFG->start_delay_time = 5;
BGP_CFG->error_amnesia_time = 300;
BGP_CFG->error_delay_time_min = 60;
BGP_CFG->error_delay_time_max = 300;
+ BGP_CFG->enable_as4 = bgp_as4_support;
}
;
bgp_proto:
bgp_proto_start proto_name '{'
| bgp_proto proto_item ';'
- | bgp_proto LOCAL AS expr ';' {
- if ($4 < 0 || $4 > 65535) cf_error("AS number out of range");
- BGP_CFG->local_as = $4;
- }
+ | bgp_proto LOCAL AS expr ';' { BGP_CFG->local_as = $4; }
| bgp_proto NEIGHBOR ipa AS expr ';' {
- if ($5 < 0 || $5 > 65535) cf_error("AS number out of range");
+ if (ipa_nonzero(BGP_CFG->remote_ip)) cf_error("Only one neighbor per BGP instance is allowed");
+
BGP_CFG->remote_ip = $3;
BGP_CFG->remote_as = $5;
}
+ | bgp_proto RR CLUSTER ID expr ';' { BGP_CFG->rr_cluster_id = $5; }
+ | bgp_proto RR CLIENT ';' { BGP_CFG->rr_client = 1; }
| bgp_proto HOLD TIME expr ';' { BGP_CFG->hold_time = $4; }
| bgp_proto STARTUP HOLD TIME expr ';' { BGP_CFG->initial_hold_time = $5; }
| bgp_proto CONNECT RETRY TIME expr ';' { BGP_CFG->connect_retry_time = $5; }
@@ -67,6 +69,8 @@ bgp_proto:
| bgp_proto ERROR FORGET TIME expr ';' { BGP_CFG->error_amnesia_time = $5; }
| bgp_proto ERROR WAIT TIME expr ',' expr ';' { BGP_CFG->error_delay_time_min = $5; BGP_CFG->error_delay_time_max = $7; }
| bgp_proto DISABLE AFTER ERROR bool ';' { BGP_CFG->disable_after_error = $5; }
+ | bgp_proto ENABLE AS4 bool ';' { BGP_CFG->enable_as4 = $4; }
+ | bgp_proto PASSWORD TEXT ';' { BGP_CFG->password = $3; }
;
CF_ADDTO(dynamic_attr, BGP_PATH
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 2e6f0b60..c18c6e42 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -12,6 +12,7 @@
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
+#include "nest/attrs.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
#include "lib/socket.h"
@@ -30,33 +31,64 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
return buf + 2 + conn->notify_size;
}
+#ifdef IPV6
+static byte *
+bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
+{
+ *buf++ = 1; /* Capability 1: Multiprotocol extensions */
+ *buf++ = 4; /* Capability data length */
+ *buf++ = 0; /* We support AF IPv6 */
+ *buf++ = BGP_AF_IPV6;
+ *buf++ = 0; /* RFU */
+ *buf++ = 1; /* and SAFI 1 */
+ return buf;
+}
+#endif
+
+static byte *
+bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
+{
+ *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
+ *buf++ = 4; /* Capability data length */
+ put_u32(buf, conn->bgp->local_as);
+ return buf + 4;
+}
+
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
+ byte *cap;
+ int cap_len;
BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
buf[0] = BGP_VERSION;
- put_u16(buf+1, p->local_as);
+ put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
put_u16(buf+3, p->cf->hold_time);
put_u32(buf+5, p->local_id);
-#ifndef IPV6
- buf[9] = 0; /* No optional parameters */
- return buf+10;
-#else
- buf += 9;
- *buf++ = 8; /* Optional params len */
- *buf++ = 2; /* Option: Capability list */
- *buf++ = 6; /* Option length */
- *buf++ = 1; /* Capability 1: Multiprotocol extensions */
- *buf++ = 4; /* Capability data length */
- *buf++ = 0; /* We support AF IPv6 */
- *buf++ = BGP_AF_IPV6;
- *buf++ = 0; /* RFU */
- *buf++ = 1; /* and SAFI 1 */
- return buf;
+ /* Skipped 3 B for length field and Capabilities parameter header */
+ cap = buf + 12;
+
+#ifdef IPV6
+ cap = bgp_put_cap_ipv6(conn, cap);
#endif
+ if (p->cf->enable_as4)
+ cap = bgp_put_cap_as4(conn, cap);
+
+ cap_len = cap - buf - 12;
+ if (cap_len > 0)
+ {
+ buf[9] = cap_len + 2; /* Optional params len */
+ buf[10] = 2; /* Option: Capability list */
+ buf[11] = cap_len; /* Option length */
+ return cap;
+ }
+ else
+ {
+ buf[9] = 0; /* No optional parameters */
+ return buf + 10;
+ }
}
static unsigned int
@@ -118,7 +150,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
continue;
}
DBG("Processing bucket %p\n", buck);
- a_size = bgp_encode_attrs(w+2, buck->eattrs, 1024);
+ a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 1024);
put_u16(w, a_size);
w += a_size + 2;
r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
@@ -161,12 +193,12 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
DBG("Withdrawn routes:\n");
- tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
+ tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
*tmp++ = 0;
*tmp++ = BGP_AF_IPV6;
*tmp++ = 1;
ea->attrs[0].u.ptr->length = bgp_encode_prefixes(p, tmp, buck, remains-11);
- size = bgp_encode_attrs(w, ea, remains);
+ size = bgp_encode_attrs(p, w, ea, remains);
w += size;
remains -= size;
}
@@ -183,10 +215,10 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
continue;
}
DBG("Processing bucket %p\n", buck);
- size = bgp_encode_attrs(w, buck->eattrs, 1024);
+ size = bgp_encode_attrs(p, w, buck->eattrs, 1024);
w += size;
remains -= size;
- tstart = tmp = bgp_attach_attr(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
+ tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
*tmp++ = 0;
*tmp++ = BGP_AF_IPV6;
*tmp++ = 1;
@@ -230,7 +262,7 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
*tmp++ = 0; /* No SNPA information */
tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
ea->attrs[0].u.ptr->length = tmp - tstart;
- w += bgp_encode_attrs(w, ea, remains);
+ w += bgp_encode_attrs(p, w, ea, remains);
break;
}
}
@@ -353,9 +385,50 @@ bgp_tx(sock *sk)
;
}
+/* Capability negotiation as per RFC 2842 */
+
+void
+bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
+{
+ struct bgp_proto *p = conn->bgp;
+ int cl;
+ u32 as;
+
+ while (len > 0)
+ {
+ if (len < 2 || len < 2 + opt[1])
+ goto err;
+
+ cl = opt[1];
+
+ switch (opt[0])
+ {
+ case 65:
+ if (cl != 4)
+ goto err;
+ p->as4_support = 1;
+ p->as4_session = p->cf->enable_as4;
+ if (p->as4_session)
+ conn->advertised_as = get_u32(opt + 2);
+ break;
+
+ /* We can safely ignore all other capabilities */
+ }
+ len -= 2 + cl;
+ opt += 2 + cl;
+ }
+ return;
+
+ err:
+ bgp_error(conn, 2, 0, NULL, 0);
+ return;
+}
+
static int
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
{
+ int ol;
+
while (len > 0)
{
if (len < 2 || len < 2 + opt[1])
@@ -369,12 +442,14 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
DBG("\n");
}
#endif
+
+ ol = opt[1];
switch (opt[0])
{
case 2:
- /* Capatibility negotiation as per RFC 2842 */
- /* We can safely ignore all capabilities announced */
+ bgp_parse_capabilities(conn, opt + 2, ol);
break;
+
default:
/*
* BGP specs don't tell us to send which option
@@ -382,11 +457,11 @@ bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
* to do so. Also, capability negotiation with
* Cisco routers doesn't work without that.
*/
- bgp_error(conn, 2, 4, opt, opt[1]);
+ bgp_error(conn, 2, 4, opt, ol);
return 0;
}
- len -= 2 + opt[1];
- opt += 2 + opt[1];
+ len -= 2 + ol;
+ opt += 2 + ol;
}
return 0;
}
@@ -397,7 +472,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
struct bgp_conn *other;
struct bgp_proto *p = conn->bgp;
struct bgp_config *cf = p->cf;
- unsigned as, hold;
+ unsigned hold;
u32 id;
/* Check state */
@@ -409,20 +484,27 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
{ bgp_error(conn, 1, 2, pkt+16, 2); return; }
if (pkt[19] != BGP_VERSION)
{ bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
- as = get_u16(pkt+20);
+ conn->advertised_as = get_u16(pkt+20);
hold = get_u16(pkt+22);
id = get_u32(pkt+24);
- BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", as, hold, id);
- if (cf->remote_as && as != p->remote_as)
- { bgp_error(conn, 2, 2, pkt+20, -2); return; }
- if (hold > 0 && hold < 3)
- { bgp_error(conn, 2, 6, pkt+22, 2); return; }
- p->remote_id = id;
+ BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
+
+ p->remote_id = id; // ???
if (bgp_parse_options(conn, pkt+29, pkt[28]))
return;
+
+ if (hold > 0 && hold < 3)
+ { bgp_error(conn, 2, 6, pkt+22, 2); return; }
+
if (!id || id == 0xffffffff || id == p->local_id)
{ bgp_error(conn, 2, 3, pkt+24, -4); return; }
+
+ if (conn->advertised_as != p->remote_as)
+ {
+ bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4); return;
+ }
+
/* Check the other connection */
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
switch (other->state)
@@ -463,7 +545,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
else
conn->hold_time = p->cf->hold_time;
conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
- p->remote_as = as;
+ // p->remote_as = conn->advertised_as;
p->remote_id = id;
DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id);
@@ -621,7 +703,7 @@ bgp_do_rx_update(struct bgp_conn *conn,
/* Create fake NEXT_HOP attribute */
if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
goto bad;
- memcpy(bgp_attach_attr(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, 16), x+1, 16);
+ bgp_attach_attr_ip(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, x[1]);
len -= *x + 2;
x += *x + 1;
@@ -720,7 +802,7 @@ static struct {
{ 2, 4, "Unsupported optional parameter" },
{ 2, 5, "Authentication failure" },
{ 2, 6, "Unacceptable hold time" },
- { 2, 7, "Required capability missing" }, /* capability negotiation draft */
+ { 2, 7, "Required capability missing" }, /* [RFC3392] */
{ 3, 0, "Invalid UPDATE message" },
{ 3, 1, "Malformed attribute list" },
{ 3, 2, "Unrecognized well-known attribute" },
diff --git a/sysdep/linux/netlink/netlink.c b/sysdep/linux/netlink/netlink.c
index 47841955..38d00afe 100644
--- a/sysdep/linux/netlink/netlink.c
+++ b/sysdep/linux/netlink/netlink.c
@@ -499,6 +499,8 @@ nl_send_route(struct krt_proto *p, rte *e, int new)
nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
break;
case RTD_DEVICE:
+ if (!a->iface)
+ return;
r.r.rtm_type = RTN_UNICAST;
nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
break;
@@ -532,11 +534,8 @@ krt_set_notify(struct krt_proto *p, net *n UNUSED, rte *new, rte *old)
else
{
if (old)
- {
- if (!old->attrs->iface || (old->attrs->iface->flags & IF_UP))
- nl_send_route(p, old, 0);
- /* else the kernel has already flushed it */
- }
+ nl_send_route(p, old, 0);
+
if (new)
nl_send_route(p, new, 1);
}
diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h
index 3a29cdc9..b0aff71f 100644
--- a/sysdep/linux/sysio.h
+++ b/sysdep/linux/sysio.h
@@ -139,3 +139,24 @@ static inline char *sysio_mcast_join(sock *s)
#endif
#endif
+
+#include <linux/socket.h>
+#include <linux/tcp.h>
+
+/* For the case that we have older kernel headers */
+/* Copied from Linux kernel file include/linux/tcp.h */
+
+#ifndef TCP_MD5SIG
+
+#define TCP_MD5SIG 14
+#define TCP_MD5SIG_MAXKEYLEN 80
+
+struct tcp_md5sig {
+ struct __kernel_sockaddr_storage tcpm_addr; /* address associated */
+ __u16 __tcpm_pad1; /* zero */
+ __u16 tcpm_keylen; /* key length */
+ __u32 __tcpm_pad2; /* zero */
+ __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */
+};
+
+#endif
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index fa471f6e..8cec2cdd 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -546,6 +546,7 @@ sk_new(pool *p)
s->err_hook = NULL;
s->fd = -1;
s->rbuf_alloc = s->tbuf_alloc = NULL;
+ s->password = NULL;
return s;
}
@@ -642,6 +643,71 @@ bad:
return err;
}
+
+/* FIXME: check portability */
+
+static int
+sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd)
+{
+ struct tcp_md5sig md5;
+
+ memset(&md5, 0, sizeof(md5));
+ memcpy(&md5.tcpm_addr, (struct sockaddr *) sa, sizeof(*sa));
+
+ if (passwd)
+ {
+ int len = strlen(passwd);
+
+ if (len > TCP_MD5SIG_MAXKEYLEN)
+ {
+ log(L_ERR "MD5 password too long");
+ return -1;
+ }
+
+ md5.tcpm_keylen = len;
+ memcpy(&md5.tcpm_key, passwd, len);
+ }
+
+ int rv = setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
+
+ if (rv < 0)
+ {
+ if (errno == ENOPROTOOPT)
+ log(L_ERR "Kernel does not support TCP MD5 signatures");
+ else
+ log(L_ERR "sk_set_md5_auth_int: setsockopt: %m");
+ }
+
+ return rv;
+}
+
+/**
+ * sk_set_md5_auth - add / remove MD5 security association for given socket.
+ * @s: socket
+ * @a: IP address of the other side
+ * @passwd: password used for MD5 authentication
+ *
+ * The TCP MD5 handling code in the kernel keeps a set of
+ * (address, password) pairs and chooses the password according
+ * to the address of the other side. This function is useful for
+ * listening sockets; for active sockets it is enough to set
+ * the s->password field.
+ *
+ * When called with passwd != NULL, a new pair is added.
+ * When called with passwd == NULL, the existing pair is removed.
+ *
+ * Result: 0 for success, -1 for an error.
+ */
+
+int
+sk_set_md5_auth(sock *s, ip_addr a, char *passwd)
+{
+ sockaddr sa;
+ fill_in_sockaddr(&sa, a, 0);
+ return sk_set_md5_auth_int(s, &sa, passwd);
+}
+
+
static void
sk_tcp_connected(sock *s)
{
@@ -805,6 +871,14 @@ sk_open(sock *s)
ERR("bind");
}
fill_in_sockaddr(&sa, s->daddr, s->dport);
+
+ if (s->password)
+ {
+ int rv = sk_set_md5_auth_int(s, &sa, s->password);
+ if (rv < 0)
+ goto bad_no_log;
+ }
+
switch (type)
{
case SK_TCP_ACTIVE:
@@ -846,6 +920,7 @@ sk_open(sock *s)
bad:
log(L_ERR "sk_open: %s: %m", err);
+bad_no_log:
close(fd);
s->fd = -1;
return -1;
diff --git a/sysdep/unix/krt-set.c b/sysdep/unix/krt-set.c
index bd564486..23cbe5c5 100644
--- a/sysdep/unix/krt-set.c
+++ b/sysdep/unix/krt-set.c
@@ -61,6 +61,8 @@ krt_ioctl(int ioc, rte *e, char *name)
re.rt_flags |= RTF_GATEWAY;
break;
case RTD_DEVICE:
+ if (!a->iface)
+ return;
re.rt_dev = a->iface->name;
break;
#ifdef RTF_REJECT
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index a6d17279..5269eb71 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -684,7 +684,7 @@ krt_notify(struct proto *P, net *net, rte *new, rte *old, struct ea_list *attrs
{
struct krt_proto *p = (struct krt_proto *) P;
- if (shutting_down && KRT_CF->persist)
+ if (shutting_down)
return;
if (new && (!krt_capable(new) || new->attrs->source == RTS_INHERIT))
new = NULL;