diff options
author | Maria Matejka <mq@ucw.cz> | 2022-09-27 12:39:07 +0200 |
---|---|---|
committer | Maria Matejka <mq@ucw.cz> | 2022-09-27 12:39:07 +0200 |
commit | 32a67c93ebf29309286dca5195f026eeda3f78a2 (patch) | |
tree | 578c6038187d0c50c4a4f250e440983dbb93029d /proto/bgp | |
parent | 57a34d466e85bedbf40a0f7cbde23b843a303c8d (diff) | |
parent | cae5979871ee7aa341334f8b1af6bafc60ee9692 (diff) |
Merge commit 'cae5979871ee7aa341334f8b1af6bafc60ee9692' into tmp-bad-learn
Diffstat (limited to 'proto/bgp')
-rw-r--r-- | proto/bgp/Makefile | 3 | ||||
-rw-r--r-- | proto/bgp/attrs.c | 636 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 125 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 101 | ||||
-rw-r--r-- | proto/bgp/config.Y | 61 | ||||
-rw-r--r-- | proto/bgp/packets.c | 207 |
6 files changed, 608 insertions, 525 deletions
diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile index 00aaef5e..2a4cc99c 100644 --- a/proto/bgp/Makefile +++ b/proto/bgp/Makefile @@ -2,5 +2,6 @@ src := attrs.c bgp.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,bgp_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 90490b4f..0b715eaa 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -15,12 +15,13 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "lib/resource.h" #include "lib/string.h" #include "lib/unaligned.h" +#include "lib/macro.h" #include "bgp.h" @@ -45,9 +46,9 @@ * * export - Hook that validates and normalizes attribute during export phase. * Receives eattr, may modify it (e.g., sort community lists for canonical - * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if - * necessary. May assume that eattr has value valid w.r.t. its type, but may be - * invalid w.r.t. BGP constraints. Optional. + * representation), UNSET() it (e.g., skip empty lists), or REJECT() the route + * if necessary. May assume that eattr has value valid w.r.t. its type, but may + * be invalid w.r.t. BGP constraints. Optional. * * encode - Hook that converts internal representation to external one during * packet writing. Receives eattr and puts it in the buffer (including attribute @@ -64,37 +65,72 @@ * format - Optional hook that converts eattr to textual representation. */ - -struct bgp_attr_desc { - const char *name; - uint type; - uint flags; - void (*export)(struct bgp_export_state *s, eattr *a); - int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); - void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); - void (*format)(const eattr *ea, byte *buf, uint size); +union bgp_attr_desc { + struct ea_class class; + struct { + EA_CLASS_INSIDE; + uint flags; + void (*export)(struct bgp_export_state *s, eattr *a); + int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); + void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); + }; }; -static const struct bgp_attr_desc bgp_attr_table[]; +static union bgp_attr_desc bgp_attr_table[]; +static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a) +{ + const struct ea_class *class = ea_class_find(a->id); -static inline int bgp_attr_known(uint code); + if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class)) + return NULL; -eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val) + return (const union bgp_attr_desc *) class; +} + +#define BGP_EA_ID(code) (bgp_attr_table[code].id) +#define EA_BGP_ID(code) (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table) + +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val) { - ASSERT(bgp_attr_known(code)); + const union bgp_attr_desc *desc = &bgp_attr_table[code]; - return ea_set_attr( - attrs, - pool, - EA_CODE(PROTOCOL_BGP, code), - flags & ~BAF_EXT_LEN, - bgp_attr_table[code].type, - val - ); + ea_set_attr(to, EA_LITERAL_EMBEDDED( + &desc->class, + flags & ~BAF_EXT_LEN, + val + )); } +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + ad + )); +} +void +bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + + ea_set_attr(to, EA_LITERAL_STORE_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + data, + len + )); +} + +void +bgp_unset_attr(ea_list **to, uint code) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + ea_unset_attr(to, 0, &desc->class); +} #define REPORT(msg, args...) \ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) @@ -106,7 +142,10 @@ bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintp ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) #define UNSET(a) \ - ({ a->type = EAF_TYPE_UNDEF; return; }) + ({ a->undef = 1; return; }) + +#define REJECT(msg, args...) \ + ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; }) #define NEW_BGP "Discarding %s attribute received from AS4-aware neighbor" #define BAD_EBGP "Discarding %s attribute received from EBGP neighbor" @@ -148,7 +187,7 @@ bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+1)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1); buf[3] = a->u.data; return 3+1; @@ -160,7 +199,7 @@ bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+4)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4); put_u32(buf+3, a->u.data); return 3+4; @@ -174,7 +213,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size if (size < (4+len)) return -1; - uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len); + uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len); put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4); return hdr + len; @@ -195,7 +234,7 @@ bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint static int bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); + return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); } @@ -333,9 +372,11 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric) } int -bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) +bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad) { - eattr *ea = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP)); + rta *a = e->attrs; + + eattr *ea = ea_find(a->eattrs, BGP_EA_ID(BA_AIGP)); if (!ea) return 0; @@ -344,7 +385,7 @@ bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) return 0; u64 aigp = get_u64(b + 3); - u64 step = a->igp_metric; + u64 step = rt_get_igp_metric(e); if (!rta_resolvable(a) || (step >= IGP_METRIC_UNKNOWN)) step = BGP_AIGP_MAX; @@ -363,7 +404,7 @@ bgp_total_aigp_metric_(struct rta *a, u64 *metric, const struct adata **ad) static inline int bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) { - if (e->attrs->source == RTS_BGP) + if (rt_get_source_attr(e) == RTS_BGP) return 0; *metric = rt_get_igp_metric(e); @@ -372,9 +413,9 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) } u32 -bgp_rte_igp_metric(struct rte *rt) +bgp_rte_igp_metric(const rte *rt) { - u64 metric = bgp_total_aigp_metric(rt->attrs); + u64 metric = bgp_total_aigp_metric(rt); return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); } @@ -387,7 +428,7 @@ static void bgp_export_origin(struct bgp_export_state *s, eattr *a) { if (a->u.data > 2) - WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data); + REJECT(BAD_VALUE, "ORIGIN", a->u.data); } static void @@ -399,7 +440,7 @@ bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte if (data[0] > 2) WITHDRAW(BAD_VALUE, "ORIGIN", data[0]); - bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]); + bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]); } static void @@ -467,7 +508,7 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte !bgp_as_path_first_as_equal(data, len, p->remote_as)) WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS"); - bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); + bgp_set_attr_data(to, BA_AS_PATH, flags, data, len); } @@ -539,7 +580,7 @@ bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val); + bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val); } @@ -560,7 +601,7 @@ bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, b WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val); + bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val); } @@ -570,7 +611,7 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, if (len != 0) DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len); - bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0); + bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0); } static int @@ -604,7 +645,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b len = aggregator_16to32(data, src); } - bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len); } static void @@ -633,7 +674,7 @@ bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, by struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad); } @@ -654,7 +695,7 @@ bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val); + bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val); } @@ -679,7 +720,7 @@ bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad); + bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad); } static void @@ -798,7 +839,7 @@ bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad); } @@ -811,7 +852,7 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag if (len != 8) DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len); - bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len); } static void @@ -841,7 +882,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt a = as_path_strip_confed(s->pool, a); } - bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); + bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a); } @@ -865,7 +906,7 @@ bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *d if (!bgp_aigp_valid(data, len, err, sizeof(err))) DISCARD("Malformed AIGP attribute - %s", err); - bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len); + bgp_set_attr_data(to, BA_AIGP, flags, data, len); } static void @@ -897,7 +938,7 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad); } static void @@ -909,20 +950,20 @@ bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) /* Perhaps we should just ignore it? */ if (!s->mpls) - WITHDRAW("Unexpected MPLS stack"); + REJECT("Unexpected MPLS stack"); /* Empty MPLS stack is not allowed */ if (!lnum) - WITHDRAW("Malformed MPLS stack - empty"); + REJECT("Malformed MPLS stack - empty"); /* This is ugly, but we must ensure that labels fit into NLRI field */ if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255) - WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum); + REJECT("Malformed MPLS stack - too many labels (%u)", lnum); for (uint i = 0; i < lnum; i++) { if (labels[i] > 0xfffff) - WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]); + REJECT("Malformed MPLS stack - invalid label (%u)", labels[i]); /* TODO: Check for special-purpose label values? */ } @@ -970,10 +1011,29 @@ bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size) } static inline void -bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a) { + if (!(a->flags & BAF_TRANSITIVE)) + UNSET(a); + + a->flags |= BAF_PARTIAL; +} + +static inline void +bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to) +{ + if (!(flags & BAF_OPTIONAL)) + WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); + /* Cannot use bgp_set_attr_data() as it works on known attributes only */ - ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len); + ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len); +} + +static inline void +bgp_format_unknown(const eattr *a, byte *buf, uint size) +{ + if (a->flags & BAF_TRANSITIVE) + bsnprintf(buf, size, "(transitive)"); } @@ -981,10 +1041,10 @@ bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, * Attribute table */ -static const struct bgp_attr_desc bgp_attr_table[] = { +static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_ORIGIN] = { - .name = "origin", - .type = EAF_TYPE_INT, + .name = "bgp_origin", + .type = T_ENUM_BGP_ORIGIN, .flags = BAF_TRANSITIVE, .export = bgp_export_origin, .encode = bgp_encode_u8, @@ -992,69 +1052,69 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_origin, }, [BA_AS_PATH] = { - .name = "as_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_path", + .type = T_PATH, .flags = BAF_TRANSITIVE, .encode = bgp_encode_as_path, .decode = bgp_decode_as_path, }, [BA_NEXT_HOP] = { - .name = "next_hop", - .type = EAF_TYPE_IP_ADDRESS, + .name = "bgp_next_hop", + .type = T_IP, .flags = BAF_TRANSITIVE, .encode = bgp_encode_next_hop, .decode = bgp_decode_next_hop, .format = bgp_format_next_hop, }, [BA_MULTI_EXIT_DISC] = { - .name = "med", - .type = EAF_TYPE_INT, + .name = "bgp_med", + .type = T_INT, .flags = BAF_OPTIONAL, .encode = bgp_encode_u32, .decode = bgp_decode_med, }, [BA_LOCAL_PREF] = { - .name = "local_pref", - .type = EAF_TYPE_INT, + .name = "bgp_local_pref", + .type = T_INT, .flags = BAF_TRANSITIVE, .export = bgp_export_local_pref, .encode = bgp_encode_u32, .decode = bgp_decode_local_pref, }, [BA_ATOMIC_AGGR] = { - .name = "atomic_aggr", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_atomic_aggr", + .type = T_OPAQUE, .flags = BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_atomic_aggr, }, [BA_AGGREGATOR] = { - .name = "aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aggregator", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_aggregator, .decode = bgp_decode_aggregator, .format = bgp_format_aggregator, }, [BA_COMMUNITY] = { - .name = "community", - .type = EAF_TYPE_INT_SET, + .name = "bgp_community", + .type = T_CLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_community, .encode = bgp_encode_u32s, .decode = bgp_decode_community, }, [BA_ORIGINATOR_ID] = { - .name = "originator_id", - .type = EAF_TYPE_ROUTER_ID, + .name = "bgp_originator_id", + .type = T_QUAD, .flags = BAF_OPTIONAL, .export = bgp_export_originator_id, .encode = bgp_encode_u32, .decode = bgp_decode_originator_id, }, [BA_CLUSTER_LIST] = { - .name = "cluster_list", - .type = EAF_TYPE_INT_SET, + .name = "bgp_cluster_list", + .type = T_CLIST, .flags = BAF_OPTIONAL, .export = bgp_export_cluster_list, .encode = bgp_encode_u32s, @@ -1062,43 +1122,43 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_cluster_list, }, [BA_MP_REACH_NLRI] = { - .name = "mp_reach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_reach_nlri", + .type = T_OPAQUE, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_reach_nlri, }, [BA_MP_UNREACH_NLRI] = { - .name = "mp_unreach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_unreach_nlri", + .type = T_OPAQUE, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_unreach_nlri, }, [BA_EXT_COMMUNITY] = { - .name = "ext_community", - .type = EAF_TYPE_EC_SET, + .name = "bgp_ext_community", + .type = T_ECLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_ext_community, .encode = bgp_encode_u32s, .decode = bgp_decode_ext_community, }, [BA_AS4_PATH] = { - .name = "as4_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_as4_path", + .type = T_PATH, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_path, }, [BA_AS4_AGGREGATOR] = { - .name = "as4_aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_as4_aggregator", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_aggregator, .format = bgp_format_aggregator, }, [BA_AIGP] = { - .name = "aigp", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aigp", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS, .export = bgp_export_aigp, .encode = bgp_encode_raw, @@ -1106,16 +1166,17 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_aigp, }, [BA_LARGE_COMMUNITY] = { - .name = "large_community", - .type = EAF_TYPE_LC_SET, + .name = "bgp_large_community", + .type = T_LCLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_large_community, .encode = bgp_encode_u32s, .decode = bgp_decode_large_community, }, [BA_MPLS_LABEL_STACK] = { - .name = "mpls_label_stack", - .type = EAF_TYPE_INT_SET, + .name = "bgp_mpls_label_stack", + .type = T_CLIST, + .readonly = 1, .export = bgp_export_mpls_label_stack, .encode = bgp_encode_mpls_label_stack, .decode = bgp_decode_mpls_label_stack, @@ -1123,12 +1184,32 @@ static const struct bgp_attr_desc bgp_attr_table[] = { }, }; -static inline int -bgp_attr_known(uint code) +eattr * +bgp_find_attr(ea_list *attrs, uint code) { - return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name; + return ea_find(attrs, BGP_EA_ID(code)); } +void +bgp_register_attrs(void) +{ + for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++) + { + if (!bgp_attr_table[i].name) + bgp_attr_table[i] = (union bgp_attr_desc) { + .name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i), + .type = T_OPAQUE, + .flags = BAF_OPTIONAL, + .readonly = 1, + .export = bgp_export_unknown, + .encode = bgp_encode_raw, + .decode = bgp_decode_unknown, + .format = bgp_format_unknown, + }; + + ea_register_init(&bgp_attr_table[i].class); + } +} /* * Attribute export @@ -1137,38 +1218,24 @@ bgp_attr_known(uint code) static inline void bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) { - if (EA_PROTO(a->id) != PROTOCOL_BGP) + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + if (!desc) return; - uint code = EA_ID(a->id); - - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; - - /* The flags might have been zero if the attr was added by filters */ - a->flags = (a->flags & BAF_PARTIAL) | desc->flags; + /* The flags might have been zero if the attr was added locally */ + a->flags = (a->flags & BAF_PARTIAL) | desc->flags; - /* Set partial bit if new opt-trans attribute is attached to non-local route */ - if ((s->src != NULL) && (a->type & EAF_ORIGINATED) && - (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) - a->flags |= BAF_PARTIAL; + /* Set partial bit if new opt-trans attribute is attached to non-local route */ + if ((s->src != NULL) && (a->originated) && + (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) + a->flags |= BAF_PARTIAL; - /* Call specific hook */ - CALL(desc->export, s, a); + /* Call specific hook */ + CALL(desc->export, s, a); - /* Attribute might become undefined in hook */ - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) - return; - } - else - { - /* Don't re-export unknown non-transitive attributes */ - if (!(a->flags & BAF_TRANSITIVE)) - return; - - a->flags |= BAF_PARTIAL; - } + /* Attribute might become undefined in hook */ + if (a->undef) + return; /* Append updated attribute */ to->attrs[to->count++] = *a; @@ -1188,12 +1255,11 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. */ static inline ea_list * -bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) +bgp_export_attrs(struct bgp_export_state *s, const ea_list *a) { /* Merge the attribute list */ - ea_list *new = lp_alloc(s->pool, ea_scan(attrs)); - ea_merge(attrs, new); - ea_sort(new); + ea_list *new = ea_normalize(a); + ASSERT_DIE(new); uint i, count; count = new->count; @@ -1203,7 +1269,7 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) for (i = 0; i < count; i++) bgp_export_attr(s, &new->attrs[i], new); - if (s->err_withdraw) + if (s->err_reject) return NULL; return new; @@ -1217,14 +1283,9 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) static inline int bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP); - - uint code = EA_ID(a->id); - - if (bgp_attr_known(code)) - return bgp_attr_table[code].encode(s, a, buf, size); - else - return bgp_encode_raw(s, a, buf, size); + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + ASSERT_DIE(desc); + return desc->encode(s, a, buf, size); } /** @@ -1289,7 +1350,7 @@ bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs) } static inline void -bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to) { /* Handle duplicate attributes; RFC 7606 3 (g) */ if (BIT32_TEST(s->attrs_seen, code)) @@ -1301,24 +1362,15 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui } BIT32_SET(s->attrs_seen, code); - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + ASSERT_DIE(bgp_attr_table[code].id); + const union bgp_attr_desc *desc = &bgp_attr_table[code]; - /* Handle conflicting flags; RFC 7606 3 (c) */ - if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && - !(desc->flags & BAF_DECODE_FLAGS)) - WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags); + /* Handle conflicting flags; RFC 7606 3 (c) */ + if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && + !(desc->flags & BAF_DECODE_FLAGS)) + WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags); - desc->decode(s, code, flags, data, len, to); - } - else /* Unknown attribute */ - { - if (!(flags & BAF_OPTIONAL)) - WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); - - bgp_decode_unknown(s, code, flags, data, len, to); - } + desc->decode(s, code, flags, data, len, to); } /** @@ -1336,7 +1388,8 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) { struct bgp_proto *p = s->proto; ea_list *attrs = NULL; - uint code, flags, alen; + uint alen; + byte code, flags; byte *pos = data; /* Parse the attributes */ @@ -1401,23 +1454,23 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) /* Reject routes with our ASN in AS_PATH attribute */ if (bgp_as_path_loopy(p, attrs, p->local_as)) - goto withdraw; + goto loop; /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */ if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as)) - goto withdraw; + goto loop; /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */ if (p->is_internal && bgp_originator_id_loopy(p, attrs)) - goto withdraw; + goto loop; /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */ if (p->rr_client && bgp_cluster_list_loopy(p, attrs)) - goto withdraw; + goto loop; /* If there is no local preference, define one */ if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); return attrs; @@ -1434,6 +1487,10 @@ withdraw: s->err_withdraw = 1; return NULL; + +loop: + /* Loops are handled as withdraws, but ignored silently. Do not set err_withdraw. */ + return NULL; } void @@ -1443,7 +1500,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp) { REPORT("Discarding AIGP attribute received on non-AIGP session"); - bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP); + bgp_unset_attr(&a->eattrs, BA_AIGP); } } @@ -1458,7 +1515,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) #define RBH_FN(a,h) h #define RBH_REHASH bgp_rbh_rehash -#define RBH_PARAMS /8, *2, 2, 2, 8, 20 +#define RBH_PARAMS /8, *2, 2, 2, 12, 20 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) @@ -1491,6 +1548,7 @@ bgp_free_bucket_table(struct bgp_channel *c) static struct bgp_bucket * bgp_get_bucket(struct bgp_channel *c, ea_list *new) { + /* Hash and lookup */ u32 hash = ea_hash(new); struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash); @@ -1498,45 +1556,18 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) if (b) return b; - uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr); - uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); - uint size = sizeof(struct bgp_bucket) + ea_size_aligned; - uint i; - byte *dest; - - /* Gather total size of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &new->attrs[i]; + /* Scan the list for total size */ + uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new)); + uint size = sizeof(struct bgp_bucket) + ea_size; - if (!(a->type & EAF_EMBEDDED)) - size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); - } - - /* Create the bucket */ + /* Allocate the bucket */ b = mb_alloc(c->pool, size); *b = (struct bgp_bucket) { }; init_list(&b->prefixes); b->hash = hash; - /* Copy list of extended attributes */ - memcpy(b->eattrs, new, ea_size); - dest = ((byte *) b->eattrs) + ea_size_aligned; - - /* Copy values of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &b->eattrs->attrs[i]; - - if (!(a->type & EAF_EMBEDDED)) - { - const struct adata *oa = a->u.ptr; - struct adata *na = (struct adata *) dest; - memcpy(na, oa, sizeof(struct adata) + oa->length); - a->u.ptr = na; - dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); - } - } + /* Copy the ea_list */ + ea_list_copy(b->eattrs, new, ea_size); /* Insert the bucket to send queue and bucket hash */ add_tail(&c->bucket_queue, &b->send_node); @@ -1600,7 +1631,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 24 +#define PXH_PARAMS /8, *2, 2, 2, 12, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1626,7 +1657,8 @@ bgp_free_prefix_table(struct bgp_channel *c) static struct bgp_prefix * bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) { - u32 hash = net_hash(net) ^ u32_hash(path_id); + /* We must use a different hash function than the rtable */ + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id)); struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); if (px) @@ -1657,7 +1689,7 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); if (c->prefix_slab) - sl_free(c->prefix_slab, px); + sl_free(px); else mb_free(px); } @@ -1682,6 +1714,22 @@ bgp_preexport(struct channel *c, rte *e) if (src == NULL) return 0; + /* Reject flowspec that failed validation */ + if (net_is_flow(e->net)) + switch (rt_get_flowspec_valid(e)) + { + case FLOWSPEC_VALID: + break; + case FLOWSPEC_INVALID: + return -1; + case FLOWSPEC_UNKNOWN: + ASSUME((rt_get_source_attr(e) != RTS_BGP) || + !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table); + break; + case FLOWSPEC__MAX: + bug("This never happens."); + } + /* IBGP route reflection, RFC 4456 */ if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) { @@ -1698,7 +1746,7 @@ bgp_preexport(struct channel *c, rte *e) /* Handle well-known communities, RFC 1997 */ struct eattr *com; if (p->cf->interpret_communities && - (com = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)))) + (com = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)))) { const struct adata *d = com->u.ptr; @@ -1734,7 +1782,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* ORIGIN attribute - mandatory, attach if missing */ if (! bgp_find_attr(attrs0, BA_ORIGIN)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); + bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); /* AS_PATH attribute - mandatory */ a = bgp_find_attr(attrs0, BA_AS_PATH); @@ -1749,24 +1797,24 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* IBGP or route server -> just ensure there is one */ if (!a) - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata); } else if (p->is_interior) { /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */ ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); } else /* Regular EBGP (no RS, no confederation) */ { /* Regular EBGP -> prepend ASN as regular sequence */ ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); /* MULTI_EXIT_DESC attribute - accept only if set in export filter */ a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC); - if (a && !(a->type & EAF_FRESH)) - bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); + if (a && !(a->fresh)) + bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC); } /* NEXT_HOP attribute - delegated to AF-specific hook */ @@ -1775,16 +1823,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* LOCAL_PREF attribute - required for IBGP, attach if missing */ if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); /* AIGP attribute - accumulate local metric or originate new one */ u64 metric; if (s.local_next_hop && - (bgp_total_aigp_metric_(e->attrs, &metric, &ad) || + (bgp_total_aigp_metric_(e, &metric, &ad) || (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad)))) { ad = bgp_aigp_set_metric(pool, ad, metric); - bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad); } /* IBGP route reflection, RFC 4456 */ @@ -1792,7 +1840,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* ORIGINATOR_ID attribute - attach if not already set */ if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id); + bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id); /* CLUSTER_LIST attribute - prepend cluster ID */ a = bgp_find_attr(attrs0, BA_CLUSTER_LIST); @@ -1807,7 +1855,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at ad = int_set_prepend(pool, ad, p->rr_cluster_id); /* Should be at least one prepended cluster ID */ - bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad); + bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad); } /* AS4_* transition attributes, RFC 6793 4.2.2 */ @@ -1816,15 +1864,15 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at a = bgp_find_attr(attrs, BA_AS_PATH); if (a && as_path_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); } a = bgp_find_attr(attrs, BA_AGGREGATOR); if (a && aggregator_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr); + bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr); } } @@ -1849,13 +1897,15 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c if (new) { - struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2); + struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, tmp_linpool); + + /* Error during attribute processing */ + if (!attrs) + log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n); /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); path = new->src->global_id; - - lp_flush(bgp_linpool2); } else { @@ -1873,7 +1923,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c static inline u32 bgp_get_neighbor(rte *r) { - eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); u32 as; if (e && as_path_get_first_regular(e->u.ptr, &as)) @@ -1894,7 +1944,7 @@ rte_stale(rte *r) return 0; /* If staleness is unknown, compute and cache it */ - eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + eattr *a = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)); if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE)) { r->pflags |= BGP_REF_STALE; @@ -1940,8 +1990,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* Start with local preferences */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); n = x ? x->u.data : new_bgp->cf->default_local_pref; o = y ? y->u.data : old_bgp->cf->default_local_pref; if (n > o) @@ -1950,8 +2000,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 7311 4.1 - Apply AIGP metric */ - u64 n2 = bgp_total_aigp_metric(new->attrs); - u64 o2 = bgp_total_aigp_metric(old->attrs); + u64 n2 = bgp_total_aigp_metric(new); + u64 o2 = bgp_total_aigp_metric(old); if (n2 < o2) return 1; if (n2 > o2) @@ -1960,8 +2010,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; if (n < o) @@ -1971,8 +2021,8 @@ bgp_rte_better(rte *new, rte *old) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; o = y ? y->u.data : ORIGIN_INCOMPLETE; if (n < o) @@ -1994,8 +2044,8 @@ bgp_rte_better(rte *new, rte *old) if (new_bgp->cf->med_metric || old_bgp->cf->med_metric || (bgp_get_neighbor(new) == bgp_get_neighbor(old))) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); n = x ? x->u.data : new_bgp->cf->default_med; o = y ? y->u.data : old_bgp->cf->default_med; if (n < o) @@ -2011,8 +2061,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0; - o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0; + n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0; + o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0; if (n < o) return 1; if (n > o) @@ -2020,8 +2070,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_ORIGINATOR_ID)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_ORIGINATOR_ID)); n = x ? x->u.data : new_bgp->remote_id; o = y ? y->u.data : old_bgp->remote_id; @@ -2038,8 +2088,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4456 9. b) Compare cluster list lengths */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); + x = ea_find(new->attrs->eattrs, BGP_EA_ID(BA_CLUSTER_LIST)); + y = ea_find(old->attrs->eattrs, BGP_EA_ID(BA_CLUSTER_LIST)); n = x ? int_set_get_size(x->u.ptr) : 0; o = y ? int_set_get_size(y->u.ptr) : 0; if (n < o) @@ -2061,17 +2111,20 @@ bgp_rte_mergable(rte *pri, rte *sec) u32 p, s; /* Skip suppressed routes (see bgp_rte_recalculate()) */ - /* LLGR draft - depreference stale routes */ - if (pri->pflags != sec->pflags) + if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED) return 0; /* RFC 4271 9.1.2.1. Route resolvability test */ if (rta_resolvable(pri->attrs) != rta_resolvable(sec->attrs)) return 0; + /* LLGR draft - depreference stale routes */ + if (rte_stale(pri) != rte_stale(sec)) + return 0; + /* Start with local preferences */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_LOCAL_PREF)); p = x ? x->u.data : pri_bgp->cf->default_local_pref; s = y ? y->u.data : sec_bgp->cf->default_local_pref; if (p != s) @@ -2080,8 +2133,8 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; @@ -2093,8 +2146,8 @@ bgp_rte_mergable(rte *pri, rte *sec) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); p = x ? x->u.data : ORIGIN_INCOMPLETE; s = y ? y->u.data : ORIGIN_INCOMPLETE; if (p != s) @@ -2104,8 +2157,8 @@ bgp_rte_mergable(rte *pri, rte *sec) if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(pri->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs->eattrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); p = x ? x->u.data : pri_bgp->cf->default_med; s = y ? y->u.data : sec_bgp->cf->default_med; if (p != s) @@ -2117,8 +2170,8 @@ bgp_rte_mergable(rte *pri, rte *sec) return 0; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0; - s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0; + p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0; + s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0; if (p != s) return 0; @@ -2131,7 +2184,7 @@ bgp_rte_mergable(rte *pri, rte *sec) static inline int same_group(rte *r, u32 lpref, u32 lasn) { - return (r->attrs->pref == lpref) && (bgp_get_neighbor(r) == lasn); + return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn); } static inline int @@ -2145,7 +2198,7 @@ int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) { rte *key = new ? new : old; - u32 lpref = key->attrs->pref; + u32 lpref = rt_get_preference(key); u32 lasn = bgp_get_neighbor(key); int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0; @@ -2212,7 +2265,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) /* The default case - find a new best-in-group route */ rte *r = new; /* new may not be in the list */ - for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) { s->rte.pflags |= BGP_REF_SUPPRESSED; @@ -2229,7 +2282,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) new->pflags &= ~BGP_REF_SUPPRESSED; /* Found all existing routes mergable with best-in-group */ - for (struct rte_storage *s = net->routes; rte_is_valid(&s->rte); s = s->next) + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte)) s->rte.pflags &= ~BGP_REF_SUPPRESSED; @@ -2270,7 +2323,7 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) rte * bgp_rte_modify_stale(struct rte *r, struct linpool *pool) { - eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); + eattr *ea = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)); const struct adata *ad = ea ? ea->u.ptr : NULL; uint flags = ea ? ea->flags : BAF_PARTIAL; @@ -2286,7 +2339,7 @@ bgp_rte_modify_stale(struct rte *r, struct linpool *pool) e0 = *r; e0.attrs = a; - bgp_set_attr_ptr(&(a->eattrs), pool, BA_COMMUNITY, flags, + bgp_set_attr_ptr(&(a->eattrs), BA_COMMUNITY, flags, int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); e0.pflags |= BGP_REF_STALE; @@ -2306,8 +2359,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR); /* First, unset AS4_* attributes */ - if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH); - if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR); + if (p4) bgp_unset_attr(attrs, BA_AS4_PATH); + if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR); /* Handle AGGREGATOR attribute */ if (a2 && a4) @@ -2340,60 +2393,37 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) } } -int -bgp_get_attr(const eattr *a, byte *buf, int buflen) -{ - uint i = EA_ID(a->id); - const struct bgp_attr_desc *d; - int len; - - if (bgp_attr_known(i)) - { - d = &bgp_attr_table[i]; - len = bsprintf(buf, "%s", d->name); - buf += len; - if (d->format) - { - *buf++ = ':'; - *buf++ = ' '; - d->format(a, buf, buflen - len - 2); - return GA_FULL; - } - return GA_NAME; - } - - bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : ""); - return GA_NAME; -} - void bgp_get_route_info(rte *e, byte *buf) { - eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + eattr *p = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_AS_PATH)); + eattr *o = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_ORIGIN)); u32 origas; - buf += bsprintf(buf, " (%d", e->attrs->pref); + buf += bsprintf(buf, " (%d", rt_get_preference(e)); - if (e->pflags & BGP_REF_SUPPRESSED) - buf += bsprintf(buf, "-"); + if (!net_is_flow(e->net)) + { + if (e->pflags & BGP_REF_SUPPRESSED) + buf += bsprintf(buf, "-"); - if (rte_stale(e)) - buf += bsprintf(buf, "s"); + if (rte_stale(e)) + buf += bsprintf(buf, "s"); - u64 metric = bgp_total_aigp_metric(e->attrs); - if (metric < BGP_AIGP_MAX) - { - buf += bsprintf(buf, "/%lu", metric); - } - else if (e->attrs->igp_metric) - { - if (!rta_resolvable(e->attrs)) - buf += bsprintf(buf, "/-"); - else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) - buf += bsprintf(buf, "/?"); - else - buf += bsprintf(buf, "/%d", e->attrs->igp_metric); + u64 metric = bgp_total_aigp_metric(e); + if (metric < BGP_AIGP_MAX) + { + buf += bsprintf(buf, "/%lu", metric); + } + else if (metric = rt_get_igp_metric(e)) + { + if (!rta_resolvable(e->attrs)) + buf += bsprintf(buf, "/-"); + else if (metric >= IGP_METRIC_UNKNOWN) + buf += bsprintf(buf, "/?"); + else + buf += bsprintf(buf, "/%d", metric); + } } buf += bsprintf(buf, ") ["); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 5c78bfa1..84430287 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -101,6 +101,7 @@ * RFC 8203 - BGP Administrative Shutdown Communication * RFC 8212 - Default EBGP Route Propagation Behavior without Policies * RFC 8654 - Extended Message Support for BGP + * RFC 9117 - Revised Validation Procedure for BGP Flow Specifications * draft-ietf-idr-ext-opt-param-07 * draft-uttaro-idr-bgp-persistence-04 * draft-walton-bgp-hostname-capability-02 @@ -113,7 +114,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -125,9 +126,7 @@ #include "bgp.h" -struct linpool *bgp_linpool; /* Global temporary pool */ -struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ -static list bgp_sockets; /* Global list of listening sockets */ +static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ static void bgp_connect(struct bgp_proto *p); @@ -157,16 +156,17 @@ bgp_open(struct bgp_proto *p) ip_addr addr = p->cf->strict_bind ? p->cf->local_ip : (p->ipv4 ? IPA_NONE4 : IPA_NONE6); uint port = p->cf->local_port; - - /* FIXME: Add some global init? */ - if (!bgp_linpool) - init_list(&bgp_sockets); + uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; + uint flag_mask = SKF_FREEBIND; /* We assume that cf->iface is defined iff cf->local_ip is link-local */ WALK_LIST(bs, bgp_sockets) - if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->sport == port) && - (bs->sk->iface == ifa) && (bs->sk->vrf == p->p.vrf)) + if (ipa_equal(bs->sk->saddr, addr) && + (bs->sk->sport == port) && + (bs->sk->iface == ifa) && + (bs->sk->vrf == p->p.vrf) && + ((bs->sk->flags & flag_mask) == flags)) { bs->uc++; p->sock = bs; @@ -180,7 +180,7 @@ bgp_open(struct bgp_proto *p) sk->sport = port; sk->iface = ifa; sk->vrf = p->p.vrf; - sk->flags = 0; + sk->flags = flags; sk->tos = IP_PREC_INTERNET_CONTROL; sk->rbsize = BGP_RX_BUFFER_SIZE; sk->tbsize = BGP_TX_BUFFER_SIZE; @@ -198,12 +198,6 @@ bgp_open(struct bgp_proto *p) add_tail(&bgp_sockets, &bs->n); - if (!bgp_linpool) - { - bgp_linpool = lp_new_default(proto_pool); - bgp_linpool2 = lp_new_default(proto_pool); - } - return 0; err: @@ -232,15 +226,6 @@ bgp_close(struct bgp_proto *p) rfree(bs->sk); rem_node(&bs->n); mb_free(bs); - - if (!EMPTY_LIST(bgp_sockets)) - return; - - rfree(bgp_linpool); - bgp_linpool = NULL; - - rfree(bgp_linpool2); - bgp_linpool2 = NULL; } static inline int @@ -775,25 +760,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); break; case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c); - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); + rt_refresh_begin(c->c.table, &c->c.in_req); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c); - rt_modify_stale(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); + rt_modify_stale(c->c.table, &c->c.in_req); break; } } else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); + rt_refresh_end(c->c.table, &c->c.in_req); } /* Reset bucket and prefix tables */ @@ -834,7 +819,7 @@ bgp_graceful_restart_done(struct bgp_channel *c) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); } /** @@ -876,7 +861,7 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c); + rt_modify_stale(c->c.table, &c->c.in_req); } } else @@ -914,10 +899,10 @@ bgp_refresh_begin(struct bgp_channel *c) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c); + rt_refresh_begin(c->c.in_table, &c->c.in_req); } /** @@ -938,7 +923,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); if (c->c.in_table) rt_prune_sync(c->c.in_table, 0); @@ -1736,6 +1721,9 @@ bgp_channel_init(struct channel *C, struct channel_config *CF) if (cf->igp_table_ip6) c->igp_table_ip6 = cf->igp_table_ip6->table; + + if (cf->base_table) + c->base_table = cf->base_table->table; } static int @@ -1751,6 +1739,12 @@ bgp_channel_start(struct channel *C) if (c->igp_table_ip6) rt_lock_table(c->igp_table_ip6); + if (c->base_table) + { + rt_lock_table(c->base_table); + rt_flowspec_link(c->base_table, c->c.table); + } + c->pool = p->p.pool; // XXXX bgp_init_bucket_table(c); bgp_init_prefix_table(c); @@ -1835,6 +1829,12 @@ bgp_channel_cleanup(struct channel *C) if (c->igp_table_ip6) rt_unlock_table(c->igp_table_ip6); + if (c->base_table) + { + rt_flowspec_unlink(c->base_table, c->c.table); + rt_unlock_table(c->base_table); + } + c->index = 0; /* Cleanup rest of bgp_channel starting at pool field */ @@ -1882,6 +1882,25 @@ bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 cf_error("Undefined IGP table"); } +static struct rtable_config * +bgp_default_base_table(struct bgp_config *cf, struct bgp_channel_config *cc) +{ + /* Expected table type */ + u32 type = (cc->afi == BGP_AF_FLOW4) ? NET_IP4 : NET_IP6; + + /* First, try appropriate IP channel */ + u32 afi2 = BGP_AF(BGP_AFI(cc->afi), BGP_SAFI_UNICAST); + struct bgp_channel_config *cc2 = bgp_find_channel_config(cf, afi2); + if (cc2 && (cc2->c.table->addr_type == type)) + return cc2->c.table; + + /* Last, try default table of given type */ + struct rtable_config *tab = cf->c.global->def_tables[type]; + if (tab) + return tab; + + cf_error("Undefined base table"); +} void bgp_postconfig(struct proto_config *CF) @@ -2026,6 +2045,14 @@ bgp_postconfig(struct proto_config *CF) cf_error("Mismatched IGP table type"); } + /* Default value of base table */ + if ((BGP_SAFI(cc->afi) == BGP_SAFI_FLOW) && cc->validate && !cc->base_table) + cc->base_table = bgp_default_base_table(cf, cc); + + if (cc->base_table && !cc->base_table->trie_used) + cf_error("Flowspec validation requires base table (%s) with trie", + cc->base_table->name); + if (cf->multihop && (cc->gw_mode == GW_DIRECT)) cf_error("Multihop BGP cannot use direct gateway mode"); @@ -2094,7 +2121,7 @@ bgp_reconfigure(struct proto *P, struct proto_config *CF) return same; } -#define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL ) +#define TABLE(cf, NAME) ((cf)->NAME ? (cf)->NAME->table : NULL ) static int bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed) @@ -2105,6 +2132,7 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor struct bgp_channel_config *old = c->cf; if ((new->secondary != old->secondary) || + (new->validate != old->validate) || (new->gr_able != old->gr_able) || (new->llgr_able != old->llgr_able) || (new->llgr_time != old->llgr_time) || @@ -2112,8 +2140,9 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->add_path != old->add_path) || (new->import_table != old->import_table) || (new->export_table != old->export_table) || - (IGP_TABLE(new, ip4) != IGP_TABLE(old, ip4)) || - (IGP_TABLE(new, ip6) != IGP_TABLE(old, ip6))) + (TABLE(new, igp_table_ip4) != TABLE(old, igp_table_ip4)) || + (TABLE(new, igp_table_ip6) != TABLE(old, igp_table_ip6)) || + (TABLE(new, base_table) != TABLE(old, base_table))) return 0; if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP)) @@ -2438,6 +2467,9 @@ bgp_show_proto_info(struct proto *P) else cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT)) + cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); cli_msg(-1006, " Local AS: %u", p->cf->local_as); @@ -2527,6 +2559,9 @@ bgp_show_proto_info(struct proto *P) if (c->igp_table_ip6) cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name); + + if (c->base_table) + cli_msg(-1006, " Base table: %s", c->base_table->name); } } } @@ -2544,7 +2579,6 @@ struct channel_class channel_bgp = { struct protocol proto_bgp = { .name = "BGP", .template = "bgp%d", - .class = PROTOCOL_BGP, .preference = DEF_PREF_BGP, .channel_mask = NB_IP | NB_VPN | NB_FLOW, .proto_size = sizeof(struct bgp_proto), @@ -2556,7 +2590,12 @@ struct protocol proto_bgp = { .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, - .get_attr = bgp_get_attr, .get_route_info = bgp_get_route_info, .show_proto_info = bgp_show_proto_info }; + +void bgp_build(void) +{ + proto_build(&proto_bgp); + bgp_register_attrs(); +} diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index c79dd1b2..662d9d48 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -14,13 +14,12 @@ #include <stdint.h> #include <setjmp.h> #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" //#include "lib/lists.h" #include "lib/hash.h" #include "lib/socket.h" -struct linpool; struct eattr; @@ -86,6 +85,7 @@ struct bgp_config { int peer_type; /* Internal or external BGP (BGP_PT_*, optional) */ int multihop; /* Number of hops if multihop */ int strict_bind; /* Bind listening socket to local address */ + int free_bind; /* Bind listening socket with SKF_FREEBIND */ int ttl_security; /* Enable TTL security [RFC 5082] */ int compare_path_lengths; /* Use path lengths when selecting best route */ int med_metric; /* Compare MULTI_EXIT_DISC even between routes from differen ASes */ @@ -146,6 +146,7 @@ struct bgp_channel_config { u8 mandatory; /* Channel is mandatory in capability negotiation */ u8 gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ u8 secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */ + u8 validate; /* Validate Flowspec per RFC 8955 (6) */ u8 gr_able; /* Allow full graceful restart for the channel */ u8 llgr_able; /* Allow full long-lived GR for the channel */ uint llgr_time; /* Long-lived graceful restart stale time */ @@ -159,6 +160,7 @@ struct bgp_channel_config { struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + struct rtable_config *base_table; /* Base table for Flowspec validation */ }; #define BGP_PT_INTERNAL 1 @@ -344,6 +346,7 @@ struct bgp_channel { rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ + rtable *base_table; /* Base table for Flowspec validation */ /* Rest are zeroed when down */ pool *pool; @@ -400,7 +403,7 @@ struct bgp_export_state { int mpls; u32 attrs_seen[1]; - uint err_withdraw; + uint err_reject; uint local_next_hop; }; @@ -426,6 +429,7 @@ struct bgp_parse_state { int as4_session; int add_path; int mpls; + int reach_nlri_step; u32 attrs_seen[256/32]; @@ -452,7 +456,6 @@ struct bgp_parse_state { uint err_subcode; jmp_buf err_jmpbuf; - struct hostentry *hostentry; adata *mpls_labels; /* Cached state for bgp_rte_update() */ @@ -493,9 +496,6 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode) longjmp(s->err_jmpbuf, 1); } -extern struct linpool *bgp_linpool; -extern struct linpool *bgp_linpool2; - void bgp_start_timer(timer *t, uint value); void bgp_check_config(struct bgp_config *c); @@ -519,7 +519,9 @@ struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); static inline int rta_resolvable(rta *a) { - return a->dest == RTD_UNICAST; + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } @@ -537,34 +539,13 @@ rta_resolvable(rta *a) /* attrs.c */ -static inline eattr * -bgp_find_attr(ea_list *attrs, uint code) -{ - return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code)); -} - eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val); +bgp_find_attr(ea_list *attrs, uint code); -static inline void -bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } - -static inline void -bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } - -static inline void -bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - bmemcpy(a->data, data, len); - bgp_set_attr(to, pool, code, flags, (uintptr_t) a); -} - -static inline void -bgp_unset_attr(ea_list **to, struct linpool *pool, uint code) -{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; } +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val); +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad); +void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len); +void bgp_unset_attr(ea_list **to, uint code); int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size); @@ -586,26 +567,27 @@ int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); -u32 bgp_rte_igp_metric(struct rte *); +u32 bgp_rte_igp_metric(const rte *); void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); int bgp_preexport(struct channel *, struct rte *); -int bgp_get_attr(const struct eattr *e, byte *buf, int buflen); void bgp_get_route_info(struct rte *, byte *); -int bgp_total_aigp_metric_(rta *a, u64 *metric, const struct adata **ad); +int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad); #define BGP_AIGP_METRIC 1 #define BGP_AIGP_MAX U64(0xffffffffffffffff) static inline u64 -bgp_total_aigp_metric(rta *a) +bgp_total_aigp_metric(const rte *e) { u64 metric = BGP_AIGP_MAX; const struct adata *ad; - bgp_total_aigp_metric_(a, &metric, &ad); + bgp_total_aigp_metric_(e, &metric, &ad); return metric; } +void bgp_register_attrs(void); + /* packets.c */ @@ -642,26 +624,31 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */ -#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */ -#define BA_AS_PATH 0x02 /* WM */ -#define BA_NEXT_HOP 0x03 /* WM */ -#define BA_MULTI_EXIT_DISC 0x04 /* ON */ -#define BA_LOCAL_PREF 0x05 /* WD */ -#define BA_ATOMIC_AGGR 0x06 /* WD */ -#define BA_AGGREGATOR 0x07 /* OT */ -#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */ -#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */ -#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */ -#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */ -#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */ -#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */ -#define BA_AS4_PATH 0x11 /* RFC 6793 */ -#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ -#define BA_AIGP 0x1a /* RFC 7311 */ -#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ +enum bgp_attr_id { + BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */ + BA_AS_PATH = 0x02, /* WM */ + BA_NEXT_HOP = 0x03, /* WM */ + BA_MULTI_EXIT_DISC = 0x04, /* ON */ + BA_LOCAL_PREF = 0x05, /* WD */ + BA_ATOMIC_AGGR = 0x06, /* WD */ + BA_AGGREGATOR = 0x07, /* OT */ + BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */ + BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */ + BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */ + BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */ + BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */ + BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */ + BA_AS4_PATH = 0x11, /* RFC 6793 */ + BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */ + BA_AIGP = 0x1a, /* RFC 7311 */ + BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */ /* Bird's private internal BGP attributes */ -#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ + BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */ + +/* Maximum */ + BGP_ATTR_MAX, +}; /* BGP connection states */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 2dfbdca9..24f3ec8f 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -19,19 +19,18 @@ CF_DECLS CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, - START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, - BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, - BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY, + START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, + BGP_LOCAL_PREF, BGP_MED, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, - DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, - BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, + DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, + IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS, - DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE, - FIRST) + DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE, + FIRST, FREE, VALIDATE, BASE) %type <i> bgp_nh %type <i32> bgp_afi @@ -44,6 +43,8 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER, CF_GRAMMAR +toksym: BGP_MED | BGP_LOCAL_PREF | SOURCE ; + proto: bgp_proto '}' ; bgp_proto_start: proto_start BGP { @@ -155,6 +156,7 @@ bgp_proto: } | bgp_proto DYNAMIC NAME DIGITS expr ';' { BGP_CFG->dynamic_name_digits = $5; if ($5>10) cf_error("Dynamic name digits must be at most 10"); } | bgp_proto STRICT BIND bool ';' { BGP_CFG->strict_bind = $4; } + | bgp_proto FREE BIND bool ';' { BGP_CFG->free_bind = $4; } | bgp_proto PATH METRIC bool ';' { BGP_CFG->compare_path_lengths = $4; } | bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; } | bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; } @@ -255,6 +257,11 @@ bgp_channel_item: | GATEWAY DIRECT { BGP_CC->gw_mode = GW_DIRECT; } | GATEWAY RECURSIVE { BGP_CC->gw_mode = GW_RECURSIVE; } | SECONDARY bool { BGP_CC->secondary = $2; } + | VALIDATE bool { + BGP_CC->validate = $2; + if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW) + cf_error("Validate option limited to flowspec channels"); + } | GRACEFUL RESTART bool { BGP_CC->gr_able = $3; } | LONG LIVED GRACEFUL RESTART bool { BGP_CC->llgr_able = $5; } | LONG LIVED STALE TIME expr { BGP_CC->llgr_time = $5; } @@ -278,6 +285,16 @@ bgp_channel_item: else cf_error("Mismatched IGP table type"); } + | BASE TABLE rtable { + if (BGP_SAFI(BGP_CC->afi) != BGP_SAFI_FLOW) + cf_error("Base table option limited to flowspec channels"); + + if (((BGP_CC->afi == BGP_AF_FLOW4) && ($3->addr_type == NET_IP4)) || + ((BGP_CC->afi == BGP_AF_FLOW6) && ($3->addr_type == NET_IP6))) + BGP_CC->base_table = $3; + else + cf_error("Mismatched base table type"); + } ; bgp_channel_opts: @@ -300,36 +317,6 @@ bgp_channel_end: bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end; - -dynamic_attr: BGP_ORIGIN - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); } ; -dynamic_attr: BGP_PATH - { $$ = f_new_dynamic_attr(EAF_TYPE_AS_PATH, T_PATH, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); } ; -dynamic_attr: BGP_NEXT_HOP - { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)); } ; -dynamic_attr: BGP_MED - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); } ; -dynamic_attr: BGP_LOCAL_PREF - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); } ; -dynamic_attr: BGP_ATOMIC_AGGR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_ATOMIC_AGGR)); } ; -dynamic_attr: BGP_AGGREGATOR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AGGREGATOR)); } ; -dynamic_attr: BGP_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); } ; -dynamic_attr: BGP_ORIGINATOR_ID - { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); } ; -dynamic_attr: BGP_CLUSTER_LIST - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ; -dynamic_attr: BGP_EXT_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ; -dynamic_attr: BGP_AIGP - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ; -dynamic_attr: BGP_LARGE_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ; - - - CF_ENUM(T_ENUM_BGP_ORIGIN, ORIGIN_, IGP, EGP, INCOMPLETE) CF_CODE diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 647551e5..9911738d 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -15,8 +15,8 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "proto/mrt/mrt.h" #include "conf/conf.h" #include "lib/unaligned.h" @@ -932,11 +932,15 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) #define WITHDRAW(msg, args...) \ ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) +#define REJECT(msg, args...) \ + ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; }) + #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" #define NO_NEXT_HOP "Missing NEXT_HOP attribute" #define NO_LABEL_STACK "Missing MPLS stack" +#define MISMATCHED_AF " - mismatched address family (%I for %s)" static void bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) @@ -949,67 +953,86 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) neighbor *nbr = NULL; /* GW_DIRECT -> single_hop -> p->neigh != NULL */ - if (ipa_nonzero(gw)) + if (ipa_nonzero2(gw)) nbr = neigh_find(&p->p, gw, NULL, 0); else if (ipa_nonzero(ll)) nbr = neigh_find(&p->p, ll, p->neigh->iface, 0); + else + WITHDRAW(BAD_NEXT_HOP " - zero address"); + + if (!nbr) + WITHDRAW(BAD_NEXT_HOP " - address %I not directly reachable", ipa_nonzero(gw) ? gw : ll); - if (!nbr || (nbr->scope == SCOPE_HOST)) - WITHDRAW(BAD_NEXT_HOP); + if (nbr->scope == SCOPE_HOST) + WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr); - a->dest = RTD_UNICAST; - a->nh.gw = nbr->addr; - a->nh.iface = nbr->iface; - a->igp_metric = c->cf->cost; + ea_set_attr_u32(&a->eattrs, &ea_gen_igp_metric, 0, c->cf->cost); + + struct nexthop_adata nhad = { + .nh = { + .gw = nbr->addr, + .iface = nbr->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, + }; + ea_set_attr_data(&a->eattrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); } else /* GW_RECURSIVE */ { - if (ipa_zero(gw)) - WITHDRAW(BAD_NEXT_HOP); + if (ipa_zero2(gw)) + WITHDRAW(BAD_NEXT_HOP " - zero address"); rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6; - s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); - - if (!s->mpls) - rta_apply_hostentry(a, s->hostentry, NULL); - - /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ + if (s->mpls) + { + u32 labels[BGP_MPLS_MAX]; + ea_set_hostentry(&a->eattrs, c->c.table, tab, gw, ll, BGP_MPLS_MAX, labels); + } + else + ea_set_hostentry(&a->eattrs, c->c.table, tab, gw, ll, 0, NULL); } } static void -bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) +bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 lnum, u32 labels[lnum]) { if (lnum > MPLS_MAX_LABEL_STACK) { REPORT("Too many MPLS labels ($u)", lnum); - a->dest = RTD_UNREACHABLE; - a->hostentry = NULL; - a->nh = (struct nexthop) { }; + ea_set_dest(&a->eattrs, 0, RTD_UNREACHABLE); return; } /* Handle implicit NULL as empty MPLS stack */ if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) - lnum = 0; + lnum = s->mpls_labels->length = 0; if (s->channel->cf->gw_mode == GW_DIRECT) { - a->nh.labels = lnum; - memcpy(a->nh.label, labels, 4*lnum); + eattr *e = ea_find(a->eattrs, &ea_gen_nexthop); + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nh; + + memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length); + nh.nhad.nh.labels = lnum; + memcpy(nh.labels, labels, lnum * sizeof(u32)); + nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32); } else /* GW_RECURSIVE */ { - mpls_label_stack ms; - - ms.len = lnum; - memcpy(ms.stack, labels, 4*lnum); - rta_apply_hostentry(a, s->hostentry, &ms); + eattr *e = ea_find(a->eattrs, &ea_gen_hostentry); + ASSERT_DIE(e); + struct hostentry_adata *head = (void *) e->u.ptr; + memcpy(&head->labels, labels, lnum * sizeof(u32)); + head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data; } } - static int bgp_match_src(struct bgp_export_state *s, int mode) { @@ -1039,7 +1062,7 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return 1; /* Keep it when explicitly set in export filter */ - if (a->type & EAF_FRESH) + if (a->fresh) return 1; /* Check for non-matching AF */ @@ -1056,7 +1079,7 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return p->neigh && (p->neigh->iface == ifa); } -static inline int +static inline struct nexthop * bgp_use_gateway(struct bgp_export_state *s) { struct bgp_proto *p = s->proto; @@ -1065,22 +1088,32 @@ bgp_use_gateway(struct bgp_export_state *s) /* Handle next hop self option - also applies to gateway */ if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self)) - return 0; + return NULL; + + eattr *nhea = ea_find(ra->eattrs, &ea_gen_nexthop); + if (!nhea) + return NULL; /* We need one valid global gateway */ - if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) - return 0; + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad) || + !NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) || + ipa_is_link_local(nhad->nh.gw)) + return NULL; /* Check for non-matching AF */ - if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) - return 0; + if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) + return NULL; /* Use it when exported to internal peers */ if (p->is_interior) - return 1; + return &nhad->nh; /* Use it when forwarded to single-hop BGP peer on on the same iface */ - return p->neigh && (p->neigh->iface == ra->nh.iface); + if (p->neigh && (p->neigh->iface == nhad->nh.iface)) + return &nhad->nh; + + return NULL; } static void @@ -1088,31 +1121,31 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (!a || !bgp_use_next_hop(s, a)) { - if (bgp_use_gateway(s)) + struct nexthop *nhloc; + if (nhloc = bgp_use_gateway(s)) { - rta *ra = s->route->attrs; - ip_addr nh[1] = { ra->nh.gw }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + ip_addr nh[1] = { nhloc->gw }; + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16); if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; - uint lnum = ra->nh.labels ? ra->nh.labels : 1; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + u32 *labels = nhloc->labels ? nhloc->label : &implicit_null; + uint lnum = nhloc->labels ? nhloc->labels : 1; + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); } } else { ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); s->local_next_hop = 1; /* TODO: Use local MPLS assigned label */ if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); } } } @@ -1120,28 +1153,28 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) /* Check if next hop is valid */ a = bgp_find_attr(*to, BA_NEXT_HOP); if (!a) - WITHDRAW(NO_NEXT_HOP); + REJECT(NO_NEXT_HOP); ip_addr *nh = (void *) a->u.ptr->data; ip_addr peer = s->proto->remote_ip; uint len = a->u.ptr->length; /* Forbid zero next hop */ - if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) - WITHDRAW(BAD_NEXT_HOP); + if (ipa_zero2(nh[0]) && ((len != 32) || ipa_zero(nh[1]))) + REJECT(BAD_NEXT_HOP " - zero address"); /* Forbid next hop equal to neighbor IP */ if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) - WITHDRAW(BAD_NEXT_HOP); + REJECT(BAD_NEXT_HOP " - neighbor address %I", peer); /* Forbid next hop with non-matching AF */ if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) && !s->channel->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + REJECT(BAD_NEXT_HOP MISMATCHED_AF, nh[0], s->channel->desc->name); /* Just check if MPLS stack */ if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK)) - WITHDRAW(NO_LABEL_STACK); + REJECT(NO_LABEL_STACK); } static uint @@ -1212,11 +1245,11 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) ad->length = 16; if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name); // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_set_attr_ptr(&(a->eattrs), BA_NEXT_HOP, 0, ad); bgp_apply_next_hop(s, a, nh[0], nh[1]); } @@ -1293,11 +1326,11 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) bgp_parse_error(s, 9); if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop) - WITHDRAW(BAD_NEXT_HOP); + WITHDRAW(BAD_NEXT_HOP MISMATCHED_AF, nh[0], c->desc->name); // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); + bgp_set_attr_ptr(&(a->eattrs), BA_NEXT_HOP, 0, ad); bgp_apply_next_hop(s, a, nh[0], nh[1]); } @@ -1322,11 +1355,11 @@ bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, ui } static void -bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) +bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a, ea_list **to) { /* NEXT_HOP shall not pass */ if (a) - bgp_unset_attr(to, s->pool, BA_NEXT_HOP); + bgp_unset_attr(to, BA_NEXT_HOP); } @@ -1335,7 +1368,7 @@ bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) */ static void -bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) +bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a0) { if (path_id != s->last_id) { @@ -1348,6 +1381,10 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0) if (!a0) { + /* Route update was changed to withdraw */ + if (s->err_withdraw && s->reach_nlri_step) + REPORT("Invalid route %N withdrawn", n); + /* Route withdraw */ rte_update(&s->channel->c, n, NULL, s->last_src); return; @@ -1392,7 +1429,8 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte static void bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) { - u32 labels[BGP_MPLS_MAX], label; + u32 labels[BGP_MPLS_MAX]; + u32 label; uint lnum = 0; do { @@ -1406,7 +1444,7 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but fixed-size 24-bit Compatibility field, which MUST be ignored */ - if (!a && !s->err_withdraw) + if (!s->reach_nlri_step) return; } while (!(label & BGP_MPLS_BOS)); @@ -1414,19 +1452,8 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p if (!a) return; - /* Attach MPLS attribute unless we already have one */ - if (!s->mpls_labels) - { - s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); - } - - /* Overwrite data in the attribute */ - s->mpls_labels->length = 4*lnum; - memcpy(s->mpls_labels->data, labels, 4*lnum); - /* Update next hop entry in rta */ - bgp_apply_mpls_labels(s, a, labels, lnum); + bgp_apply_mpls_labels(s, a, lnum, labels); /* Attributes were changed, invalidate cached entry */ rta_free(s->cached_rta); @@ -2289,11 +2316,14 @@ bgp_create_update(struct bgp_channel *c, byte *buf) again: ; + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize write state */ struct bgp_write_state s = { .proto = p, .channel = c, - .pool = bgp_linpool, + .pool = tmp_linpool, .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop, .as4_session = p->as4_session, .add_path = c->add_path_tx, @@ -2319,6 +2349,7 @@ again: ; if (EMPTY_LIST(buck->prefixes)) { bgp_free_bucket(c, buck); + lp_restore(tmp_linpool, &tmpp); goto again; } @@ -2332,7 +2363,10 @@ again: ; bgp_defer_bucket(c, buck); if (!res) + { + lp_restore(tmp_linpool, &tmpp); goto again; + } goto done; } @@ -2343,7 +2377,7 @@ again: ; done: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); p->stats.tx_updates++; - lp_flush(s.pool); + lp_restore(tmp_linpool, &tmpp); return res; } @@ -2433,11 +2467,11 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis { a = allocz(RTA_MAX_SIZE); - a->source = RTS_BGP; - a->scope = SCOPE_UNIVERSE; - a->from = s->proto->remote_ip; a->eattrs = ea; - a->pref = c->c.preference; + + ea_set_attr_data(&a->eattrs, &ea_gen_from, 0, &s->proto->remote_ip, sizeof(ip_addr)); + ea_set_attr_u32(&a->eattrs, &ea_gen_preference, 0, c->c.preference); + ea_set_attr_u32(&a->eattrs, &ea_gen_source, 0, RTS_BGP); c->desc->decode_next_hop(s, nh, nh_len, a); bgp_finish_attrs(s, a); @@ -2472,10 +2506,13 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) bgp_start_timer(conn->hold_timer, conn->hold_time); + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize parse state */ struct bgp_parse_state s = { .proto = p, - .pool = bgp_linpool, + .pool = tmp_linpool, .as4_session = p->as4_session, }; @@ -2541,6 +2578,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) if (s.mp_unreach_len) bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0); + s.reach_nlri_step = 1; + if (s.ip_reach_len) bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len, ea, s.ip_next_hop_data, s.ip_next_hop_len); @@ -2551,7 +2590,7 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) done: rta_free(s.cached_rta); - lp_flush(s.pool); + lp_restore(tmp_linpool, &tmpp); return; } @@ -2695,7 +2734,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { case BGP_RR_REQUEST: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - channel_request_feeding(&c->c); + rt_refeed_channel(&c->c); break; case BGP_RR_BEGIN: |