summaryrefslogtreecommitdiff
path: root/proto/bgp
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2012-08-14 16:25:22 +0200
committerOndrej Zajicek <santiago@crfreenet.org>2012-08-14 16:46:43 +0200
commit094d2bdb79e1ffa0a02761fd651aa0f0b6b0c585 (patch)
treef7cb65c540403ed152677dde3b803c3dd117d8e5 /proto/bgp
parentd760229ab897fa1bf1fd0fe7019cc2431d21a1cc (diff)
Implements ADD-PATH extension for BGP.
Allows sending and receiving multiple routes for one network over a single BGP session. Also contains the core changes necessary to support this (routing tables accepting several routes for one network from one protocol). It needs some more cleanup before merging into the master branch.
Diffstat (limited to 'proto/bgp')
-rw-r--r--proto/bgp/attrs.c117
-rw-r--r--proto/bgp/bgp.c31
-rw-r--r--proto/bgp/bgp.h39
-rw-r--r--proto/bgp/config.Y5
-rw-r--r--proto/bgp/packets.c182
5 files changed, 295 insertions, 79 deletions
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index e5bc84dd..837a6861 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -621,12 +621,14 @@ bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
return -1;
}
+/*
static void
bgp_init_prefix(struct fib_node *N)
{
struct bgp_prefix *p = (struct bgp_prefix *) N;
p->bucket_node.next = NULL;
}
+*/
static int
bgp_compare_u32(const u32 *x, const u32 *y)
@@ -870,30 +872,125 @@ bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
mb_free(buck);
}
+
+/* Prefix hash table */
+
+/*
+ * Hash a (prefix, pxlen, path_id) key into a bucket index of @order bits.
+ * The three key parts are summed, the sum is multiplied by a fixed odd
+ * constant and the top @order bits of the 32-bit product are returned.
+ */
+static inline u32
+prefix_hash(ip_addr prefix, int pxlen, u32 path_id, u32 order)
+{
+  u32 key = ipa_hash(prefix);
+
+  key += pxlen;
+  key += path_id;
+  key *= 2902958171u;
+
+  return key >> (32 - order);
+}
+
+/* Number of buckets in the prefix hash table, i.e. 2^px_hash_order. */
+static inline u32
+px_hash_size(struct bgp_proto *p)
+{
+  return 1 << p->px_hash_order;
+}
+
+/*
+ * Set up an empty prefix hash table with 2^@order buckets and the slab
+ * from which struct bgp_prefix nodes are taken. Both are allocated from
+ * p->p.pool.
+ */
+void
+bgp_init_prefix_table(struct bgp_proto *p, u32 order)
+{
+  u32 buckets;
+
+  /* The order has to be stored before px_hash_size() is asked for the size */
+  p->px_hash_order = order;
+  p->px_hash_count = 0;
+  buckets = px_hash_size(p);
+
+  p->prefix_table = mb_allocz(p->p.pool, buckets * sizeof(struct bgp_prefix *));
+  p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix));
+}
+
+/*
+ * Resize the prefix hash table by changing its order by @step (the callers
+ * in this file pass +1 to grow and -1 to shrink). A new zeroed bucket array
+ * is allocated, every node of the old array is relinked into it according
+ * to its hash under the new order, and the old array is freed. The nodes
+ * themselves are not reallocated.
+ */
+static void
+bgp_rehash_prefix_table(struct bgp_proto *p, int step)
+{
+  struct bgp_prefix **prev_tab = p->prefix_table;
+  u32 prev_size = px_hash_size(p);
+  u32 idx;
+
+  p->px_hash_order += step;
+  p->prefix_table = mb_allocz(p->p.pool, px_hash_size(p) * sizeof(struct bgp_prefix *));
+
+  for (idx = 0; idx < prev_size; idx++)
+    {
+      struct bgp_prefix *cur = prev_tab[idx];
+
+      while (cur)
+        {
+          /* Remember the successor, cur->next is overwritten below */
+          struct bgp_prefix *follow = cur->next;
+          u32 slot = prefix_hash(cur->n.prefix, cur->n.pxlen, cur->path_id, p->px_hash_order);
+
+          /* Push the node onto the head of its new chain */
+          cur->next = p->prefix_table[slot];
+          p->prefix_table[slot] = cur;
+
+          cur = follow;
+        }
+    }
+
+  mb_free(prev_tab);
+}
+
+/*
+ * Return the prefix node for (prefix, pxlen, path_id). When the table has
+ * no such node, one is allocated from the prefix slab, linked at the head
+ * of its hash chain and returned with bucket_node.next set to NULL. The
+ * table grows by one order once it holds more nodes than buckets, up to
+ * order 18.
+ */
+static struct bgp_prefix *
+bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id)
+{
+  u32 hash = prefix_hash(prefix, pxlen, path_id, p->px_hash_order);
+  struct bgp_prefix *bp = p->prefix_table[hash];
+
+  /* Search the chain for an existing node with the same key */
+  while (bp)
+    {
+      if (bp->path_id == path_id && bp->n.pxlen == pxlen && ipa_equal(bp->n.prefix, prefix))
+        return bp;
+
+      bp = bp->next;
+    }
+
+  /* Not found, create a fresh node that is not a member of any bucket list */
+  bp = sl_alloc(p->prefix_slab);
+  bp->n.prefix = prefix;
+  bp->n.pxlen = pxlen;
+  bp->path_id = path_id;
+  bp->bucket_node.next = NULL;
+
+  bp->next = p->prefix_table[hash];
+  p->prefix_table[hash] = bp;
+  p->px_hash_count++;
+
+  if ((p->px_hash_order < 18) && (p->px_hash_count > px_hash_size(p)))
+    bgp_rehash_prefix_table(p, 1);
+
+  return bp;
+}
+
+/*
+ * Unlink @bp from the prefix hash table and return it to the prefix slab.
+ * Afterwards the table is shrunk by one order when the number of nodes has
+ * dropped below a quarter of the number of buckets and the order is still
+ * above 10. The callers in this file remove @bp from its bucket list
+ * before calling this function.
+ */
+void
+bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp)
+{
+  struct bgp_prefix **bpp;
+  u32 hash = prefix_hash(bp->n.prefix, bp->n.pxlen, bp->path_id, p->px_hash_order);
+
+  /*
+   * Walk the chain using a pointer to the link that refers to the current
+   * node. The walk has to advance the link pointer itself; assigning
+   * through it would overwrite the bucket head on every step and drop all
+   * nodes preceding @bp from the table.
+   */
+  for (bpp = &p->prefix_table[hash]; *bpp; bpp = &(*bpp)->next)
+    if (*bpp == bp)
+      break;
+
+  /* Unlink only a node that was really found in this chain */
+  if (*bpp == bp)
+    *bpp = bp->next;
+
+  sl_free(p->prefix_slab, bp);
+
+  p->px_hash_count--;
+  if ((p->px_hash_count < (px_hash_size(p) / 4)) && (p->px_hash_order > 10))
+    bgp_rehash_prefix_table(p, -1);
+}
+
+
void
bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
{
struct bgp_proto *p = (struct bgp_proto *) P;
struct bgp_bucket *buck;
struct bgp_prefix *px;
+ rte *key;
+ u32 path_id;
DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
if (new)
{
+ key = new;
buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP);
if (!buck) /* Inconsistent attribute list */
return;
}
else
{
+ key = old;
if (!(buck = p->withdraw_bucket))
{
buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
init_list(&buck->prefixes);
}
}
- px = fib_get(&p->prefix_fib, &n->n.prefix, n->n.pxlen);
+ path_id = p->add_path_tx ? key->attrs->src->global_id : 0;
+ px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id);
if (px->bucket_node.next)
{
DBG("\tRemoving old entry.\n");
@@ -1021,7 +1118,7 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p
if (rr)
{
/* Handling route reflection, RFC 4456 */
- struct bgp_proto *src = (struct bgp_proto *) e->attrs->proto;
+ struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto;
a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
if (!a)
@@ -1071,7 +1168,8 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *
{
rte *e = *new;
struct bgp_proto *p = (struct bgp_proto *) P;
- struct bgp_proto *new_bgp = (e->attrs->proto->proto == &proto_bgp) ? (struct bgp_proto *) e->attrs->proto : NULL;
+ struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ?
+ (struct bgp_proto *) e->attrs->src->proto : NULL;
if (p == new_bgp) /* Poison reverse updates */
return -1;
@@ -1110,7 +1208,7 @@ bgp_get_neighbor(rte *r)
if (e && as_path_get_first(e->u.ptr, &as))
return as;
else
- return ((struct bgp_proto *) r->attrs->proto)->remote_as;
+ return ((struct bgp_proto *) r->attrs->src->proto)->remote_as;
}
static inline int
@@ -1123,8 +1221,8 @@ rte_resolvable(rte *rt)
int
bgp_rte_better(rte *new, rte *old)
{
- struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->proto;
- struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->proto;
+ struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
+ struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
eattr *x, *y;
u32 n, o;
@@ -1258,7 +1356,7 @@ same_group(rte *r, u32 lpref, u32 lasn)
static inline int
use_deterministic_med(rte *r)
{
- struct proto *P = r->attrs->proto;
+ struct proto *P = r->attrs->src->proto;
return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
}
@@ -1543,7 +1641,6 @@ bgp_decode_attrs(struct bgp_conn *conn, byte *attr, unsigned int len, struct lin
int withdraw = 0;
bzero(a, sizeof(rta));
- a->proto = &bgp->p;
a->source = RTS_BGP;
a->scope = SCOPE_UNIVERSE;
a->cast = RTC_UNICAST;
@@ -1752,14 +1849,14 @@ bgp_get_attr(eattr *a, byte *buf, int buflen)
}
void
-bgp_attr_init(struct bgp_proto *p)
+bgp_init_bucket_table(struct bgp_proto *p)
{
p->hash_size = 256;
p->hash_limit = p->hash_size * 4;
p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
init_list(&p->bucket_queue);
p->withdraw_bucket = NULL;
- fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
+ // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
}
void
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index dbc59eea..f290f227 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -362,7 +362,9 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->conn = conn;
p->last_error_class = 0;
p->last_error_code = 0;
- bgp_attr_init(conn->bgp);
+ bgp_init_bucket_table(p);
+ bgp_init_prefix_table(p, 8);
+
bgp_conn_set_state(conn, BS_ESTABLISHED);
proto_notify_state(&p->p, PS_UP);
}
@@ -410,8 +412,11 @@ static void
bgp_send_open(struct bgp_conn *conn)
{
conn->start_state = conn->bgp->start_state;
- conn->want_as4_support = conn->bgp->cf->enable_as4 && (conn->start_state != BSS_CONNECT_NOCAP);
- conn->peer_as4_support = 0; // Default value, possibly changed by receiving capability.
+
+ // Default values, possibly changed by receiving capabilities.
+ conn->peer_refresh_support = 0;
+ conn->peer_as4_support = 0;
+ conn->peer_add_path = 0;
conn->advertised_as = 0;
DBG("BGP: Sending open\n");
@@ -920,19 +925,17 @@ get_igp_table(struct bgp_config *cf)
static struct proto *
bgp_init(struct proto_config *C)
{
- struct bgp_config *c = (struct bgp_config *) C;
struct proto *P = proto_new(C, sizeof(struct bgp_proto));
+ struct bgp_config *c = (struct bgp_config *) C;
struct bgp_proto *p = (struct bgp_proto *) P;
P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
P->rt_notify = bgp_rt_notify;
- P->rte_better = bgp_rte_better;
P->import_control = bgp_import_control;
P->neigh_notify = bgp_neigh_notify;
P->reload_routes = bgp_reload_routes;
-
- if (c->deterministic_med)
- P->rte_recalculate = bgp_rte_recalculate;
+ P->rte_better = bgp_rte_better;
+ P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
p->cf = c;
p->local_as = c->local_as;
@@ -1176,15 +1179,19 @@ bgp_show_proto_info(struct proto *P)
else if (P->proto_state == PS_UP)
{
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
- cli_msg(-1006, " Neighbor caps: %s%s",
+ cli_msg(-1006, " Neighbor caps: %s%s%s%s",
c->peer_refresh_support ? " refresh" : "",
- c->peer_as4_support ? " AS4" : "");
- cli_msg(-1006, " Session: %s%s%s%s%s",
+ c->peer_as4_support ? " AS4" : "",
+ (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
+ (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "");
+ cli_msg(-1006, " Session: %s%s%s%s%s%s%s",
p->is_internal ? "internal" : "external",
p->cf->multihop ? " multihop" : "",
p->rr_client ? " route-reflector" : "",
p->rs_client ? " route-server" : "",
- p->as4_session ? " AS4" : "");
+ p->as4_session ? " AS4" : "",
+ p->add_path_rx ? " add-path-rx" : "",
+ p->add_path_tx ? " add-path-tx" : "");
cli_msg(-1006, " Source address: %I", p->source_addr);
if (P->cf->in_limit)
cli_msg(-1006, " Route limit: %d/%d",
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index c3adf254..b87de46e 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -43,6 +43,7 @@ struct bgp_config {
int passive; /* Do not initiate outgoing connection */
int interpret_communities; /* Hardwired handling of well-known communities */
int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
+ int add_path; /* Use ADD-PATH extension [draft] */
unsigned connect_retry_time;
unsigned hold_time, initial_hold_time;
unsigned keepalive_time;
@@ -62,6 +63,11 @@ struct bgp_config {
#define GW_DIRECT 1
#define GW_RECURSIVE 2
+#define ADD_PATH_RX 1
+#define ADD_PATH_TX 2
+#define ADD_PATH_FULL 3
+
+
struct bgp_conn {
struct bgp_proto *bgp;
struct birdsock *sk;
@@ -75,9 +81,9 @@ struct bgp_conn {
byte *notify_data;
u32 advertised_as; /* Temporary value for AS number received */
int start_state; /* protocol start_state snapshot when connection established */
- int want_as4_support; /* Connection tries to establish AS4 session */
- int peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
- int peer_refresh_support; /* Peer supports route refresh [RFC2918] */
+ u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */
+ u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
+ u8 peer_add_path; /* Peer supports ADD-PATH [draft] */
unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
};
@@ -86,8 +92,10 @@ struct bgp_proto {
struct bgp_config *cf; /* Shortcut to BGP configuration */
u32 local_as, remote_as;
int start_state; /* Substates that partitions BS_START */
- int is_internal; /* Internal BGP connection (local_as == remote_as) */
- int as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
+ u8 is_internal; /* Internal BGP connection (local_as == remote_as) */
+ u8 as4_session; /* Session uses 4B AS numbers in AS_PATH (both sides support it) */
+ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */
+ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */
u32 local_id; /* BGP identifier of this router */
u32 remote_id; /* BGP identifier of the neighbor */
u32 rr_cluster_id; /* Route reflector cluster ID */
@@ -104,7 +112,10 @@ struct bgp_proto {
struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */
unsigned int hash_size, hash_count, hash_limit;
- struct fib prefix_fib; /* Prefixes to be sent */
+ // struct fib prefix_fib; /* Prefixes to be sent */
+ struct bgp_prefix **prefix_table; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+ u32 px_hash_order, px_hash_count;
list bucket_queue; /* Queue of buckets to send */
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
unsigned startup_delay; /* Time to delay protocol startup by due to errors */
@@ -120,7 +131,12 @@ struct bgp_proto {
};
struct bgp_prefix {
- struct fib_node n; /* Node in prefix fib */
+ struct {
+ ip_addr prefix;
+ int pxlen;
+ } n;
+ u32 path_id;
+ struct bgp_prefix *next;
node bucket_node; /* Node in per-bucket list */
};
@@ -154,6 +170,9 @@ void bgp_conn_enter_idle_state(struct bgp_conn *conn);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, unsigned subcode);
+struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
+struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
+
#ifdef LOCAL_DEBUG
@@ -189,9 +208,11 @@ int bgp_rte_better(struct rte *, struct rte *);
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
-void bgp_attr_init(struct bgp_proto *);
-unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
+void bgp_init_bucket_table(struct bgp_proto *);
void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
+void bgp_init_prefix_table(struct bgp_proto *p, u32 order);
+void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp);
+unsigned int bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 8b80d7fd..0b096339 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
- SECONDARY)
+ SECONDARY, ADD, PATHS, RX, TX)
CF_GRAMMAR
@@ -107,6 +107,9 @@ bgp_proto:
| bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; }
| bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; }
| bgp_proto SECONDARY bool ';' { BGP_CFG->secondary = $3; }
+ | bgp_proto ADD PATHS RX ';' { BGP_CFG->add_path = ADD_PATH_RX; }
+ | bgp_proto ADD PATHS TX ';' { BGP_CFG->add_path = ADD_PATH_TX; }
+ | bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; }
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
;
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index cfa37fb5..3fae2c24 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -159,6 +159,21 @@ bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
}
static byte *
+bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf)
+{
+  /*
+   * Writes the 6-byte ADD-PATH capability at @buf and returns the position
+   * just behind it. Layout: capability code, data length, two AFI bytes,
+   * SAFI and the configured add_path value.
+   */
+
+  buf[0] = 69;		/* Capability 69: Support for ADD-PATH */
+  buf[1] = 4;		/* Capability data length */
+
+  buf[2] = 0;		/* Appropriate AF */
+  buf[3] = BGP_AF;
+  buf[4] = 1;		/* SAFI 1 */
+
+  buf[5] = conn->bgp->cf->add_path;
+
+  return buf + 6;
+}
+
+static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
struct bgp_proto *p = conn->bgp;
@@ -194,9 +209,12 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
if (p->cf->enable_refresh)
cap = bgp_put_cap_rr(conn, cap);
- if (conn->want_as4_support)
+ if (p->cf->enable_as4)
cap = bgp_put_cap_as4(conn, cap);
+ if (p->cf->add_path)
+ cap = bgp_put_cap_add_path(conn, cap);
+
cap_len = cap - buf - 12;
if (cap_len > 0)
{
@@ -223,6 +241,13 @@ bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsig
{
struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
+
+ if (p->add_path_tx)
+ {
+ put_u32(w, px->path_id);
+ w += 4;
+ }
+
*w++ = px->n.pxlen;
bytes = (px->n.pxlen + 7) / 8;
a = px->n.prefix;
@@ -231,7 +256,8 @@ bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsig
w += bytes;
remains -= bytes + 1;
rem_node(&px->bucket_node);
- fib_delete(&p->prefix_fib, px);
+ bgp_free_prefix(p, px);
+ // fib_delete(&p->prefix_fib, px);
}
return w - start;
}
@@ -244,7 +270,8 @@ bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
rem_node(&px->bucket_node);
- fib_delete(&p->prefix_fib, px);
+ bgp_free_prefix(p, px);
+ // fib_delete(&p->prefix_fib, px);
}
}
@@ -626,7 +653,7 @@ void
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
{
// struct bgp_proto *p = conn->bgp;
- int cl;
+ int i, cl;
while (len > 0)
{
@@ -643,14 +670,25 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
conn->peer_refresh_support = 1;
break;
- case 65: /* AS4 capability, RFC 4893 */
+ case 65: /* AS4 capability, RFC 4893 */
if (cl != 4)
goto err;
conn->peer_as4_support = 1;
- if (conn->want_as4_support)
+ if (conn->bgp->cf->enable_as4)
conn->advertised_as = get_u32(opt + 2);
break;
+ case 69: /* ADD-PATH capability, draft */
+ if (cl % 4)
+ goto err;
+ for (i = 0; i < cl; i += 4)
+ if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
+ conn->peer_add_path = opt[2+i+3];
+ if (conn->peer_add_path > ADD_PATH_FULL)
+ goto err;
+
+ break;
+
/* We can safely ignore all other capabilities */
}
len -= 2 + cl;
@@ -789,7 +827,12 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
conn->hold_time = MIN(hold, p->cf->hold_time);
conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
p->remote_id = id;
- p->as4_session = conn->want_as4_support && conn->peer_as4_support;
+ p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
+ p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
+ p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
+
+ if (p->add_path_tx)
+ p->p.accept_ra_types = RA_ANY;
DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
@@ -799,6 +842,13 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
}
#define DECODE_PREFIX(pp, ll) do { \
+ if (p->add_path_rx) \
+ { \
+ if (ll < 5) { err=1; goto done; } \
+ path_id = get_u32(pp); \
+ pp += 4; \
+ ll -= 4; \
+ } \
int b = *pp++; \
int q; \
ll--; \
@@ -813,6 +863,53 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
pxlen = b; \
} while (0)
+
+/*
+ * Announce one received prefix to the protocol's table.
+ *
+ * @last_id, @src and @a form a small cache kept by the caller across the
+ * prefixes of one update: the route source is fetched anew and the cached
+ * attributes *@a are released only when @path_id differs from *@last_id.
+ * When no cached attributes exist, @a0 gets the current source assigned
+ * and is looked up to produce them.
+ */
+static inline void
+bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
+               u32 path_id, u32 *last_id, struct rte_src **src,
+               rta *a0, rta **a)
+{
+  net *n;
+  rte *e;
+
+  if (*last_id != path_id)
+    {
+      *src = rt_get_source(&p->p, path_id);
+      *last_id = path_id;
+
+      /* Attributes cached for the previous source are of no use now */
+      if (*a != NULL)
+        {
+          rta_free(*a);
+          *a = NULL;
+        }
+    }
+
+  /* Prepare cached route attributes */
+  if (*a == NULL)
+    {
+      a0->src = *src;
+      *a = rta_lookup(a0);
+    }
+
+  n = net_get(p->p.table, prefix, pxlen);
+  e = rte_get_temp(rta_clone(*a));
+  e->net = n;
+  e->pflags = 0;
+  e->u.bgp.suppressed = 0;
+
+  rte_update2(p->p.main_ahook, n, e, *src);
+}
+
+/*
+ * Withdraw one received prefix from the protocol's table.
+ *
+ * @last_id and @src cache the route source across calls; the source is
+ * looked up again only when @path_id differs from *@last_id.
+ */
+static inline void
+bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
+                 u32 path_id, u32 *last_id, struct rte_src **src)
+{
+  net *n;
+
+  if (*last_id != path_id)
+    {
+      *src = rt_find_source(&p->p, path_id);
+      *last_id = path_id;
+    }
+
+  /*
+   * NOTE(review): the result of net_find() and *src are passed on without
+   * a check here, so rte_update2() presumably has to cope with a missing
+   * net or source -- confirm against its implementation.
+   */
+  n = net_find(p->p.table, prefix, pxlen);
+  rte_update2(p->p.main_ahook, n, NULL, *src);
+}
+
static inline int
bgp_set_next_hop(struct bgp_proto *p, rta *a)
{
@@ -871,18 +968,20 @@ bgp_do_rx_update(struct bgp_conn *conn,
byte *attrs, int attr_len)
{
struct bgp_proto *p = conn->bgp;
- net *n;
- rta *a0, *a = NULL;
+ struct rte_src *src = p->p.main_source;
+ rta *a0, *a;
ip_addr prefix;
int pxlen, err = 0;
+ u32 path_id = 0;
+ u32 last_id = 0;
/* Withdraw routes */
while (withdrawn_len)
{
DECODE_PREFIX(withdrawn, withdrawn_len);
DBG("Withdraw %I/%d\n", prefix, pxlen);
- if (n = net_find(p->p.table, prefix, pxlen))
- rte_update(p->p.table, n, &p->p, &p->p, NULL);
+
+ bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
}
if (!attr_len && !nlri_len) /* shortcut */
@@ -893,28 +992,22 @@ bgp_do_rx_update(struct bgp_conn *conn,
if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
return;
- if (a0 && nlri_len && bgp_set_next_hop(p, a0))
- a = rta_lookup(a0);
+ if (a0 && ! bgp_set_next_hop(p, a0))
+ a0 = NULL;
+
+ a = NULL;
+ last_id = 0;
+ src = p->p.main_source;
while (nlri_len)
{
DECODE_PREFIX(nlri, nlri_len);
DBG("Add %I/%d\n", prefix, pxlen);
- if (a)
- {
- rte *e = rte_get_temp(rta_clone(a));
- e->net = net_get(p->p.table, prefix, pxlen);
- e->pflags = 0;
- e->u.bgp.suppressed = 0;
- rte_update(p->p.table, e->net, &p->p, &p->p, e);
- }
- else
- {
- /* Forced withdraw as a result of soft error */
- if (n = net_find(p->p.table, prefix, pxlen))
- rte_update(p->p.table, n, &p->p, &p->p, NULL);
- }
+ if (a0)
+ bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
+ else /* Forced withdraw as a result of soft error */
+ bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
}
done:
@@ -970,13 +1063,15 @@ bgp_do_rx_update(struct bgp_conn *conn,
byte *attrs, int attr_len)
{
struct bgp_proto *p = conn->bgp;
+ struct rte_src *src = p->p.main_source;
byte *start, *x;
int len, len0;
unsigned af, sub;
- net *n;
- rta *a0, *a = NULL;
+ rta *a0, *a;
ip_addr prefix;
int pxlen, err = 0;
+ u32 path_id = 0;
+ u32 last_id = 0;
p->mp_reach_len = 0;
p->mp_unreach_len = 0;
@@ -991,8 +1086,7 @@ bgp_do_rx_update(struct bgp_conn *conn,
{
DECODE_PREFIX(x, len);
DBG("Withdraw %I/%d\n", prefix, pxlen);
- if (n = net_find(p->p.table, prefix, pxlen))
- rte_update(p->p.table, n, &p->p, &p->p, NULL);
+ bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
}
}
@@ -1009,28 +1103,22 @@ bgp_do_rx_update(struct bgp_conn *conn,
len -= *x + 2;
x += *x + 2;
- if (a0 && bgp_set_next_hop(p, a0))
- a = rta_lookup(a0);
+ if (a0 && ! bgp_set_next_hop(p, a0))
+ a0 = NULL;
+
+ a = NULL;
+ last_id = 0;
+ src = p->p.main_source;
while (len)
{
DECODE_PREFIX(x, len);
DBG("Add %I/%d\n", prefix, pxlen);
- if (a)
- {
- rte *e = rte_get_temp(rta_clone(a));
- e->net = net_get(p->p.table, prefix, pxlen);
- e->pflags = 0;
- e->u.bgp.suppressed = 0;
- rte_update(p->p.table, e->net, &p->p, &p->p, e);
- }
- else
- {
- /* Forced withdraw as a result of soft error */
- if (n = net_find(p->p.table, prefix, pxlen))
- rte_update(p->p.table, n, &p->p, &p->p, NULL);
- }
+ if (a0)
+ bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
+ else /* Forced withdraw as a result of soft error */
+ bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
}
}