summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaria Matejka <mq@ucw.cz>2022-08-03 14:07:53 +0200
committerMaria Matejka <mq@ucw.cz>2022-08-03 14:07:53 +0200
commit5a96b9b12496082b9d6165f51597403546ed617d (patch)
tree41f7e773ee776bea66d306a0f546ae148aa37054
parent71b434a987067475b517792360f58dbe03bfee9e (diff)
parent97476e002d7dfb24a4613ac401b8f3192ca68d05 (diff)
Merge commit '97476e00' into thread-next
Had to fix route source locking inside the BGP export table: the route sources must stay properly allocated until the very last pending BGP update has been sent out, so that the export table printout remains accurate.
-rw-r--r--lib/route.h15
-rw-r--r--proto/bgp/attrs.c136
-rw-r--r--proto/bgp/bgp.c15
-rw-r--r--proto/bgp/bgp.h29
-rw-r--r--proto/bgp/packets.c10
5 files changed, 126 insertions, 79 deletions
diff --git a/lib/route.h b/lib/route.h
index cf3c70ba..97e2e053 100644
--- a/lib/route.h
+++ b/lib/route.h
@@ -10,6 +10,8 @@
#ifndef _BIRD_LIB_ROUTE_H_
#define _BIRD_LIB_ROUTE_H_
+#undef RT_SOURCE_DEBUG
+
#include "lib/type.h"
#include "lib/rcu.h"
#include "lib/hash.h"
@@ -87,6 +89,11 @@ struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id);
struct rte_src *rt_find_source_global(u32 id);
+#ifdef RT_SOURCE_DEBUG
+#define rt_lock_source _rt_lock_source_internal
+#define rt_unlock_source _rt_unlock_source_internal
+#endif
+
static inline void rt_lock_source(struct rte_src *src)
{
/* Locking a source is trivial; somebody already holds it so we just increase
@@ -139,6 +146,14 @@ static inline void rt_unlock_source(struct rte_src *src)
rcu_read_unlock();
}
+#ifdef RT_SOURCE_DEBUG
+#undef rt_lock_source
+#undef rt_unlock_source
+
+#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) )
+#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) )
+#endif
+
void rt_init_sources(struct rte_owner *, const char *name, event_list *list);
void rt_destroy_sources(struct rte_owner *, event *);
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index e96b175d..a1f5791a 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1522,8 +1522,8 @@ bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to)
HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
-void
-bgp_init_bucket_table(struct bgp_channel *c)
+static void
+bgp_init_bucket_table(struct bgp_pending_tx *c)
{
HASH_INIT(c->bucket_hash, c->pool, 8);
@@ -1531,24 +1531,8 @@ bgp_init_bucket_table(struct bgp_channel *c)
c->withdraw_bucket = NULL;
}
-void
-bgp_free_bucket_table(struct bgp_channel *c)
-{
- HASH_FREE(c->bucket_hash);
-
- struct bgp_bucket *b;
- WALK_LIST_FIRST(b, c->bucket_queue)
- {
- rem_node(&b->send_node);
- mb_free(b);
- }
-
- mb_free(c->withdraw_bucket);
- c->withdraw_bucket = NULL;
-}
-
static struct bgp_bucket *
-bgp_get_bucket(struct bgp_channel *c, ea_list *new)
+bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new)
{
/* Hash and lookup */
u32 hash = ea_hash(new);
@@ -1577,7 +1561,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new)
}
static struct bgp_bucket *
-bgp_get_withdraw_bucket(struct bgp_channel *c)
+bgp_get_withdraw_bucket(struct bgp_pending_tx *c)
{
if (!c->withdraw_bucket)
{
@@ -1589,15 +1573,17 @@ bgp_get_withdraw_bucket(struct bgp_channel *c)
}
static void
-bgp_free_bucket_xx(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b)
{
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
int
-bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
+ struct bgp_pending_tx *c = bc->ptx;
+
/* Won't free the withdraw bucket */
if (b == c->withdraw_bucket)
return 0;
@@ -1608,21 +1594,23 @@ bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
if (b->px_uc || !EMPTY_LIST(b->prefixes))
return 0;
- bgp_free_bucket_xx(c, b);
+ bgp_free_bucket(c, b);
return 1;
}
void
-bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
+ struct bgp_pending_tx *c = bc->ptx;
rem_node(&b->send_node);
add_tail(&c->bucket_queue, &b->send_node);
}
void
-bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b)
{
- struct bgp_proto *p = (void *) c->c.proto;
+ struct bgp_proto *p = (void *) bc->c.proto;
+ struct bgp_pending_tx *c = bc->ptx;
struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
log(L_ERR "%s: Attribute list too long", p->p.name);
@@ -1643,7 +1631,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
-#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (c->add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
+#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
@@ -1652,29 +1640,21 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
-void
-bgp_init_prefix_table(struct bgp_channel *c)
+static void
+bgp_init_prefix_table(struct bgp_channel *bc)
{
+ struct bgp_pending_tx *c = bc->ptx;
HASH_INIT(c->prefix_hash, c->pool, 8);
- uint alen = net_addr_length[c->c.net_type];
+ uint alen = net_addr_length[bc->c.net_type];
c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
}
-void
-bgp_free_prefix_table(struct bgp_channel *c)
-{
- HASH_FREE(c->prefix_hash);
-
- rfree(c->prefix_slab);
- c->prefix_slab = NULL;
-}
-
static struct bgp_prefix *
-bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src)
+bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx)
{
u32 path_id = src->global_id;
- u32 path_id_hash = c->add_path_tx ? path_id : 0;
+ u32 path_id_hash = add_path_tx ? path_id : 0;
/* We must use a different hash function than the rtable */
u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
@@ -1698,15 +1678,16 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, struct rte_src *src)
return px;
}
-static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px);
+static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px);
static inline int
bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
{
+#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket)
#define BPX_TRACE(what) do { \
if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
c->c.proto->name, c->c.name, what, \
- px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0)
+ px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0)
px->lastmod = current_time();
/* Already queued for the same bucket */
@@ -1724,7 +1705,7 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
}
/* The new bucket is the same as we sent before */
- if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket))
+ if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b))
{
if (px->cur)
BPX_TRACE("reverted");
@@ -1733,15 +1714,15 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
/* Well, we haven't sent anything yet */
if (!px->last)
- bgp_free_prefix(c, px);
+ bgp_free_prefix(c->ptx, px);
px->cur = NULL;
return 0;
}
/* Enqueue the bucket if it has been empty */
- if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes))
- add_tail(&c->bucket_queue, &b->send_node);
+ if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes))
+ add_tail(&c->ptx->bucket_queue, &b->send_node);
/* Enqueue to the new bucket and indicate the change */
add_tail(&b->prefixes, &px->buck_node_xx);
@@ -1754,7 +1735,7 @@ bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucke
}
static void
-bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
+bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px)
{
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
@@ -1784,7 +1765,7 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket
px->last->px_uc--;
/* Ref the current sent version */
- if (buck != c->withdraw_bucket)
+ if (!IS_WITHDRAW_BUCKET(buck))
{
px->last = buck;
px->last->px_uc++;
@@ -1794,7 +1775,49 @@ bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket
/* Prefixes belonging to the withdraw bucket are freed always */
}
- bgp_free_prefix(c, px);
+ bgp_free_prefix(c->ptx, px);
+}
+
+static void
+bgp_pending_tx_rfree(resource *r)
+{
+ struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r);
+
+ HASH_WALK(ptx->prefix_hash, next, n)
+ rt_unlock_source(rt_find_source_global(n->path_id));
+ HASH_WALK_END;
+}
+
+static void bgp_pending_tx_dump(resource *r UNUSED) { debug("\n"); }
+
+static struct resclass bgp_pending_tx_class = {
+ .name = "BGP Pending TX",
+ .size = sizeof(struct bgp_pending_tx),
+ .free = bgp_pending_tx_rfree,
+ .dump = bgp_pending_tx_dump,
+};
+
+void
+bgp_init_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(!c->ptx);
+
+ pool *p = rp_new(c->pool, "BGP Pending TX");
+ c->ptx = ralloc(p, &bgp_pending_tx_class);
+ c->ptx->pool = p;
+
+ bgp_init_bucket_table(c->ptx);
+ bgp_init_prefix_table(c);
+}
+
+void
+bgp_free_pending_tx(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->ptx);
+ ASSERT_DIE(c->ptx->pool);
+
+ rfree(c->ptx->pool);
+ c->ptx = NULL;
}
@@ -1806,7 +1829,8 @@ static void
bgp_out_table_feed(void *data)
{
struct rt_export_hook *hook = data;
- struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
+ struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
+ struct bgp_pending_tx *c = bc->ptx;
int max = 512;
@@ -1901,8 +1925,8 @@ bgp_out_table_feed(void *data)
static struct rt_export_hook *
bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED)
{
- struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
- pool *p = rp_new(c->c.proto->pool, "Export hook");
+ struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
+ pool *p = rp_new(bc->c.proto->pool, "Export hook");
struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
hook->pool = p;
hook->event = ev_new_init(p, bgp_out_table_feed, hook);
@@ -2134,16 +2158,16 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n);
/* If attributes are invalid, we fail back to withdraw */
- buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
+ buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx);
path = new->src;
}
else
{
- buck = bgp_get_withdraw_bucket(c);
+ buck = bgp_get_withdraw_bucket(c->ptx);
path = old->src;
}
- if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck))
+ if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck))
bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index d240112c..65673e37 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -518,6 +518,12 @@ bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
p->uncork_ev->data = NULL;
bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
+
+ struct bgp_channel *c;
+ WALK_LIST(c, p->p.channels)
+ if (c->ptx)
+ bgp_free_pending_tx(c);
+
ev_schedule(p->event);
}
@@ -787,10 +793,8 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
}
/* Reset bucket and prefix tables */
- bgp_free_bucket_table(c);
- bgp_free_prefix_table(c);
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ bgp_free_pending_tx(c);
+ bgp_init_pending_tx(c);
c->packets_to_send = 0;
}
@@ -1806,8 +1810,7 @@ bgp_channel_start(struct channel *C)
if (c->cf->export_table)
bgp_setup_out_table(c);
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
+ bgp_init_pending_tx(c);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 81382099..fdd134f4 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -351,14 +351,8 @@ struct bgp_channel {
/* Rest are zeroed when down */
pool *pool;
- HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
- struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
- list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
-
- HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
- slab *prefix_slab; /* Slab holding prefix nodes */
-
- struct rt_exporter prefix_exporter; /* Table-like exporter for prefix_hash */
+ struct bgp_pending_tx *ptx; /* Routes waiting to be sent */
+ struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@@ -401,6 +395,18 @@ struct bgp_bucket {
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
+struct bgp_pending_tx {
+ resource r;
+ pool *pool;
+
+ HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */
+ struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
+ list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */
+
+ HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
+ slab *prefix_slab; /* Slab holding prefix nodes */
+};
+
struct bgp_export_state {
struct bgp_proto *proto;
struct bgp_channel *channel;
@@ -567,13 +573,12 @@ void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
void bgp_setup_out_table(struct bgp_channel *c);
-void bgp_init_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket_table(struct bgp_channel *c);
+void bgp_init_pending_tx(struct bgp_channel *c);
+void bgp_free_pending_tx(struct bgp_channel *c);
+
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_init_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix_table(struct bgp_channel *c);
void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
int bgp_rte_better(struct rte *, struct rte *);
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index 867be75f..1f208432 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -2169,7 +2169,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
* var IPv4 Network Layer Reachability Information
*/
- ASSERT_DIE(s->channel->withdraw_bucket != buck);
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
int lr, la;
@@ -2192,7 +2192,7 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
static byte *
bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
- ASSERT_DIE(s->channel->withdraw_bucket != buck);
+ ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck);
/*
* 2 B IPv4 Withdrawn Routes Length (zero)
@@ -2332,7 +2332,7 @@ again: ;
};
/* Try unreachable bucket */
- if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
+ if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{
res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
bgp_create_ip_unreach(&s, buck, buf, end):
@@ -2342,9 +2342,9 @@ again: ;
}
/* Try reachable buckets */
- if (!EMPTY_LIST(c->bucket_queue))
+ if (!EMPTY_LIST(c->ptx->bucket_queue))
{
- buck = HEAD(c->bucket_queue);
+ buck = HEAD(c->ptx->bucket_queue);
/* Cleanup empty buckets */
if (bgp_done_bucket(c, buck))