summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/route.h2
-rw-r--r--nest/config.Y2
-rw-r--r--nest/proto.c16
-rw-r--r--nest/protocol.h3
-rw-r--r--nest/rt-attr.c27
-rw-r--r--nest/rt-table.c168
-rw-r--r--nest/rt.h20
-rw-r--r--proto/bgp/attrs.c266
-rw-r--r--proto/bgp/bgp.c7
-rw-r--r--proto/bgp/bgp.h19
-rw-r--r--proto/bgp/packets.c32
11 files changed, 342 insertions, 220 deletions
diff --git a/lib/route.h b/lib/route.h
index 30be7c4e..68596316 100644
--- a/lib/route.h
+++ b/lib/route.h
@@ -35,6 +35,7 @@ typedef struct rte {
#define REF_STALE 4 /* Route is stale in a refresh cycle */
#define REF_DISCARD 8 /* Route is scheduled for discard */
#define REF_MODIFY 16 /* Route is scheduled for modify */
+#define REF_PENDING 32 /* Route has not propagated completely yet */
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
@@ -53,6 +54,7 @@ struct rte_src {
struct rte_src *rt_find_source(struct proto *p, u32 id);
struct rte_src *rt_get_source(struct proto *p, u32 id);
+struct rte_src *rt_find_source_global(u32 id);
static inline void rt_lock_source(struct rte_src *src) { src->uc++; }
static inline void rt_unlock_source(struct rte_src *src) { src->uc--; }
void rt_prune_sources(void);
diff --git a/nest/config.Y b/nest/config.Y
index 7c163f74..edddfc2b 100644
--- a/nest/config.Y
+++ b/nest/config.Y
@@ -694,7 +694,7 @@ r_args:
}
| r_args EXPORT TABLE channel_arg {
if (!$4->out_table) cf_error("No export table in channel %s.%s", $4->proto->name, $4->name);
- rt_show_add_exporter($$, &$4->out_table->exporter, "export");
+ rt_show_add_exporter($$, $4->out_table, "export");
$$->tables_defined_by = RSD_TDB_DIRECT;
}
| r_args FILTER filter {
diff --git a/nest/proto.c b/nest/proto.c
index e7be8001..061205c1 100644
--- a/nest/proto.c
+++ b/nest/proto.c
@@ -559,10 +559,6 @@ channel_export_stopped(struct rt_export_request *req)
return;
}
- /* Free the routes from out_table */
- if (c->out_table)
- rt_prune_sync(c->out_table, 1);
-
mb_free(c->out_req.name);
c->out_req.name = NULL;
@@ -647,18 +643,6 @@ channel_setup_in_table(struct channel *c)
c->in_keep |= RIK_PREFILTER;
}
-/* Called by protocol to activate out_table */
-void
-channel_setup_out_table(struct channel *c)
-{
- struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config));
- cf->name = "export";
- cf->addr_type = c->net_type;
- cf->internal = 1;
-
- c->out_table = rt_setup(c->proto->pool, cf);
-}
-
static void
channel_do_start(struct channel *c)
diff --git a/nest/protocol.h b/nest/protocol.h
index a1701a7e..3ccd364a 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -537,7 +537,7 @@ struct channel {
u8 refeed_pending; /* Refeeding and another refeed is scheduled */
u8 rpki_reload; /* RPKI changes trigger channel reload */
- struct rtable *out_table; /* Internal table for exported routes */
+ struct rt_exporter *out_table; /* Internal table for exported routes */
list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */
};
@@ -610,7 +610,6 @@ int proto_configure_channel(struct proto *p, struct channel **c, struct channel_
void channel_set_state(struct channel *c, uint state);
void channel_setup_in_table(struct channel *c);
-void channel_setup_out_table(struct channel *c);
void channel_schedule_reload(struct channel *c);
static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); }
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index a66d4c6e..b31bc5cc 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -197,11 +197,14 @@ static struct idm src_ids;
#define RSH_INIT_ORDER 6
static HASH(struct rte_src) src_hash;
+static struct rte_src **rte_src_global;
+static uint rte_src_global_max = SRC_ID_INIT_SIZE;
static void
rte_src_init(void)
{
rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
+ rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max);
idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE);
@@ -232,10 +235,27 @@ rt_get_source(struct proto *p, u32 id)
src->uc = 0;
HASH_INSERT2(src_hash, RSH, rta_pool, src);
+ if (src->global_id >= rte_src_global_max)
+ {
+ rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2));
+ memset(&rte_src_global[rte_src_global_max / 2], 0,
+ sizeof(struct rte_src *) * (rte_src_global_max / 2));
+ }
+
+ rte_src_global[src->global_id] = src;
return src;
}
+struct rte_src *
+rt_find_source_global(u32 id)
+{
+ if (id >= rte_src_global_max)
+ return NULL;
+ else
+ return rte_src_global[id];
+}
+
void
rt_prune_sources(void)
{
@@ -1081,8 +1101,11 @@ ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad)
bsprintf(weight, " weight %d", nh->weight + 1);
if (ipa_nonzero(nh->gw))
- cli_printf(c, -1007, "\tvia %I on %s%s%s%s",
- nh->gw, nh->iface->name, mpls, onlink, weight);
+ if (nh->iface)
+ cli_printf(c, -1007, "\tvia %I on %s%s%s%s",
+ nh->gw, nh->iface->name, mpls, onlink, weight);
+ else
+ cli_printf(c, -1007, "\tvia %I", nh->gw);
else
cli_printf(c, -1007, "\tdev %s%s%s",
nh->iface->name, mpls, onlink, weight);
diff --git a/nest/rt-table.c b/nest/rt-table.c
index 5adf6346..29690414 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -820,17 +820,6 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old)
if (!new && old)
CHANNEL_LIMIT_POP(c, OUT);
- /* Apply export table */
- struct rte_storage *old_exported = NULL;
- if (c->out_table)
- {
- if (!rte_update_out(c, net, new, old, &old_exported))
- {
- channel_rte_trace_out(D_ROUTES, c, new, "idempotent");
- return;
- }
- }
-
if (new)
stats->updates_accepted++;
else
@@ -852,10 +841,7 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old)
channel_rte_trace_out(D_ROUTES, c, old, "removed");
}
- p->rt_notify(p, c, net, new, old_exported ? &old_exported->rte : old);
-
- if (c->out_table && old_exported)
- rte_free(old_exported);
+ p->rt_notify(p, c, net, new, old);
}
static void
@@ -1747,7 +1733,7 @@ rt_export_stopped(void *data)
hook->stopped(hook->req);
/* Reporting the hook as finished. */
- tab->done(hook);
+ CALL(tab->done, hook);
/* Freeing the hook together with its coroutine. */
rfree(hook->pool);
@@ -1763,7 +1749,7 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state)
hook->req->log_state_change(hook->req, state);
}
-static inline void
+void
rt_set_export_state(struct rt_export_hook *hook, u8 state)
{
hook->last_state_change = current_time();
@@ -1874,16 +1860,22 @@ rt_table_export_stop(struct rt_export_hook *hook)
if (hook->export_state != TES_FEEDING)
return;
- if (hook->walk_lock)
+ switch (hook->req->addr_mode)
{
- rt_unlock_trie(tab, hook->walk_lock);
- hook->walk_lock = NULL;
-
- mb_free(hook->walk_state);
- hook->walk_state = NULL;
+ case TE_ADDR_IN:
+ if (hook->walk_lock)
+ {
+ rt_unlock_trie(tab, hook->walk_lock);
+ hook->walk_lock = NULL;
+ mb_free(hook->walk_state);
+ hook->walk_state = NULL;
+ break;
+ }
+ /* fall through */
+ case TE_ADDR_NONE:
+ fit_get(&tab->fib, &hook->feed_fit);
+ break;
}
- else
- fit_get(&tab->fib, &hook->feed_fit);
}
void
@@ -1896,7 +1888,7 @@ rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_r
ev_postpone(hook->event);
/* Stop feeding from the exporter */
- hook->table->stop(hook);
+ CALL(hook->table->stop, hook);
/* Reset the event as the stopped event */
hook->event->hook = rt_export_stopped;
@@ -3397,130 +3389,6 @@ void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *n
}
}
-void
-rt_prune_sync(rtable *t, int all)
-{
- struct fib_iterator fit;
-
- FIB_ITERATE_INIT(&fit, &t->fib);
-
-again:
- FIB_ITERATE_START(&t->fib, &fit, net, n)
- {
- struct rte_storage *e, **ee = &n->routes;
-
- while (e = *ee)
- {
- if (all || (e->rte.flags & (REF_STALE | REF_DISCARD)))
- {
- *ee = e->next;
- rte_free(e);
- t->rt_count--;
- }
- else
- ee = &e->next;
- }
-
- if (all || !n->routes)
- {
- FIB_ITERATE_PUT(&fit);
- fib_delete(&t->fib, n);
- goto again;
- }
- }
- FIB_ITERATE_END;
-}
-
-
-/*
- * Export table
- */
-
-int
-rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old0, struct rte_storage **old_exported)
-{
- struct rtable *tab = c->out_table;
- struct rte_src *src;
- net *net;
-
- if (new)
- {
- net = net_get(tab, n);
- src = new->src;
- }
- else
- {
- net = net_find(tab, n);
- src = old0->src;
-
- if (!net)
- goto drop;
- }
-
- /* Find the old rte */
- struct rte_storage **pos = (c->ra_mode == RA_ANY) ? rte_find(net, src) : &net->routes;
- struct rte_storage *old = NULL;
-
- if (old = *pos)
- {
- if (new && rte_same(&(*pos)->rte, new))
- goto drop;
-
- /* Remove the old rte */
- *pos = old->next;
- *old_exported = old;
- tab->rt_count--;
- }
-
- if (!new)
- {
- if (!old)
- goto drop;
-
- if (!net->routes)
- fib_delete(&tab->fib, net);
-
- return 1;
- }
-
- /* Insert the new rte */
- struct rte_storage *e = rte_store(new, net, tab);
- e->rte.lastmod = current_time();
- e->next = *pos;
- *pos = e;
- tab->rt_count++;
- return 1;
-
-drop:
- return 0;
-}
-
-void
-rt_refeed_channel(struct channel *c)
-{
- if (!c->out_table)
- {
- channel_request_feeding(c);
- return;
- }
-
- ASSERT_DIE(c->ra_mode != RA_ANY);
-
- c->proto->feed_begin(c, 0);
-
- FIB_WALK(&c->out_table->fib, net, n)
- {
- if (!n->routes)
- continue;
-
- rte e = n->routes->rte;
- c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL);
- }
- FIB_WALK_END;
-
- c->proto->feed_end(c);
-}
-
/*
* Hostcache
diff --git a/nest/rt.h b/nest/rt.h
index 3ed56fcc..5a8c5a19 100644
--- a/nest/rt.h
+++ b/nest/rt.h
@@ -247,14 +247,20 @@ struct rt_export_hook {
u32 withdraws_received; /* Number of route withdraws received */
} stats;
- struct fib_iterator feed_fit; /* Routing table iterator used during feeding */
- struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
- struct f_trie *walk_lock; /* Locked trie for walking */
+ union {
+ struct fib_iterator feed_fit; /* Routing table iterator used during feeding */
+ struct {
+ struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */
+ struct f_trie *walk_lock; /* Locked trie for walking */
+ };
+ u32 hash_iter; /* Iterator over hash */
+ };
btime last_state_change; /* Time of last state transition */
u8 refeed_pending; /* Refeeding and another refeed is scheduled */
u8 export_state; /* Route export state (TES_*, see below) */
+ u8 feed_type; /* Which feeding method is used (TFT_*, see below) */
struct event *event; /* Event running all the export operations */
@@ -282,6 +288,10 @@ struct rt_export_hook {
#define TE_ADDR_IN 3 /* Interval query - show route in <addr> */
+#define TFT_FIB 1
+#define TFT_TRIE 2
+#define TFT_HASH 3
+
void rt_request_import(rtable *tab, struct rt_import_request *req);
void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req);
@@ -296,6 +306,8 @@ const char *rt_export_state_name(u8 state);
static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; }
static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; }
+void rt_set_export_state(struct rt_export_hook *hook, u8 state);
+
void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src);
/* Types of route announcement, also used as flags */
@@ -386,8 +398,6 @@ int rt_reload_channel(struct channel *c);
void rt_reload_channel_abort(struct channel *c);
void rt_refeed_channel(struct channel *c);
void rt_prune_sync(rtable *t, int all);
-int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src);
-int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported);
struct rtable_config *rt_new_table(struct symbol *s, uint addr_type);
static inline int rt_is_ip(rtable *tab)
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 22a53dce..084c9b63 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1550,7 +1550,6 @@ bgp_free_bucket_table(struct bgp_channel *c)
static struct bgp_bucket *
bgp_get_bucket(struct bgp_channel *c, ea_list *new)
{
-
/* Hash and lookup */
u32 hash = ea_hash(new);
struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
@@ -1571,8 +1570,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new)
/* Copy the ea_list */
ea_list_copy(b->eattrs, new, ea_size);
- /* Insert the bucket to send queue and bucket hash */
- add_tail(&c->bucket_queue, &b->send_node);
+ /* Insert the bucket to bucket hash */
HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
return b;
@@ -1590,14 +1588,30 @@ bgp_get_withdraw_bucket(struct bgp_channel *c)
return c->withdraw_bucket;
}
-void
-bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+static void
+bgp_free_bucket_xx(struct bgp_channel *c, struct bgp_bucket *b)
{
- rem_node(&b->send_node);
HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
mb_free(b);
}
+int
+bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b)
+{
+ /* Won't free the withdraw bucket */
+ if (b == c->withdraw_bucket)
+ return 0;
+
+ if (EMPTY_LIST(b->prefixes))
+ rem_node(&b->send_node);
+
+ if (b->px_uc || !EMPTY_LIST(b->prefixes))
+ return 0;
+
+ bgp_free_bucket_xx(c, b);
+ return 1;
+}
+
void
bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
{
@@ -1617,8 +1631,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
struct bgp_prefix *px = HEAD(b->prefixes);
log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
- rem_node(&px->buck_node);
- add_tail(&wb->prefixes, &px->buck_node);
+ rem_node(&px->buck_node_xx);
+ add_tail(&wb->prefixes, &px->buck_node_xx);
}
}
@@ -1629,7 +1643,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
#define PXH_KEY(px) px->net, px->path_id, px->hash
#define PXH_NEXT(px) px->next
-#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
+#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (c->add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2)
#define PXH_FN(n,i,h) h
#define PXH_REHASH bgp_pxh_rehash
@@ -1659,15 +1673,13 @@ bgp_free_prefix_table(struct bgp_channel *c)
static struct bgp_prefix *
bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
{
+ u32 path_id_hash = c->add_path_tx ? path_id : 0;
/* We must use a different hash function than the rtable */
- u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id));
- struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
+ u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash));
+ struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash);
if (px)
- {
- rem_node(&px->buck_node);
return px;
- }
if (c->prefix_slab)
px = sl_alloc(c->prefix_slab);
@@ -1684,10 +1696,64 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id)
return px;
}
-void
+static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px);
+
+static inline int
+bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b)
+{
+#define BPX_TRACE(what) do { \
+ if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \
+ c->c.proto->name, c->c.name, what, \
+ px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0)
+ px->lastmod = current_time();
+
+ /* Already queued for the same bucket */
+ if (px->cur == b)
+ {
+ BPX_TRACE("already queued");
+ return 0;
+ }
+
+ /* Unqueue from the old bucket */
+ if (px->cur)
+ {
+ rem_node(&px->buck_node_xx);
+ bgp_done_bucket(c, px->cur);
+ }
+
+ /* The new bucket is the same as we sent before */
+ if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket))
+ {
+ if (px->cur)
+ BPX_TRACE("reverted");
+ else
+ BPX_TRACE("already sent");
+
+ /* Well, we haven't sent anything yet */
+ if (!px->last)
+ bgp_free_prefix(c, px);
+
+ px->cur = NULL;
+ return 0;
+ }
+
+ /* Enqueue the bucket if it has been empty */
+ if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes))
+ add_tail(&c->bucket_queue, &b->send_node);
+
+ /* Enqueue to the new bucket and indicate the change */
+ add_tail(&b->prefixes, &px->buck_node_xx);
+ px->cur = b;
+
+ BPX_TRACE("queued");
+ return 1;
+
+#undef BPX_TRACE
+}
+
+static void
bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
{
- rem_node(&px->buck_node);
HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
if (c->prefix_slab)
@@ -1696,6 +1762,167 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
mb_free(px);
}
+void
+bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck)
+{
+ /* Cleanup: We're called from bucket senders. */
+ ASSERT_DIE(px->cur == buck);
+ rem_node(&px->buck_node_xx);
+
+ /* We may want to store the updates */
+ if (c->c.out_table)
+ {
+ /* Nothing to be sent right now */
+ px->cur = NULL;
+
+ /* Unref the previous sent version */
+ if (px->last)
+ px->last->px_uc--;
+
+ /* Ref the current sent version */
+ if (buck != c->withdraw_bucket)
+ {
+ px->last = buck;
+ px->last->px_uc++;
+ return;
+ }
+
+ /* Prefixes belonging to the withdraw bucket are freed always */
+ }
+
+ bgp_free_prefix(c, px);
+}
+
+
+/*
+ * Prefix hash table exporter
+ */
+
+static void
+bgp_out_table_feed(void *data)
+{
+ struct rt_export_hook *hook = data;
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table);
+
+ int max = 512;
+
+ const net_addr *neq = (hook->req->addr_mode == TE_ADDR_EQUAL) ? hook->req->addr : NULL;
+ const net_addr *cand = NULL;
+
+ do {
+ HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter)
+ {
+ switch (hook->req->addr_mode)
+ {
+ case TE_ADDR_IN:
+ if (!net_in_netX(n->net, hook->req->addr))
+ continue;
+ /* fall through */
+ case TE_ADDR_NONE:
+ /* Splitting only for multi-net exports */
+ if (--max <= 0)
+ HASH_WALK_ITER_PUT;
+ break;
+
+ case TE_ADDR_FOR:
+ if (!neq)
+ {
+ if (net_in_netX(hook->req->addr, n->net) && (!cand || (n->net->length > cand->length)))
+ cand = n->net;
+ continue;
+ }
+ /* fall through */
+ case TE_ADDR_EQUAL:
+ if (!net_equal(n->net, neq))
+ continue;
+ break;
+ }
+
+ struct bgp_bucket *buck = n->cur ?: n->last;
+ ea_list *ea = NULL;
+ if (buck == c->withdraw_bucket)
+ ea_set_dest(&ea, 0, RTD_UNREACHABLE);
+ else
+ {
+ ea = buck->eattrs;
+ eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP);
+ ASSERT_DIE(eanh);
+ const ip_addr *nh = (const void *) eanh->u.ptr->data;
+
+ struct nexthop_adata nhad = {
+ .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), },
+ .nh = { .gw = nh[0], },
+ };
+
+ ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad)));
+ }
+
+ struct rte_storage es = {
+ .rte = {
+ .attrs = ea,
+ .net = n->net,
+ .src = rt_find_source_global(n->path_id),
+ .sender = NULL,
+ .lastmod = n->lastmod,
+ .flags = n->cur ? REF_PENDING : 0,
+ },
+ };
+
+ struct rt_pending_export rpe = {
+ .new = &es, .new_best = &es,
+ };
+
+ if (hook->req->export_bulk)
+ {
+ rte *feed = &es.rte;
+ hook->req->export_bulk(hook->req, n->net, &rpe, &feed, 1);
+ }
+ else if (hook->req->export_one)
+ hook->req->export_one(hook->req, n->net, &rpe);
+ else
+ bug("No export method in export request");
+ }
+ HASH_WALK_ITER_END;
+
+ neq = cand;
+ cand = NULL;
+ } while (neq);
+
+ if (hook->hash_iter)
+ ev_schedule_work(hook->event);
+ else
+ rt_set_export_state(hook, TES_READY);
+}
+
+static struct rt_export_hook *
+bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED)
+{
+ struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re);
+ pool *p = rp_new(c->c.proto->pool, "Export hook");
+ struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook));
+ hook->pool = p;
+ hook->lp = lp_new_default(p);
+ hook->event = ev_new_init(p, bgp_out_table_feed, hook);
+ hook->feed_type = TFT_HASH;
+
+ return hook;
+}
+
+void
+bgp_setup_out_table(struct bgp_channel *c)
+{
+ ASSERT_DIE(c->c.out_table == NULL);
+
+ c->prefix_exporter = (struct rt_exporter) {
+ .addr_type = c->c.table->addr_type,
+ .start = bgp_out_table_export_start,
+ };
+
+ init_list(&c->prefix_exporter.hooks);
+
+ c->c.out_table = &c->prefix_exporter;
+}
+
/*
* BGP protocol glue
@@ -1894,7 +2121,6 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
struct bgp_proto *p = (void *) P;
struct bgp_channel *c = (void *) C;
struct bgp_bucket *buck;
- struct bgp_prefix *px;
u32 path;
if (new)
@@ -1915,10 +2141,8 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c
path = old->src->global_id;
}
- px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0);
- add_tail(&buck->prefixes, &px->buck_node);
-
- bgp_schedule_packet(p->conn, c, PKT_UPDATE);
+ if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck))
+ bgp_schedule_packet(p->conn, c, PKT_UPDATE);
}
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 8df99420..6ffe8824 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -1740,14 +1740,15 @@ bgp_channel_start(struct channel *C)
}
c->pool = p->p.pool; // XXXX
- bgp_init_bucket_table(c);
- bgp_init_prefix_table(c);
if (c->cf->import_table)
channel_setup_in_table(C);
if (c->cf->export_table)
- channel_setup_out_table(C);
+ bgp_setup_out_table(c);
+
+ bgp_init_bucket_table(c);
+ bgp_init_prefix_table(c);
c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index cca7873a..003893e0 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -156,7 +156,7 @@ struct bgp_channel_config {
u8 aigp_originate; /* AIGP is originated automatically */
u32 cost; /* IGP cost for direct next hops */
u8 import_table; /* Use c.in_table as Adj-RIB-In */
- u8 export_table; /* Use c.out_table as Adj-RIB-Out */
+ u8 export_table; /* Keep Adj-RIB-Out and export it */
struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
@@ -357,6 +357,8 @@ struct bgp_channel {
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
slab *prefix_slab; /* Slab holding prefix nodes */
+ struct rt_exporter prefix_exporter; /* Table-like exporter for prefix_hash */
+
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
@@ -378,8 +380,11 @@ struct bgp_channel {
};
struct bgp_prefix {
- node buck_node; /* Node in per-bucket list */
+ node buck_node_xx; /* Node in per-bucket list */
struct bgp_prefix *next; /* Node in prefix hash table */
+ struct bgp_bucket *last; /* Last bucket sent with this prefix */
+ struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */
+ btime lastmod; /* Last modification of this prefix */
u32 hash;
u32 path_id;
net_addr net[0];
@@ -388,8 +393,9 @@ struct bgp_prefix {
struct bgp_bucket {
node send_node; /* Node in send queue */
struct bgp_bucket *next; /* Node in bucket hash table */
- list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */
+ list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */
u32 hash; /* Hash over extended attributes */
+ u32 px_uc; /* How many prefixes are linking this bucket */
ea_list eattrs[0]; /* Per-bucket extended attributes */
};
@@ -556,15 +562,16 @@ int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte
ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len);
void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to);
+void bgp_setup_out_table(struct bgp_channel *c);
+
void bgp_init_bucket_table(struct bgp_channel *c);
void bgp_free_bucket_table(struct bgp_channel *c);
-void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b);
-void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b);
void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b);
+int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b);
void bgp_init_prefix_table(struct bgp_channel *c);
void bgp_free_prefix_table(struct bgp_channel *c);
-void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp);
+void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck);
int bgp_rte_better(struct rte *, struct rte *);
int bgp_rte_mergable(rte *pri, rte *sec);
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index c2e98340..4d4ae3eb 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -1488,7 +1488,7 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
@@ -1573,7 +1573,7 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
@@ -1661,7 +1661,7 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
@@ -1758,7 +1758,7 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b
memcpy(pos, &a, b);
ADVANCE(pos, size, b);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
@@ -1845,7 +1845,7 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
@@ -1933,7 +1933,7 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *
memcpy(pos, net->data, flen);
ADVANCE(pos, size, flen);
- bgp_free_prefix(s->channel, px);
+ bgp_done_prefix(s->channel, px, buck);
}
return pos - buf;
@@ -2167,6 +2167,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
* var IPv4 Network Layer Reachability Information
*/
+ ASSERT_DIE(s->channel->withdraw_bucket != buck);
+
int lr, la;
la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
@@ -2188,6 +2190,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
static byte *
bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
{
+ ASSERT_DIE(s->channel->withdraw_bucket != buck);
+
/*
* 2 B IPv4 Withdrawn Routes Length (zero)
* --- IPv4 Withdrawn Routes NLRI (unused)
@@ -2341,9 +2345,8 @@ again: ;
buck = HEAD(c->bucket_queue);
/* Cleanup empty buckets */
- if (EMPTY_LIST(buck->prefixes))
+ if (bgp_done_bucket(c, buck))
{
- bgp_free_bucket(c, buck);
lp_restore(tmp_linpool, &tmpp);
goto again;
}
@@ -2352,10 +2355,7 @@ again: ;
bgp_create_ip_reach(&s, buck, buf, end):
bgp_create_mp_reach(&s, buck, buf, end);
- if (EMPTY_LIST(buck->prefixes))
- bgp_free_bucket(c, buck);
- else
- bgp_defer_bucket(c, buck);
+ bgp_done_bucket(c, buck);
if (!res)
{
@@ -2724,7 +2724,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
{
case BGP_RR_REQUEST:
BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
- rt_refeed_channel(&c->c);
+ channel_request_feeding(&c->c);
break;
case BGP_RR_BEGIN:
@@ -2903,7 +2903,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
{
ASSERT(conn->sk);
- DBG("BGP: Scheduling packet type %d\n", type);
+ struct bgp_proto *p = conn->bgp;
+ if (c)
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name);
+ else
+ BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type);
if (c)
{