From f8e273b5e7a3c721f4a30cf27a0b4fe54602e83f Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 14 Jun 2021 16:30:59 +0200 Subject: Nest: Fix export of tmpattrs through pipes In most cases of export there is no need to store back temporary attributes to rte, as receivers (protocols) access eattr list anyway. But pipe copies the original rte with old values, so we should store tmpattrs also during export. Thanks to Paul Donohue for the bugreport. --- nest/rt-table.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'nest') diff --git a/nest/rt-table.c b/nest/rt-table.c index 13209dd7..a7e31d85 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -618,6 +618,9 @@ export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int si goto reject; } + /* Needed for pipes */ + rte_store_tmp_attrs(rt, pool, NULL); + accept: if (rt != rt0) *rt_free = rt; -- cgit v1.2.3 From 3ebabab2778d05212cc07ebccf583159d5e0890a Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 14 Jun 2021 17:58:37 +0200 Subject: Revert "Nest: Fix export of tmpattrs through pipes" This reverts commit f8e273b5e7a3c721f4a30cf27a0b4fe54602e83f. --- nest/rt-table.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'nest') diff --git a/nest/rt-table.c b/nest/rt-table.c index a7e31d85..13209dd7 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -618,9 +618,6 @@ export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int si goto reject; } - /* Needed for pipes */ - rte_store_tmp_attrs(rt, pool, NULL); - accept: if (rt != rt0) *rt_free = rt; -- cgit v1.2.3 From 1b9bf4e192a252db861acadc7f800d7046435a3f Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 14 Jun 2021 20:02:50 +0200 Subject: Nest: Fix export of tmpattrs through pipes Pipes copy the original rte with old values, so they require rte to be exported with stored tmpattrs. Other protocols access stored attributes using eattr list, so they require rte to be exported with expanded tmpattrs. This is temporary hack, we plan to remove whoe tmpattr mechanism. Thanks to Paul Donohue for the bugreport. --- nest/rt-table.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'nest') diff --git a/nest/rt-table.c b/nest/rt-table.c index 13209dd7..390b3277 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -618,6 +618,12 @@ export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int si goto reject; } +#ifdef CONFIG_PIPE + /* Pipes need rte with stored tmpattrs, remaining protocols need expanded tmpattrs */ + if (p->proto == &proto_pipe) + rte_store_tmp_attrs(rt, pool, NULL); +#endif + accept: if (rt != rt0) *rt_free = rt; -- cgit v1.2.3 From f761be6b30633054a54369eee7d08b951a366e5e Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Thu, 17 Jun 2021 16:56:51 +0200 Subject: Nest: Clean up main channel handling Remove assumption that main channel is the only channel. --- nest/protocol.h | 2 +- proto/ospf/config.Y | 5 ++--- proto/radv/config.Y | 1 + proto/radv/radv.c | 2 +- proto/rip/rip.c | 2 +- proto/rpki/rpki.c | 2 +- proto/static/static.c | 2 +- sysdep/unix/krt.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'nest') diff --git a/nest/protocol.h b/nest/protocol.h index 48eb01d2..abcc505d 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -616,7 +616,7 @@ struct channel { struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) -{ struct channel_config *cc = HEAD(pc->channels); return NODE_VALID(cc) ? cc : NULL; } +{ return proto_cf_find_channel(pc, pc->net_type); } struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t); struct channel *proto_find_channel_by_name(struct proto *p, const char *n); diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index fd2cfe8a..4b7d5a36 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -85,7 +85,7 @@ ospf_proto_finish(void) struct ospf_iface_patt *ic; /* Define default channel */ - if (EMPTY_LIST(this_proto->channels)) + if (! proto_cf_main_channel(this_proto)) { uint net_type = this_proto->net_type = ospf_cfg_is_v2() ? NET_IP4 : NET_IP6; channel_config_new(NULL, net_label[net_type], net_type, this_proto); @@ -248,8 +248,7 @@ ospf_channel_start: net_type ospf_af_mc $$ = this_channel = channel_config_get(NULL, net_label[$1], $1, this_proto); /* Save the multicast flag */ - if (this_channel == proto_cf_main_channel(this_proto)) - OSPF_CFG->af_mc = $2; + OSPF_CFG->af_mc = $2; }; ospf_channel: ospf_channel_start channel_opt_list channel_end; diff --git a/proto/radv/config.Y b/proto/radv/config.Y index dda9cfcd..8d4a3ab9 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -46,6 +46,7 @@ proto: radv_proto ; radv_proto_start: proto_start RADV { this_proto = proto_config_new(&proto_radv, $1); + this_proto->net_type = NET_IP6; init_list(&RADV_CFG->patt_list); init_list(&RADV_CFG->pref_list); diff --git a/proto/radv/radv.c b/proto/radv/radv.c index b4235917..66e8eb4b 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -564,7 +564,7 @@ radv_postconfig(struct proto_config *CF) // struct radv_config *cf = (void *) CF; /* Define default channel */ - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) channel_config_new(NULL, net_label[NET_IP6], NET_IP6, CF); } diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 8b4719f7..e1a235a0 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -1105,7 +1105,7 @@ rip_postconfig(struct proto_config *CF) // struct rip_config *cf = (void *) CF; /* Define default channel */ - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) channel_config_new(NULL, net_label[CF->net_type], CF->net_type, CF); } diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 799cb877..ab0837f3 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -923,7 +923,7 @@ rpki_postconfig(struct proto_config *CF) { /* Define default channel */ if (EMPTY_LIST(CF->channels)) - channel_config_new(NULL, net_label[CF->net_type], CF->net_type, CF); + cf_error("Channel not specified"); } static void diff --git a/proto/static/static.c b/proto/static/static.c index 661f1aac..2789c1bb 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -434,7 +434,7 @@ static_postconfig(struct proto_config *CF) struct static_config *cf = (void *) CF; struct static_route *r; - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) cf_error("Channel not specified"); struct channel_config *cc = proto_cf_main_channel(CF); diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index ceb88563..7c2614b1 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -1013,7 +1013,7 @@ krt_postconfig(struct proto_config *CF) if (cf->c.class == SYM_TEMPLATE) return; - if (EMPTY_LIST(CF->channels)) + if (! proto_cf_main_channel(CF)) cf_error("Channel not specified"); #ifdef CONFIG_ALL_TABLES_AT_ONCE -- cgit v1.2.3 From 644e9ca94e2d10ba0c2de45f94523da2414328e3 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Wed, 24 Nov 2021 17:30:13 +0100 Subject: Directly mapped pages are kept for future use if temporarily not needed --- lib/resource.h | 1 + nest/cmds.c | 7 ++++++- sysdep/unix/alloc.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 3 deletions(-) (limited to 'nest') diff --git a/lib/resource.h b/lib/resource.h index e65455c8..76e3745f 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -98,6 +98,7 @@ void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_siz u64 get_page_size(void); void *alloc_page(void); void free_page(void *); +extern uint pages_kept; #ifdef HAVE_LIBDMALLOC /* diff --git a/nest/cmds.c b/nest/cmds.c index 18f39eb5..f58923a7 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -91,7 +91,12 @@ cmd_show_memory(void) print_size("Routing tables:", rmemsize(rt_table_pool)); print_size("Route attributes:", rmemsize(rta_pool)); print_size("Protocols:", rmemsize(proto_pool)); - print_size("Total:", rmemsize(&root_pool)); + size_t total = rmemsize(&root_pool); +#ifdef HAVE_MMAP + print_size("Standby memory:", get_page_size() * pages_kept); + total += get_page_size() * pages_kept; +#endif + print_size("Total:", total); cli_msg(0, ""); } diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index c525f713..5dd70c99 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -8,6 +8,8 @@ #include "nest/bird.h" #include "lib/resource.h" +#include "lib/lists.h" +#include "lib/event.h" #include #include @@ -17,8 +19,17 @@ #endif #ifdef HAVE_MMAP +#define KEEP_PAGES 512 + static u64 page_size = 0; static _Bool use_fake = 0; + +uint pages_kept = 0; +static list pages_list; + +static void cleanup_pages(void *data); +static event page_cleanup_event = { .hook = cleanup_pages }; + #else static const u64 page_size = 4096; /* Fake page size */ #endif @@ -48,6 +59,15 @@ void * alloc_page(void) { #ifdef HAVE_MMAP + if (pages_kept) + { + node *page = TAIL(pages_list); + rem_node(page); + pages_kept--; + memset(page, 0, get_page_size()); + return page; + } + if (!use_fake) { void *ret = mmap(NULL, get_page_size(), PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -71,10 +91,39 @@ free_page(void *ptr) #ifdef HAVE_MMAP if (!use_fake) { - if (munmap(ptr, get_page_size()) < 0) - bug("munmap(%p) failed: %m", ptr); + if (!pages_kept) + init_list(&pages_list); + + memset(ptr, 0, sizeof(node)); + add_tail(&pages_list, ptr); + + if (++pages_kept > KEEP_PAGES) + ev_schedule(&page_cleanup_event); } else #endif free(ptr); } + +#ifdef HAVE_MMAP +static void +cleanup_pages(void *data UNUSED) +{ + for (uint seen = 0; (pages_kept > KEEP_PAGES) && (seen < KEEP_PAGES); seen++) + { + void *ptr = HEAD(pages_list); + rem_node(ptr); + if (munmap(ptr, get_page_size()) == 0) + pages_kept--; +#ifdef ENOMEM + else if (errno == ENOMEM) + add_tail(&pages_list, ptr); +#endif + else + bug("munmap(%p) failed: %m", ptr); + } + + if (pages_kept > KEEP_PAGES) + ev_schedule(&page_cleanup_event); +} +#endif -- cgit v1.2.3 From f772afc525156498900770ffe5a98349df89a45c Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Sat, 27 Nov 2021 00:21:12 +0100 Subject: Memory statistics split into Effective and Overhead This feature is intended mostly for checking that BIRD's allocation strategies don't consume much memory space. There are some cases where withdrawing routes in a specific order lead to memory fragmentation and this output should give the user at least a notion of how much memory is actually used for data storage and how much memory is "just allocated" or used for overhead. Also raising the "system allocator overhead estimation" from 8 to 16 bytes; it is probably even more. I've found 16 as a local minimum in best scenarios among reachable machines. I couldn't find any reasonable method to estimate this value when BIRD starts up. This commit also fixes the inaccurate computation of memory overhead for slabs where the "system allocater overhead estimation" was improperly added to the size of mmap-ed memory. --- lib/mempool.c | 12 +++++++----- lib/resource.c | 32 +++++++++++++++++++++++--------- lib/resource.h | 12 +++++++++--- lib/slab.c | 29 ++++++++++++++++++++++------- nest/cmds.c | 44 +++++++++++++++++++++++++++++++++++--------- 5 files changed, 96 insertions(+), 33 deletions(-) (limited to 'nest') diff --git a/lib/mempool.c b/lib/mempool.c index 758882ce..90d7c774 100644 --- a/lib/mempool.c +++ b/lib/mempool.c @@ -45,7 +45,7 @@ struct linpool { static void lp_free(resource *); static void lp_dump(resource *); static resource *lp_lookup(resource *, unsigned long); -static size_t lp_memsize(resource *r); +static struct resmem lp_memsize(resource *r); static struct resclass lp_class = { "LinPool", @@ -287,7 +287,7 @@ lp_dump(resource *r) m->total_large); } -static size_t +static struct resmem lp_memsize(resource *r) { linpool *m = (linpool *) r; @@ -299,9 +299,11 @@ lp_memsize(resource *r) for(c=m->first_large; c; c=c->next) cnt++; - return ALLOC_OVERHEAD + sizeof(struct linpool) + - cnt * (ALLOC_OVERHEAD + sizeof(struct lp_chunk)) + - m->total + m->total_large; + return (struct resmem) { + .effective = m->total + m->total_large, + .overhead = ALLOC_OVERHEAD + sizeof(struct linpool) + + cnt * (ALLOC_OVERHEAD + sizeof(struct lp_chunk)), + }; } diff --git a/lib/resource.c b/lib/resource.c index 4c4b92ec..5d4c7780 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -2,6 +2,7 @@ * BIRD Resource Manager * * (c) 1998--2000 Martin Mares + * (c) 2021 Maria Matejka * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -37,7 +38,7 @@ struct pool { static void pool_dump(resource *); static void pool_free(resource *); static resource *pool_lookup(resource *, unsigned long); -static size_t pool_memsize(resource *P); +static struct resmem pool_memsize(resource *P); static struct resclass pool_class = { "Pool", @@ -97,15 +98,22 @@ pool_dump(resource *P) indent -= 3; } -static size_t +static struct resmem pool_memsize(resource *P) { pool *p = (pool *) P; resource *r; - size_t sum = sizeof(pool) + ALLOC_OVERHEAD; + struct resmem sum = { + .effective = 0, + .overhead = sizeof(pool) + ALLOC_OVERHEAD, + }; WALK_LIST(r, p->inside) - sum += rmemsize(r); + { + struct resmem add = rmemsize(r); + sum.effective += add.effective; + sum.overhead += add.overhead; + } return sum; } @@ -193,14 +201,17 @@ rdump(void *res) debug("NULL\n"); } -size_t +struct resmem rmemsize(void *res) { resource *r = res; if (!r) - return 0; + return (struct resmem) {}; if (!r->class->memsize) - return r->class->size + ALLOC_OVERHEAD; + return (struct resmem) { + .effective = r->class->size - sizeof(resource), + .overhead = ALLOC_OVERHEAD + sizeof(resource), + }; return r->class->memsize(r); } @@ -305,11 +316,14 @@ mbl_lookup(resource *r, unsigned long a) return NULL; } -static size_t +static struct resmem mbl_memsize(resource *r) { struct mblock *m = (struct mblock *) r; - return ALLOC_OVERHEAD + sizeof(struct mblock) + m->size; + return (struct resmem) { + .effective = m->size, + .overhead = ALLOC_OVERHEAD + sizeof(struct mblock), + }; } static struct resclass mb_class = { diff --git a/lib/resource.h b/lib/resource.h index 76e3745f..9ec41ed8 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -2,6 +2,7 @@ * BIRD Resource Manager * * (c) 1998--1999 Martin Mares + * (c) 2021 Maria Matejka * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -11,6 +12,11 @@ #include "lib/lists.h" +struct resmem { + size_t effective; /* Memory actually used for data storage */ + size_t overhead; /* Overhead memory imposed by allocator strategies */ +}; + /* Resource */ typedef struct resource { @@ -26,11 +32,11 @@ struct resclass { void (*free)(resource *); /* Freeing function */ void (*dump)(resource *); /* Dump to debug output */ resource *(*lookup)(resource *, unsigned long); /* Look up address (only for debugging) */ - size_t (*memsize)(resource *); /* Return size of memory used by the resource, may be NULL */ + struct resmem (*memsize)(resource *); /* Return size of memory used by the resource, may be NULL */ }; /* Estimate of system allocator overhead per item, for memory consumtion stats */ -#define ALLOC_OVERHEAD 8 +#define ALLOC_OVERHEAD 16 /* Generic resource manipulation */ @@ -40,7 +46,7 @@ void resource_init(void); pool *rp_new(pool *, const char *); /* Create new pool */ void rfree(void *); /* Free single resource */ void rdump(void *); /* Dump to debug output */ -size_t rmemsize(void *res); /* Return size of memory used by the resource */ +struct resmem rmemsize(void *res); /* Return size of memory used by the resource */ void rlookup(unsigned long); /* Look up address (only for debugging) */ void rmove(void *, pool *); /* Move to a different pool */ diff --git a/lib/slab.c b/lib/slab.c index b0a01ae7..6cab6b7b 100644 --- a/lib/slab.c +++ b/lib/slab.c @@ -42,7 +42,7 @@ static void slab_free(resource *r); static void slab_dump(resource *r); static resource *slab_lookup(resource *r, unsigned long addr); -static size_t slab_memsize(resource *r); +static struct resmem slab_memsize(resource *r); #ifdef FAKE_SLAB @@ -128,7 +128,7 @@ slab_dump(resource *r) debug("(%d objects per %d bytes)\n", cnt, s->size); } -static size_t +static struct resmem slab_memsize(resource *r) { slab *s = (slab *) r; @@ -138,7 +138,10 @@ slab_memsize(resource *r) WALK_LIST(o, s->objs) cnt++; - return ALLOC_OVERHEAD + sizeof(struct slab) + cnt * (ALLOC_OVERHEAD + s->size); + return (struct resmem) { + .effective = cnt * s->size, + .overhead = ALLOC_OVERHEAD + sizeof(struct slab) + cnt * ALLOC_OVERHEAD, + }; } @@ -363,21 +366,33 @@ slab_dump(resource *r) debug("(%de+%dp+%df blocks per %d objs per %d bytes)\n", ec, pc, fc, s->objs_per_slab, s->obj_size); } -static size_t +static struct resmem slab_memsize(resource *r) { slab *s = (slab *) r; size_t heads = 0; struct sl_head *h; - WALK_LIST(h, s->empty_heads) + WALK_LIST(h, s->full_heads) heads++; + + size_t items = heads * s->objs_per_slab; + WALK_LIST(h, s->partial_heads) + { heads++; - WALK_LIST(h, s->full_heads) + items += h->num_full; + } + + WALK_LIST(h, s->empty_heads) heads++; - return ALLOC_OVERHEAD + sizeof(struct slab) + heads * (ALLOC_OVERHEAD + get_page_size()); + size_t eff = items * s->obj_size; + + return (struct resmem) { + .effective = eff, + .overhead = ALLOC_OVERHEAD + sizeof(struct slab) + heads * get_page_size() - eff, + }; } static resource * diff --git a/nest/cmds.c b/nest/cmds.c index f58923a7..1a16f9c7 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -67,18 +67,43 @@ cmd_show_symbols(struct sym_show_data *sd) } } -static void -print_size(char *dsc, size_t val) +#define SIZE_SUFFIX " kMGT" +#define SIZE_FORMAT "% 4u.%1u % 1cB" +#define SIZE_ARGS(a) (a).val, (a).decimal, SIZE_SUFFIX[(a).magnitude] + +struct size_args { + u64 val:48; + u64 decimal:8; + u64 magnitude:8; +}; + +static struct size_args +get_size_args(u64 val) { - char *px = " kMG"; - int i = 0; - while ((val >= 10000) && (i < 3)) +#define VALDEC 10 /* One decimal place */ + val *= VALDEC; + + uint i = 0; + while ((val >= 10000 * VALDEC) && (i < 4)) { val = (val + 512) / 1024; i++; } - cli_msg(-1018, "%-17s %4u %cB", dsc, (unsigned) val, px[i]); + return (struct size_args) { + .val = (val / VALDEC), + .decimal = (val % VALDEC), + .magnitude = i, + }; +} + +static void +print_size(char *dsc, struct resmem vals) +{ + struct size_args effective = get_size_args(vals.effective); + struct size_args overhead = get_size_args(vals.overhead); + + cli_msg(-1018, "%-17s " SIZE_FORMAT " " SIZE_FORMAT, dsc, SIZE_ARGS(effective), SIZE_ARGS(overhead)); } extern pool *rt_table_pool; @@ -88,13 +113,14 @@ void cmd_show_memory(void) { cli_msg(-1018, "BIRD memory usage"); + cli_msg(-1018, "%-17s Effective Overhead", ""); print_size("Routing tables:", rmemsize(rt_table_pool)); print_size("Route attributes:", rmemsize(rta_pool)); print_size("Protocols:", rmemsize(proto_pool)); - size_t total = rmemsize(&root_pool); + struct resmem total = rmemsize(&root_pool); #ifdef HAVE_MMAP - print_size("Standby memory:", get_page_size() * pages_kept); - total += get_page_size() * pages_kept; + print_size("Standby memory:", (struct resmem) { .overhead = get_page_size() * pages_kept }); + total.overhead += get_page_size() * pages_kept; #endif print_size("Total:", total); cli_msg(0, ""); -- cgit v1.2.3 From b21104c97e59128973501fc23570e2d929f48923 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Sat, 18 Dec 2021 00:58:47 +0100 Subject: Nest: Do not ignore secondary flag changes in ifa updates Compare all IA_* flags that are set by sysdep iface code. The old code ignores IA_SECONDARY flag when comparing whether iface address updates from kernel changed anything. This is usually not an issue as kernel removes all secondary addresses due to removal of the primary one, but it breaks when sysctl 'promote_secondaries' is enabled and kernel promotes secondary addresses to primary ones. Thanks to 'Alexander' for the bugreport. --- nest/iface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'nest') diff --git a/nest/iface.c b/nest/iface.c index 83a633a3..682340c5 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -591,7 +591,7 @@ ifa_update(struct ifa *a) if (ipa_equal(b->brd, a->brd) && ipa_equal(b->opposite, a->opposite) && b->scope == a->scope && - !((b->flags ^ a->flags) & IA_PEER)) + !((b->flags ^ a->flags) & (IA_SECONDARY | IA_PEER | IA_HOST))) { b->flags |= IA_UPDATED; return b; -- cgit v1.2.3 From 0e1fd7ea6af8aaeb68a23e320c2a0a0a8480d6de Mon Sep 17 00:00:00 2001 From: Alexander Zubkov Date: Tue, 28 Dec 2021 04:05:05 +0100 Subject: Filter: Add operators to find minimum and maximum element of sets Add operators .min and .max to find minumum or maximum element in sets of types: clist, eclist, lclist. Example usage: bgp_community.min bgp_ext_community.max filter(bgp_large_community, [(as1, as2, *)]).min Signed-off-by: Alexander Zubkov --- filter/config.Y | 3 ++ filter/data.c | 11 +++++ filter/data.h | 2 + filter/f-inst.c | 68 +++++++++++++++++++++++++++++ filter/test.conf | 18 ++++++++ nest/a-set.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ nest/attrs.h | 6 +++ 7 files changed, 238 insertions(+) (limited to 'nest') diff --git a/filter/config.Y b/filter/config.Y index 2fb55e4e..8916ea97 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -287,6 +287,7 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, DEFINED, ADD, DELETE, CONTAINS, RESET, PREPEND, FIRST, LAST, LAST_NONAGGREGATED, MATCH, + MIN, MAX, EMPTY, FILTER, WHERE, EVAL, ATTRIBUTE, BT_ASSERT, BT_TEST_SUITE, BT_CHECK_ASSIGN, BT_TEST_SAME, FORMAT) @@ -800,6 +801,8 @@ term: | term '.' DATA { $$ = f_new_inst(FI_PAIR_DATA, $1); } | term '.' DATA1 { $$ = f_new_inst(FI_LC_DATA1, $1); } | term '.' DATA2 { $$ = f_new_inst(FI_LC_DATA2, $1); } + | term '.' MIN { $$ = f_new_inst(FI_MIN, $1); } + | term '.' MAX { $$ = f_new_inst(FI_MAX, $1); } /* Communities */ /* This causes one shift/reduce conflict diff --git a/filter/data.c b/filter/data.c index 7c33d2cb..56c1fb17 100644 --- a/filter/data.c +++ b/filter/data.c @@ -68,6 +68,17 @@ f_type_name(enum f_type t) return "?"; } +enum f_type +f_type_element_type(enum f_type t) +{ + switch(t) { + case T_CLIST: return T_PAIR; + case T_ECLIST: return T_EC; + case T_LCLIST: return T_LC; + default: return T_VOID; + }; +} + const struct f_val f_const_empty_path = { .type = T_PATH, .val.ad = &null_adata, diff --git a/filter/data.h b/filter/data.h index d296776d..bb815c34 100644 --- a/filter/data.h +++ b/filter/data.h @@ -188,6 +188,8 @@ void trie_format(const struct f_trie *t, buffer *buf); const char *f_type_name(enum f_type t); +enum f_type f_type_element_type(enum f_type t); + int val_same(const struct f_val *v1, const struct f_val *v2); int val_compare(const struct f_val *v1, const struct f_val *v2); void val_format(const struct f_val *v, buffer *buf); diff --git a/filter/f-inst.c b/filter/f-inst.c index 7e9e77d2..901d2939 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -983,6 +983,74 @@ RESULT(T_INT, i, v1.val.lc.ldp2); } + INST(FI_MIN, 1, 1) { /* Get minimum element from set */ + ARG_ANY(1); + RESULT_TYPE(f_type_element_type(v1.type)); + switch(v1.type) + { + case T_CLIST: + { + u32 val = 0; + int_set_min(v1.val.ad, &val); + RESULT_(T_PAIR, i, val); + } + break; + + case T_ECLIST: + { + u64 val = 0; + ec_set_min(v1.val.ad, &val); + RESULT_(T_EC, ec, val); + } + break; + + case T_LCLIST: + { + lcomm val = { 0, 0, 0 }; + lc_set_min(v1.val.ad, &val); + RESULT_(T_LC, lc, val); + } + break; + + default: + runtime( "Clist or lclist expected" ); + } + } + + INST(FI_MAX, 1, 1) { /* Get maximum element from set */ + ARG_ANY(1); + RESULT_TYPE(f_type_element_type(v1.type)); + switch(v1.type) + { + case T_CLIST: + { + u32 val = 0; + int_set_max(v1.val.ad, &val); + RESULT_(T_PAIR, i, val); + } + break; + + case T_ECLIST: + { + u64 val = 0; + ec_set_max(v1.val.ad, &val); + RESULT_(T_EC, ec, val); + } + break; + + case T_LCLIST: + { + lcomm val = { 0, 0, 0 }; + lc_set_max(v1.val.ad, &val); + RESULT_(T_LC, lc, val); + } + break; + + default: + runtime( "Clist or lclist expected" ); + } + } + INST(FI_RETURN, 1, 1) { NEVER_CONSTANT; /* Acquire the return value */ diff --git a/filter/test.conf b/filter/test.conf index 6f2f6fb5..6a28e4b3 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -781,6 +781,12 @@ clist r; r = filter(l, [(3,1), (*,2)]); bt_assert(r = add(add(-empty-, (3,1)), (3,2))); bt_assert(format(r) = "(clist (3,1) (3,2))"); + + # minimim & maximum element + r = add(add(add(add(add(-empty-, (2,1)), (1,3)), (2,2)), (3,1)), (2,3)); + bt_assert(format(r) = "(clist (2,1) (1,3) (2,2) (3,1) (2,3))"); + bt_assert(r.min = (1,3)); + bt_assert(r.max = (3,1)); } bt_test_suite(t_clist, "Testing lists of communities"); @@ -886,6 +892,12 @@ eclist r; r = filter(el, [(rt, 10, 1), (rt, 10, 25..30), (ro, 10, 40)]); bt_assert(r = add(add(--empty--, (rt, 10, 1)), (rt, 10, 30))); bt_assert(format(r) = "(eclist (rt, 10, 1) (rt, 10, 30))"); + + # minimim & maximum element + r = add(add(add(add(add(--empty--, (rt, 2, 1)), (rt, 1, 3)), (rt, 2, 2)), (rt, 3, 1)), (rt, 2, 3)); + bt_assert(format(r) = "(eclist (rt, 2, 1) (rt, 1, 3) (rt, 2, 2) (rt, 3, 1) (rt, 2, 3))"); + bt_assert(r.min = (rt, 1, 3)); + bt_assert(r.max = (rt, 3, 1)); } bt_test_suite(t_eclist, "Testing lists of extended communities"); @@ -995,6 +1007,12 @@ lclist r; r = filter(ll, [(5..15, *, *), (20, 15..25, *)]); bt_assert(r = add(add(---empty---, (10, 10, 10)), (20, 20, 20))); bt_assert(format(r) = "(lclist (10, 10, 10) (20, 20, 20))"); + + # minimim & maximum element + r = add(add(add(add(add(---empty---, (2, 3, 3)), (1, 2, 3)), (2, 3, 1)), (3, 1, 2)), (2, 1, 3)); + bt_assert(format(r) = "(lclist (2, 3, 3) (1, 2, 3) (2, 3, 1) (3, 1, 2) (2, 1, 3))"); + bt_assert(r.min = (1, 2, 3)); + bt_assert(r.max = (3, 1, 2)); } bt_test_suite(t_lclist, "Testing lists of large communities"); diff --git a/nest/a-set.c b/nest/a-set.c index 1186eb56..71fbac94 100644 --- a/nest/a-set.c +++ b/nest/a-set.c @@ -516,6 +516,48 @@ int_set_sort(struct linpool *pool, const struct adata *src) return dst; } +int +int_set_min(const struct adata *list, u32 *val) +{ + if (!list) + return 0; + + u32 *l = (u32 *) list->data; + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + *val = *l++; + for (i = 1; i < len; i++, l++) + if (int_set_cmp(val, l) > 0) + *val = *l; + + return 1; +} + +int +int_set_max(const struct adata *list, u32 *val) +{ + if (!list) + return 0; + + u32 *l = (u32 *) list->data; + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + *val = *l++; + for (i = 1; i < len; i++, l++) + if (int_set_cmp(val, l) < 0) + *val = *l; + + return 1; +} + static int ec_set_cmp(const void *X, const void *Y) @@ -541,6 +583,50 @@ ec_set_sort_x(struct adata *set) qsort(set->data, set->length / 8, 8, ec_set_cmp); } +int +ec_set_min(const struct adata *list, u64 *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 2; + for (i = 2; i < len; i += 2, l += 2) + if (ec_set_cmp(res, l) > 0) + res = l; + + *val = ec_generic(res[0], res[1]); + return 1; +} + +int +ec_set_max(const struct adata *list, u64 *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 2; + for (i = 2; i < len; i += 2, l += 2) + if (ec_set_cmp(res, l) < 0) + res = l; + + *val = ec_generic(res[0], res[1]); + return 1; +} + static int lc_set_cmp(const void *X, const void *Y) @@ -563,3 +649,47 @@ lc_set_sort(struct linpool *pool, const struct adata *src) qsort(dst->data, dst->length / LCOMM_LENGTH, LCOMM_LENGTH, lc_set_cmp); return dst; } + +int +lc_set_min(const struct adata *list, lcomm *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 3; + for (i = 3; i < len; i += 3, l += 3) + if (lc_set_cmp(res, l) > 0) + res = l; + + *val = (lcomm) { res[0], res[1], res[2] }; + return 1; +} + +int +lc_set_max(const struct adata *list, lcomm *val) +{ + if (!list) + return 0; + + u32 *l = int_set_get_data(list); + int len = int_set_get_size(list); + int i; + + if (len < 1) + return 0; + + u32 *res = l; l += 3; + for (i = 3; i < len; i += 3, l += 3) + if (lc_set_cmp(res, l) < 0) + res = l; + + *val = (lcomm) { res[0], res[1], res[2] }; + return 1; +} diff --git a/nest/attrs.h b/nest/attrs.h index 50da817b..ef2b95e6 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -218,6 +218,12 @@ struct adata *ec_set_del_nontrans(struct linpool *pool, const struct adata *set) struct adata *int_set_sort(struct linpool *pool, const struct adata *src); struct adata *ec_set_sort(struct linpool *pool, const struct adata *src); struct adata *lc_set_sort(struct linpool *pool, const struct adata *src); +int int_set_min(const struct adata *list, u32 *val); +int ec_set_min(const struct adata *list, u64 *val); +int lc_set_min(const struct adata *list, lcomm *val); +int int_set_max(const struct adata *list, u32 *val); +int ec_set_max(const struct adata *list, u64 *val); +int lc_set_max(const struct adata *list, lcomm *val); void ec_set_sort_x(struct adata *set); /* Sort in place */ -- cgit v1.2.3 From 836a87b8acd5da40bde6b89702090c6616e06dfb Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 29 Nov 2021 19:23:42 +0100 Subject: Nest: Attach prefix trie to rtable for faster LPM and interval queries Attach a prefix trie to IP/VPN/ROA tables. Use it for net_route() and net_roa_check(). This leads to 3-5x speedups for IPv4 and 5-10x speedup for IPv6 of these calls. TODO: - Rebuild the trie during rt_prune_table() - Better way to avoid trie_add_prefix() in net_get() for existing tables - Make it configurable (?) --- lib/net.h | 1 + nest/route.h | 9 +- nest/rt-fib.c | 2 +- nest/rt-table.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++------ proto/babel/babel.c | 2 +- 5 files changed, 270 insertions(+), 36 deletions(-) (limited to 'nest') diff --git a/lib/net.h b/lib/net.h index 8eb4c7b9..9f4a00ad 100644 --- a/lib/net.h +++ b/lib/net.h @@ -38,6 +38,7 @@ #define NB_IP (NB_IP4 | NB_IP6) #define NB_VPN (NB_VPN4 | NB_VPN6) +#define NB_ROA (NB_ROA4 | NB_ROA6) #define NB_FLOW (NB_FLOW4 | NB_FLOW6) #define NB_DEST (NB_IP | NB_IP6_SADR | NB_VPN | NB_MPLS) #define NB_ANY 0xffffffff diff --git a/nest/route.h b/nest/route.h index f5fc9e31..fa87e22c 100644 --- a/nest/route.h +++ b/nest/route.h @@ -20,7 +20,9 @@ struct proto; struct rte_src; struct symbol; struct timer; +struct fib; struct filter; +struct f_trie; struct cli; /* @@ -49,7 +51,7 @@ struct fib_iterator { /* See lib/slists.h for an explanation */ uint hash; }; -typedef void (*fib_init_fn)(void *); +typedef void (*fib_init_fn)(struct fib *, void *); struct fib { pool *fib_pool; /* Pool holding all our data */ @@ -149,6 +151,7 @@ struct rtable_config { int gc_min_time; /* Minimum time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ byte internal; /* Internal table of a protocol */ + byte trie_used; /* Rtable has attached trie */ btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ }; @@ -158,6 +161,7 @@ typedef struct rtable { node n; /* Node in list of all tables */ pool *rp; /* Resource pool to allocate everything from, including itself */ struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ char *name; /* Name of this table */ list channels; /* List of attached channels (struct channel) */ uint addr_type; /* Type of address data stored in table (NET_*) */ @@ -324,7 +328,8 @@ static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) static inline net *net_find_valid(rtable *tab, const net_addr *addr) { net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? n : NULL; } static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -void *net_route(rtable *tab, const net_addr *n); +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); int net_roa_check(rtable *tab, const net_addr *n, u32 asn); rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); diff --git a/nest/rt-fib.c b/nest/rt-fib.c index a7f70371..1690a8f6 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -331,7 +331,7 @@ fib_get(struct fib *f, const net_addr *a) memset(b, 0, f->node_offset); if (f->init) - f->init(b); + f->init(f, b); if (f->entries++ > f->entries_max) fib_rehash(f, HASH_HI_STEP); diff --git a/nest/rt-table.c b/nest/rt-table.c index 390b3277..f4f25497 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -64,39 +64,178 @@ static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); -/* Like fib_route(), but skips empty net entries */ +static void +net_init_with_trie(struct fib *f, void *N) +{ + rtable *tab = SKIP_BACK(rtable, fib, f); + net *n = N; + + if (tab->trie) + trie_add_prefix(tab->trie, n->n.addr, n->n.addr->pxlen, n->n.addr->pxlen); +} + +static inline net * +net_route_ip4_trie(rtable *t, const net_addr_ip4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn4_trie(rtable *t, const net_addr_vpn4 *n0) +{ + TRIE_WALK_TO_ROOT_IP4(t->trie, (const net_addr_ip4 *) n0, px) + { + net_addr_vpn4 n = NET_ADDR_VPN4(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip6_trie(rtable *t, const net_addr_ip6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, n0, n) + { + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_vpn6_trie(rtable *t, const net_addr_vpn6 *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_vpn6 n = NET_ADDR_VPN6(px.prefix, px.pxlen, n0->rd); + + net *r; + if (r = net_find_valid(t, (net_addr *) &n)) + return r; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + static inline void * -net_route_ip4(rtable *t, net_addr_ip4 *n) +net_route_ip6_sadr_trie(rtable *t, const net_addr_ip6_sadr *n0) +{ + TRIE_WALK_TO_ROOT_IP6(t->trie, (const net_addr_ip6 *) n0, px) + { + net_addr_ip6_sadr n = NET_ADDR_IP6_SADR(px.prefix, px.pxlen, n0->src_prefix, n0->src_pxlen); + net *best = NULL; + int best_pxlen = 0; + + /* We need to do dst first matching. Since sadr addresses are hashed on dst + prefix only, find the hash table chain and go through it to find the + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) + { + net_addr_ip6_sadr *a = (void *) fn->addr; + + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && + (a->src_pxlen >= best_pxlen)) + { + best = fib_node_to_user(&t->fib, fn); + best_pxlen = a->src_pxlen; + } + } + + if (best) + return best; + } + TRIE_WALK_TO_ROOT_END; + + return NULL; +} + +static inline net * +net_route_ip4_fib(rtable *t, const net_addr_ip4 *n0) { + net_addr_ip4 n; + net_copy_ip4(&n, n0); + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); + } + + return r; +} - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) +static inline net * +net_route_vpn4_fib(rtable *t, const net_addr_vpn4 *n0) +{ + net_addr_vpn4 n; + net_copy_vpn4(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) { - n->pxlen--; - ip4_clrbit(&n->prefix, n->pxlen); + n.pxlen--; + ip4_clrbit(&n.prefix, n.pxlen); } return r; } -static inline void * -net_route_ip6(rtable *t, net_addr_ip6 *n) +static inline net * +net_route_ip6_fib(rtable *t, const net_addr_ip6 *n0) { + net_addr_ip6 n; + net_copy_ip6(&n, n0); + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) + { + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); + } + + return r; +} - while (r = net_find_valid(t, (net_addr *) n), (!r) && (n->pxlen > 0)) +static inline net * +net_route_vpn6_fib(rtable *t, const net_addr_vpn6 *n0) +{ + net_addr_vpn6 n; + net_copy_vpn6(&n, n0); + + net *r; + while (r = net_find_valid(t, (net_addr *) &n), (!r) && (n.pxlen > 0)) { - n->pxlen--; - ip6_clrbit(&n->prefix, n->pxlen); + n.pxlen--; + ip6_clrbit(&n.prefix, n.pxlen); } return r; } static inline void * -net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) +net_route_ip6_sadr_fib(rtable *t, const net_addr_ip6_sadr *n0) { - struct fib_node *fn; + net_addr_ip6_sadr n; + net_copy_ip6_sadr(&n, n0); while (1) { @@ -105,13 +244,13 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) /* We need to do dst first matching. Since sadr addresses are hashed on dst prefix only, find the hash table chain and go through it to find the - match with the smallest matching src prefix. */ - for (fn = fib_get_chain(&t->fib, (net_addr *) n); fn; fn = fn->next) + match with the longest matching src prefix. */ + for (struct fib_node *fn = fib_get_chain(&t->fib, (net_addr *) &n); fn; fn = fn->next) { net_addr_ip6_sadr *a = (void *) fn->addr; - if (net_equal_dst_ip6_sadr(n, a) && - net_in_net_src_ip6_sadr(n, a) && + if (net_equal_dst_ip6_sadr(&n, a) && + net_in_net_src_ip6_sadr(&n, a) && (a->src_pxlen >= best_pxlen)) { best = fib_node_to_user(&t->fib, fn); @@ -122,38 +261,52 @@ net_route_ip6_sadr(rtable *t, net_addr_ip6_sadr *n) if (best) return best; - if (!n->dst_pxlen) + if (!n.dst_pxlen) break; - n->dst_pxlen--; - ip6_clrbit(&n->dst_prefix, n->dst_pxlen); + n.dst_pxlen--; + ip6_clrbit(&n.dst_prefix, n.dst_pxlen); } return NULL; } -void * +net * net_route(rtable *tab, const net_addr *n) { ASSERT(tab->addr_type == n->type); - net_addr *n0 = alloca(n->length); - net_copy(n0, n); - switch (n->type) { case NET_IP4: + if (tab->trie) + return net_route_ip4_trie(tab, (net_addr_ip4 *) n); + else + return net_route_ip4_fib (tab, (net_addr_ip4 *) n); + case NET_VPN4: - case NET_ROA4: - return net_route_ip4(tab, (net_addr_ip4 *) n0); + if (tab->trie) + return net_route_vpn4_trie(tab, (net_addr_vpn4 *) n); + else + return net_route_vpn4_fib (tab, (net_addr_vpn4 *) n); case NET_IP6: + if (tab->trie) + return net_route_ip6_trie(tab, (net_addr_ip6 *) n); + else + return net_route_ip6_fib (tab, (net_addr_ip6 *) n); + case NET_VPN6: - case NET_ROA6: - return net_route_ip6(tab, (net_addr_ip6 *) n0); + if (tab->trie) + return net_route_vpn6_trie(tab, (net_addr_vpn6 *) n); + else + return net_route_vpn6_fib (tab, (net_addr_vpn6 *) n); case NET_IP6_SADR: - return net_route_ip6_sadr(tab, (net_addr_ip6_sadr *) n0); + if (tab->trie) + return net_route_ip6_sadr_trie(tab, (net_addr_ip6_sadr *) n); + else + return net_route_ip6_sadr_fib (tab, (net_addr_ip6_sadr *) n); default: return NULL; @@ -162,7 +315,35 @@ net_route(rtable *tab, const net_addr *n) static int -net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) +net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP4(tab->trie, px, px0) + { + net_addr_roa4 roa0 = NET_ADDR_ROA4(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa4 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) { struct net_addr_roa4 n = NET_ADDR_ROA4(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -194,7 +375,35 @@ net_roa_check_ip4(rtable *tab, const net_addr_ip4 *px, u32 asn) } static int -net_roa_check_ip6(rtable *tab, const net_addr_ip6 *px, u32 asn) +net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) +{ + int anything = 0; + + TRIE_WALK_TO_ROOT_IP6(tab->trie, px, px0) + { + net_addr_roa6 roa0 = NET_ADDR_ROA6(px0.prefix, px0.pxlen, 0, 0); + + struct fib_node *fn; + for (fn = fib_get_chain(&tab->fib, (net_addr *) &roa0); fn; fn = fn->next) + { + net_addr_roa6 *roa = (void *) fn->addr; + net *r = fib_node_to_user(&tab->fib, fn); + + if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes)) + { + anything = 1; + if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) + return ROA_VALID; + } + } + } + TRIE_WALK_TO_ROOT_END; + + return anything ? ROA_INVALID : ROA_UNKNOWN; +} + +static int +net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) { struct net_addr_roa6 n = NET_ADDR_ROA6(px->prefix, px->pxlen, 0, 0); struct fib_node *fn; @@ -244,9 +453,19 @@ int net_roa_check(rtable *tab, const net_addr *n, u32 asn) { if ((tab->addr_type == NET_ROA4) && (n->type == NET_IP4)) - return net_roa_check_ip4(tab, (const net_addr_ip4 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip4_trie(tab, (const net_addr_ip4 *) n, asn); + else + return net_roa_check_ip4_fib (tab, (const net_addr_ip4 *) n, asn); + } else if ((tab->addr_type == NET_ROA6) && (n->type == NET_IP6)) - return net_roa_check_ip6(tab, (const net_addr_ip6 *) n, asn); + { + if (tab->trie) + return net_roa_check_ip6_trie(tab, (const net_addr_ip6 *) n, asn); + else + return net_roa_check_ip6_fib (tab, (const net_addr_ip6 *) n, asn); + } else return ROA_UNKNOWN; /* Should not happen */ } @@ -1940,6 +2159,14 @@ rt_setup(pool *pp, struct rtable_config *cf) fib_init(&t->fib, p, t->addr_type, sizeof(net), OFFSETOF(net, n), 0, NULL); + if (cf->trie_used) + { + t->trie = f_new_trie(lp_new_default(p), 0); + t->trie->ipv4 = net_val_match(t->addr_type, NB_IP4 | NB_VPN4 | NB_ROA4); + + t->fib.init = net_init_with_trie; + } + if (!(t->internal = cf->internal)) { init_list(&t->channels); @@ -2352,6 +2579,7 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_min_time = 5; c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->trie_used = net_val_match(addr_type, NB_IP | NB_VPN | NB_ROA | NB_IP6_SADR); add_tail(&new_config->tables, &c->n); diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 1e87212c..e43818f5 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -63,7 +63,7 @@ static inline void babel_iface_kick_timer(struct babel_iface *ifa); */ static void -babel_init_entry(void *E) +babel_init_entry(struct fib *f UNUSED, void *E) { struct babel_entry *e = E; -- cgit v1.2.3 From ea97b8905197180bee5244bce378d03e4b741d88 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Thu, 2 Dec 2021 02:22:30 +0100 Subject: Nest: Implement 'show route in ' command Implement 'show route in ' command, which shows all routes in networks that are subnets of given network. Currently limited to IP network types. --- nest/config.Y | 14 +++++++++++--- nest/route.h | 7 ++++++- nest/rt-show.c | 9 +++++++-- 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'nest') diff --git a/nest/config.Y b/nest/config.Y index 7ead8589..310fce25 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -117,7 +117,7 @@ CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) +CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) @@ -621,14 +621,22 @@ r_args: $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $2; + $$->addr_mode = RSD_ADDR_EQUAL; } | r_args FOR r_args_for { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); - $$->show_for = 1; $$->addr = $3; + $$->addr_mode = RSD_ADDR_FOR; } - | r_args TABLE CF_SYM_KNOWN { + | r_args IN net_any { + $$ = $1; + if ($$->addr) cf_error("Only one prefix expected"); + if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument"); + $$->addr = $3; + $$->addr_mode = RSD_ADDR_IN; + } +| r_args TABLE CF_SYM_KNOWN { cf_assert_symbol($3, SYM_TABLE); $$ = $1; rt_show_add_table($$, $3->table->table); diff --git a/nest/route.h b/nest/route.h index fa87e22c..ace4c7f7 100644 --- a/nest/route.h +++ b/nest/route.h @@ -384,7 +384,7 @@ struct rt_show_data { struct channel *export_channel; struct config *running_on_config; struct krt_proto *kernel; - int export_mode, primary_only, filtered, stats, show_for; + int export_mode, addr_mode, primary_only, filtered, stats; int table_open; /* Iteration (fit) is open */ int net_counter, rt_counter, show_counter, table_counter; @@ -403,6 +403,11 @@ struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t #define RSD_TDB_SET 0x1 /* internal: show empty tables */ #define RSD_TDB_NMN 0x2 /* internal: need matching net */ +/* Value of addr_mode */ +#define RSD_ADDR_EQUAL 1 /* Exact query - show route */ +#define RSD_ADDR_FOR 2 /* Longest prefix match - show route for */ +#define RSD_ADDR_IN 3 /* Interval query - show route in */ + /* Value of export_mode in struct rt_show_data */ #define RSEM_NONE 0 /* Export mode not used */ #define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ diff --git a/nest/rt-show.c b/nest/rt-show.c index 7691878d..b8c818f8 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -255,12 +255,17 @@ rt_show_cont(struct cli *c) FIB_ITERATE_START(fib, it, net, n) { + if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) + goto next; + if (!max--) { FIB_ITERATE_PUT(it); return; } rt_show_net(c, n, d); + + next:; } FIB_ITERATE_END; @@ -402,7 +407,7 @@ rt_show(struct rt_show_data *d) rt_show_prepare_tables(d); - if (!d->addr) + if (!d->addr || (d->addr_mode == RSD_ADDR_IN)) { WALK_LIST(tab, d->tables) rt_lock_table(tab->table); @@ -420,7 +425,7 @@ rt_show(struct rt_show_data *d) d->tab = tab; d->kernel = rt_show_get_kernel(d); - if (d->show_for) + if (d->addr_mode == RSD_ADDR_FOR) n = net_route(tab->table, d->addr); else n = net_find(tab->table, d->addr); -- cgit v1.2.3 From 9ac16df3d7239bc82d8016591755b41b14285608 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Thu, 2 Dec 2021 03:30:39 +0100 Subject: Nest: Add trie iteration code to 'show route' Add trie iteration code to rt_show_cont() CLI hook and use it to accelerate 'show route in ' commands using interval queries. --- nest/route.h | 3 +++ nest/rt-show.c | 62 +++++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 14 deletions(-) (limited to 'nest') diff --git a/nest/route.h b/nest/route.h index ace4c7f7..5f0c3578 100644 --- a/nest/route.h +++ b/nest/route.h @@ -23,6 +23,7 @@ struct timer; struct fib; struct filter; struct f_trie; +struct f_trie_walk_state; struct cli; /* @@ -377,6 +378,7 @@ struct rt_show_data { struct rt_show_data_rtable *tab; /* Iterator over table list */ struct rt_show_data_rtable *last_table; /* Last table in output */ struct fib_iterator fit; /* Iterator over networks in table */ + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ int verbose, tables_defined_by; const struct filter *filter; struct proto *show_protocol; @@ -387,6 +389,7 @@ struct rt_show_data { int export_mode, addr_mode, primary_only, filtered, stats; int table_open; /* Iteration (fit) is open */ + int trie_walk; /* Current table is iterated using trie */ int net_counter, rt_counter, show_counter, table_counter; int net_counter_last, rt_counter_last, show_counter_last; }; diff --git a/nest/rt-show.c b/nest/rt-show.c index b8c818f8..d8abab5f 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -15,6 +15,7 @@ #include "nest/cli.h" #include "nest/iface.h" #include "filter/filter.h" +#include "filter/data.h" #include "sysdep/unix/krt.h" static void @@ -212,7 +213,7 @@ rt_show_cleanup(struct cli *c) struct rt_show_data_rtable *tab; /* Unlink the iterator */ - if (d->table_open) + if (d->table_open && !d->trie_walk) fit_get(&d->tab->table->fib, &d->fit); /* Unlock referenced tables */ @@ -224,12 +225,13 @@ static void rt_show_cont(struct cli *c) { struct rt_show_data *d = c->rover; + struct rtable *tab = d->tab->table; #ifdef DEBUGGING unsigned max = 4; #else unsigned max = 64; #endif - struct fib *fib = &d->tab->table->fib; + struct fib *fib = &tab->fib; struct fib_iterator *it = &d->fit; if (d->running_on_config && (d->running_on_config != config)) @@ -240,7 +242,19 @@ rt_show_cont(struct cli *c) if (!d->table_open) { - FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); + /* We use either trie-based walk or fib-based walk */ + d->trie_walk = tab->trie && + (d->addr_mode == RSD_ADDR_IN) && + net_val_match(tab->addr_type, NB_IP); + + if (d->trie_walk && !d->walk_state) + d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); + + if (d->trie_walk) + trie_walk_init(d->walk_state, tab->trie, d->addr); + else + FIB_ITERATE_INIT(&d->fit, &tab->fib); + d->table_open = 1; d->table_counter++; d->kernel = rt_show_get_kernel(d); @@ -253,21 +267,41 @@ rt_show_cont(struct cli *c) rt_show_table(c, d); } - FIB_ITERATE_START(fib, it, net, n) + if (d->trie_walk) { - if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) - goto next; - - if (!max--) + /* Trie-based walk */ + net_addr addr; + while (trie_walk_next(d->walk_state, &addr)) { - FIB_ITERATE_PUT(it); - return; + net *n = net_find(tab, &addr); + if (!n) + continue; + + rt_show_net(c, n, d); + + if (!--max) + return; } - rt_show_net(c, n, d); + } + else + { + /* fib-based walk */ + FIB_ITERATE_START(fib, it, net, n) + { + if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) + goto next; - next:; + if (!max--) + { + FIB_ITERATE_PUT(it); + return; + } + rt_show_net(c, n, d); + + next:; + } + FIB_ITERATE_END; } - FIB_ITERATE_END; if (d->stats) { @@ -276,7 +310,7 @@ rt_show_cont(struct cli *c) cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, d->tab->table->name); + d->net_counter - d->net_counter_last, tab->name); } d->kernel = NULL; -- cgit v1.2.3 From 61375bd0b3803fada0d7bb5b81b5824bab16b7c1 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Thu, 2 Dec 2021 04:05:17 +0100 Subject: Nest: Avoid unnecessary net_format() in 'show route' command When output of 'show route' command was generated, the net_format() was called for each network prematurely, even if the result was not needed. Fix the code to call net_format() only when needed. This makes queries that process many networks but show only few (e.g. 'show route where ..', or 'show route count') much faster (like 5x - 10x faster). --- nest/rt-show.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'nest') diff --git a/nest/rt-show.c b/nest/rt-show.c index d8abab5f..ea1f918c 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -111,10 +111,9 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) ASSUME(!d->export_mode || ec); int first = 1; + int first_show = 1; int pass = 0; - bsnprintf(ia, sizeof(ia), "%N", n->n.addr); - for (e = n->routes; e; e = e->next) { if (rte_is_filtered(e) != d->filtered) @@ -188,10 +187,17 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) goto skip; if (d->stats < 2) + { + if (first_show) + net_format(n->n.addr, ia, sizeof(ia)); + else + ia[0] = 0; + rt_show_rte(c, ia, e, d, (e->net->routes == ee)); + first_show = 0; + } d->show_counter++; - ia[0] = 0; skip: if (e != ee) -- cgit v1.2.3 From fde1cff0122ee9d68f141976395e7f89ba28c311 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 20 Dec 2021 20:44:36 +0100 Subject: Nest: Add convenience functions to check rtable net type --- nest/route.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'nest') diff --git a/nest/route.h b/nest/route.h index 5f0c3578..a1732bc7 100644 --- a/nest/route.h +++ b/nest/route.h @@ -362,6 +362,18 @@ void rt_prune_sync(rtable *t, int all); int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); +static inline int rt_is_ip(rtable *tab) +{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } + +static inline int rt_is_vpn(rtable *tab) +{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } + +static inline int rt_is_roa(rtable *tab) +{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } + +static inline int rt_is_flow(rtable *tab) +{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } + /* Default limit for ECMP next hops, defined in sysdep code */ extern const int rt_default_ecmp; -- cgit v1.2.3 From 1ae42e522374ae60c23fe4c419c62b2209fbeea8 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Wed, 22 Dec 2021 04:32:26 +0100 Subject: Nest: Add routing table configuration blocks Allow to specify sorted flag, trie fla, and min/max settle time. Also do not enable trie by default, it must be explicitly enabled. --- nest/config.Y | 39 +++++++++++++++++++++++++++++++-------- nest/rt-table.c | 52 +++++++++++++++++++++++++++++----------------------- 2 files changed, 60 insertions(+), 31 deletions(-) (limited to 'nest') diff --git a/nest/config.Y b/nest/config.Y index 310fce25..72bc7930 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -17,6 +17,7 @@ CF_HDR CF_DEFINES +static struct rtable_config *this_table; static struct proto_config *this_proto; static struct channel_config *this_channel; static struct iface_patt *this_ipatt; @@ -118,12 +119,13 @@ CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERE CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) -CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) +CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION) CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) +CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) @@ -141,7 +143,7 @@ CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) %type optproto %type r_args %type sym_args -%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type table_sorted tos password_algorithm +%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_mode limit_action net_type tos password_algorithm %type proto_patt proto_patt2 %type channel_start proto_channel %type limit_spec @@ -206,16 +208,37 @@ CF_ENUM(T_ENUM_NETTYPE, NET_, IP4, IP6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, IP conf: table ; +table: table_start table_sorted table_opt_list ; + +table_start: net_type TABLE symbol { + this_table = rt_new_table($3, $1); + } + ; + table_sorted: - { $$ = 0; } - | SORTED { $$ = 1; } + /* empty */ + | SORTED { this_table->sorted = 1; } ; -table: net_type TABLE symbol table_sorted { - struct rtable_config *cf; - cf = rt_new_table($3, $1); - cf->sorted = $4; +table_opt: + SORTED bool { this_table->sorted = $2; } + | TRIE bool { + if (!net_val_match(this_table->addr_type, NB_IP | NB_VPN | NB_ROA | NB_IP6_SADR)) + cf_error("Trie option not supported for %s table", net_label[this_table->addr_type]); + this_table->trie_used = $2; } + | MIN SETTLE TIME expr_us { this_table->min_settle_time = $4; } + | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } + ; + +table_opts: + /* empty */ + | table_opts table_opt ';' + ; + +table_opt_list: + /* empty */ + | '{' table_opts '}' ; diff --git a/nest/rt-table.c b/nest/rt-table.c index f4f25497..94ae987b 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -2579,7 +2579,6 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_min_time = 5; c->min_settle_time = 1 S; c->max_settle_time = 20 S; - c->trie_used = net_val_match(addr_type, NB_IP | NB_VPN | NB_ROA | NB_IP6_SADR); add_tail(&new_config->tables, &c->n); @@ -2625,6 +2624,22 @@ rt_unlock_table(rtable *r) } } +static int +rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) +{ + if ((new->addr_type != old->addr_type) || + (new->sorted != old->sorted) || + (new->trie_used != old->trie_used)) + return 0; + + DBG("\t%s: same\n", new->name); + new->table = tab; + tab->name = new->name; + tab->config = new; + + return 1; +} + static struct rtable_config * rt_find_table_config(struct config *cf, char *name) { @@ -2654,28 +2669,19 @@ rt_commit(struct config *new, struct config *old) { WALK_LIST(o, old->tables) { - rtable *ot = o->table; - if (!ot->deleted) - { - r = rt_find_table_config(new, o->name); - if (r && (r->addr_type == o->addr_type) && !new->shutdown) - { - DBG("\t%s: same\n", o->name); - r->table = ot; - ot->name = r->name; - ot->config = r; - if (o->sorted != r->sorted) - log(L_WARN "Reconfiguration of rtable sorted flag not implemented"); - } - else - { - DBG("\t%s: deleted\n", o->name); - ot->deleted = old; - config_add_obstacle(old); - rt_lock_table(ot); - rt_unlock_table(ot); - } - } + rtable *tab = o->table; + if (tab->deleted) + continue; + + r = rt_find_table_config(new, o->name); + if (r && !new->shutdown && rt_reconfigure(tab, r, o)) + continue; + + DBG("\t%s: deleted\n", o->name); + tab->deleted = old; + config_add_obstacle(old); + rt_lock_table(tab); + rt_unlock_table(tab); } } -- cgit v1.2.3 From 1f2eb2aca8e348fefc1822ec2adcad0cc97768d8 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 20 Dec 2021 20:25:35 +0100 Subject: BGP: Implement flowspec validation procedure Implement flowspec validation procedure as described in RFC 8955 sec. 6 and RFC 9117. The Validation procedure enforces that only routers in the forwarding path for a network can originate flowspec rules for that network. The patch adds new mechanism for tracking inter-table dependencies, which is necessary as the flowspec validation depends on IP routes, and flowspec rules must be revalidated when best IP routes change. The validation procedure is disabled by default and requires that relevant IP table uses trie, as it uses interval queries for subnets. --- doc/bird.sgml | 28 +++- nest/route.h | 15 +++ nest/rt-table.c | 358 +++++++++++++++++++++++++++++++++++++++++++++++++--- proto/bgp/attrs.c | 4 + proto/bgp/bgp.c | 54 +++++++- proto/bgp/bgp.h | 6 +- proto/bgp/config.Y | 17 ++- proto/bgp/packets.c | 28 ++++ proto/pipe/pipe.c | 3 + 9 files changed, 487 insertions(+), 26 deletions(-) (limited to 'nest') diff --git a/doc/bird.sgml b/doc/bird.sgml index 39dadaf2..d1d2bdae 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2274,6 +2274,7 @@ avoid routing loops. - BGP Large Communities Attribute - BGP Administrative Shutdown Communication - Default EBGP Route Propagation Behavior without Policies + - Revised Validation Procedure for BGP Flow Specifications Route selection rules @@ -2659,7 +2660,7 @@ using the following configuration parameters: