From 836a87b8acd5da40bde6b89702090c6616e06dfb Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 29 Nov 2021 19:23:42 +0100 Subject: Nest: Attach prefix trie to rtable for faster LPM and interval queries Attach a prefix trie to IP/VPN/ROA tables. Use it for net_route() and net_roa_check(). This leads to 3-5x speedups for IPv4 and 5-10x speedup for IPv6 of these calls. TODO: - Rebuild the trie during rt_prune_table() - Better way to avoid trie_add_prefix() in net_get() for existing tables - Make it configurable (?) --- nest/route.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'nest/route.h') diff --git a/nest/route.h b/nest/route.h index f5fc9e31..fa87e22c 100644 --- a/nest/route.h +++ b/nest/route.h @@ -20,7 +20,9 @@ struct proto; struct rte_src; struct symbol; struct timer; +struct fib; struct filter; +struct f_trie; struct cli; /* @@ -49,7 +51,7 @@ struct fib_iterator { /* See lib/slists.h for an explanation */ uint hash; }; -typedef void (*fib_init_fn)(void *); +typedef void (*fib_init_fn)(struct fib *, void *); struct fib { pool *fib_pool; /* Pool holding all our data */ @@ -149,6 +151,7 @@ struct rtable_config { int gc_min_time; /* Minimum time between two consecutive GC runs */ byte sorted; /* Routes of network are sorted according to rte_better() */ byte internal; /* Internal table of a protocol */ + byte trie_used; /* Rtable has attached trie */ btime min_settle_time; /* Minimum settle time for notifications */ btime max_settle_time; /* Maximum settle time for notifications */ }; @@ -158,6 +161,7 @@ typedef struct rtable { node n; /* Node in list of all tables */ pool *rp; /* Resource pool to allocate everything from, including itself */ struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ char *name; /* Name of this table */ list channels; /* List of attached channels (struct channel) */ uint addr_type; /* Type of address data stored in table (NET_*) */ @@ -324,7 +328,8 @@ static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) static inline net *net_find_valid(rtable *tab, const net_addr *addr) { net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? n : NULL; } static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -void *net_route(rtable *tab, const net_addr *n); +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); int net_roa_check(rtable *tab, const net_addr *n, u32 asn); rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); -- cgit v1.2.3 From ea97b8905197180bee5244bce378d03e4b741d88 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Thu, 2 Dec 2021 02:22:30 +0100 Subject: Nest: Implement 'show route in ' command Implement 'show route in ' command, which shows all routes in networks that are subnets of given network. Currently limited to IP network types. --- nest/config.Y | 14 +++++++++++--- nest/route.h | 7 ++++++- nest/rt-show.c | 9 +++++++-- 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'nest/route.h') diff --git a/nest/config.Y b/nest/config.Y index 7ead8589..310fce25 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -117,7 +117,7 @@ CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) +CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION, SORTED) CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) @@ -621,14 +621,22 @@ r_args: $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $2; + $$->addr_mode = RSD_ADDR_EQUAL; } | r_args FOR r_args_for { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); - $$->show_for = 1; $$->addr = $3; + $$->addr_mode = RSD_ADDR_FOR; } - | r_args TABLE CF_SYM_KNOWN { + | r_args IN net_any { + $$ = $1; + if ($$->addr) cf_error("Only one prefix expected"); + if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument"); + $$->addr = $3; + $$->addr_mode = RSD_ADDR_IN; + } +| r_args TABLE CF_SYM_KNOWN { cf_assert_symbol($3, SYM_TABLE); $$ = $1; rt_show_add_table($$, $3->table->table); diff --git a/nest/route.h b/nest/route.h index fa87e22c..ace4c7f7 100644 --- a/nest/route.h +++ b/nest/route.h @@ -384,7 +384,7 @@ struct rt_show_data { struct channel *export_channel; struct config *running_on_config; struct krt_proto *kernel; - int export_mode, primary_only, filtered, stats, show_for; + int export_mode, addr_mode, primary_only, filtered, stats; int table_open; /* Iteration (fit) is open */ int net_counter, rt_counter, show_counter, table_counter; @@ -403,6 +403,11 @@ struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t #define RSD_TDB_SET 0x1 /* internal: show empty tables */ #define RSD_TDB_NMN 0x2 /* internal: need matching net */ +/* Value of addr_mode */ +#define RSD_ADDR_EQUAL 1 /* Exact query - show route */ +#define RSD_ADDR_FOR 2 /* Longest prefix match - show route for */ +#define RSD_ADDR_IN 3 /* Interval query - show route in */ + /* Value of export_mode in struct rt_show_data */ #define RSEM_NONE 0 /* Export mode not used */ #define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ diff --git a/nest/rt-show.c b/nest/rt-show.c index 7691878d..b8c818f8 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -255,12 +255,17 @@ rt_show_cont(struct cli *c) FIB_ITERATE_START(fib, it, net, n) { + if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) + goto next; + if (!max--) { FIB_ITERATE_PUT(it); return; } rt_show_net(c, n, d); + + next:; } FIB_ITERATE_END; @@ -402,7 +407,7 @@ rt_show(struct rt_show_data *d) rt_show_prepare_tables(d); - if (!d->addr) + if (!d->addr || (d->addr_mode == RSD_ADDR_IN)) { WALK_LIST(tab, d->tables) rt_lock_table(tab->table); @@ -420,7 +425,7 @@ rt_show(struct rt_show_data *d) d->tab = tab; d->kernel = rt_show_get_kernel(d); - if (d->show_for) + if (d->addr_mode == RSD_ADDR_FOR) n = net_route(tab->table, d->addr); else n = net_find(tab->table, d->addr); -- cgit v1.2.3 From 9ac16df3d7239bc82d8016591755b41b14285608 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Thu, 2 Dec 2021 03:30:39 +0100 Subject: Nest: Add trie iteration code to 'show route' Add trie iteration code to rt_show_cont() CLI hook and use it to accelerate 'show route in ' commands using interval queries. --- nest/route.h | 3 +++ nest/rt-show.c | 62 +++++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 14 deletions(-) (limited to 'nest/route.h') diff --git a/nest/route.h b/nest/route.h index ace4c7f7..5f0c3578 100644 --- a/nest/route.h +++ b/nest/route.h @@ -23,6 +23,7 @@ struct timer; struct fib; struct filter; struct f_trie; +struct f_trie_walk_state; struct cli; /* @@ -377,6 +378,7 @@ struct rt_show_data { struct rt_show_data_rtable *tab; /* Iterator over table list */ struct rt_show_data_rtable *last_table; /* Last table in output */ struct fib_iterator fit; /* Iterator over networks in table */ + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ int verbose, tables_defined_by; const struct filter *filter; struct proto *show_protocol; @@ -387,6 +389,7 @@ struct rt_show_data { int export_mode, addr_mode, primary_only, filtered, stats; int table_open; /* Iteration (fit) is open */ + int trie_walk; /* Current table is iterated using trie */ int net_counter, rt_counter, show_counter, table_counter; int net_counter_last, rt_counter_last, show_counter_last; }; diff --git a/nest/rt-show.c b/nest/rt-show.c index b8c818f8..d8abab5f 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -15,6 +15,7 @@ #include "nest/cli.h" #include "nest/iface.h" #include "filter/filter.h" +#include "filter/data.h" #include "sysdep/unix/krt.h" static void @@ -212,7 +213,7 @@ rt_show_cleanup(struct cli *c) struct rt_show_data_rtable *tab; /* Unlink the iterator */ - if (d->table_open) + if (d->table_open && !d->trie_walk) fit_get(&d->tab->table->fib, &d->fit); /* Unlock referenced tables */ @@ -224,12 +225,13 @@ static void rt_show_cont(struct cli *c) { struct rt_show_data *d = c->rover; + struct rtable *tab = d->tab->table; #ifdef DEBUGGING unsigned max = 4; #else unsigned max = 64; #endif - struct fib *fib = &d->tab->table->fib; + struct fib *fib = &tab->fib; struct fib_iterator *it = &d->fit; if (d->running_on_config && (d->running_on_config != config)) @@ -240,7 +242,19 @@ rt_show_cont(struct cli *c) if (!d->table_open) { - FIB_ITERATE_INIT(&d->fit, &d->tab->table->fib); + /* We use either trie-based walk or fib-based walk */ + d->trie_walk = tab->trie && + (d->addr_mode == RSD_ADDR_IN) && + net_val_match(tab->addr_type, NB_IP); + + if (d->trie_walk && !d->walk_state) + d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); + + if (d->trie_walk) + trie_walk_init(d->walk_state, tab->trie, d->addr); + else + FIB_ITERATE_INIT(&d->fit, &tab->fib); + d->table_open = 1; d->table_counter++; d->kernel = rt_show_get_kernel(d); @@ -253,21 +267,41 @@ rt_show_cont(struct cli *c) rt_show_table(c, d); } - FIB_ITERATE_START(fib, it, net, n) + if (d->trie_walk) { - if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) - goto next; - - if (!max--) + /* Trie-based walk */ + net_addr addr; + while (trie_walk_next(d->walk_state, &addr)) { - FIB_ITERATE_PUT(it); - return; + net *n = net_find(tab, &addr); + if (!n) + continue; + + rt_show_net(c, n, d); + + if (!--max) + return; } - rt_show_net(c, n, d); + } + else + { + /* fib-based walk */ + FIB_ITERATE_START(fib, it, net, n) + { + if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) + goto next; - next:; + if (!max--) + { + FIB_ITERATE_PUT(it); + return; + } + rt_show_net(c, n, d); + + next:; + } + FIB_ITERATE_END; } - FIB_ITERATE_END; if (d->stats) { @@ -276,7 +310,7 @@ rt_show_cont(struct cli *c) cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, d->tab->table->name); + d->net_counter - d->net_counter_last, tab->name); } d->kernel = NULL; -- cgit v1.2.3 From fde1cff0122ee9d68f141976395e7f89ba28c311 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 20 Dec 2021 20:44:36 +0100 Subject: Nest: Add convenience functions to check rtable net type --- nest/route.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'nest/route.h') diff --git a/nest/route.h b/nest/route.h index 5f0c3578..a1732bc7 100644 --- a/nest/route.h +++ b/nest/route.h @@ -362,6 +362,18 @@ void rt_prune_sync(rtable *t, int all); int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); +static inline int rt_is_ip(rtable *tab) +{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } + +static inline int rt_is_vpn(rtable *tab) +{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } + +static inline int rt_is_roa(rtable *tab) +{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } + +static inline int rt_is_flow(rtable *tab) +{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } + /* Default limit for ECMP next hops, defined in sysdep code */ extern const int rt_default_ecmp; -- cgit v1.2.3 From 1f2eb2aca8e348fefc1822ec2adcad0cc97768d8 Mon Sep 17 00:00:00 2001 From: "Ondrej Zajicek (work)" Date: Mon, 20 Dec 2021 20:25:35 +0100 Subject: BGP: Implement flowspec validation procedure Implement flowspec validation procedure as described in RFC 8955 sec. 6 and RFC 9117. The Validation procedure enforces that only routers in the forwarding path for a network can originate flowspec rules for that network. The patch adds new mechanism for tracking inter-table dependencies, which is necessary as the flowspec validation depends on IP routes, and flowspec rules must be revalidated when best IP routes change. The validation procedure is disabled by default and requires that relevant IP table uses trie, as it uses interval queries for subnets. --- doc/bird.sgml | 28 +++- nest/route.h | 15 +++ nest/rt-table.c | 358 +++++++++++++++++++++++++++++++++++++++++++++++++--- proto/bgp/attrs.c | 4 + proto/bgp/bgp.c | 54 +++++++- proto/bgp/bgp.h | 6 +- proto/bgp/config.Y | 17 ++- proto/bgp/packets.c | 28 ++++ proto/pipe/pipe.c | 3 + 9 files changed, 487 insertions(+), 26 deletions(-) (limited to 'nest/route.h') diff --git a/doc/bird.sgml b/doc/bird.sgml index 39dadaf2..d1d2bdae 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2274,6 +2274,7 @@ avoid routing loops. - BGP Large Communities Attribute - BGP Administrative Shutdown Communication - Default EBGP Route Propagation Behavior without Policies + - Revised Validation Procedure for BGP Flow Specifications Route selection rules @@ -2659,7 +2660,7 @@ using the following configuration parameters: