diff options
149 files changed, 10510 insertions, 7547 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7809fecd..0a758cff 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -134,21 +134,11 @@ docker_fedora-34-amd64: IMG_NAME: "fedora-34-amd64" <<: *docker_build -docker_centos-7-amd64: - variables: - IMG_NAME: "centos-7-amd64" - <<: *docker_build - docker_centos-8-amd64: variables: IMG_NAME: "centos-8-amd64" <<: *docker_build -docker_ubuntu-14_04-amd64: - variables: - IMG_NAME: "ubuntu-14.04-amd64" - <<: *docker_build - docker_ubuntu-16_04-amd64: variables: IMG_NAME: "ubuntu-16.04-amd64" @@ -312,18 +302,10 @@ build-fedora-34-amd64: <<: *build-linux image: registry.nic.cz/labs/bird:fedora-33-amd64 -build-centos-7-amd64: - <<: *build-linux - image: registry.nic.cz/labs/bird:centos-7-amd64 - build-centos-8-amd64: <<: *build-linux image: registry.nic.cz/labs/bird:centos-8-amd64 -build-ubuntu-14_04-amd64: - <<: *build-linux - image: registry.nic.cz/labs/bird:ubuntu-14.04-amd64 - build-ubuntu-16_04-amd64: <<: *build-linux image: registry.nic.cz/labs/bird:ubuntu-16.04-amd64 @@ -466,14 +448,7 @@ pkg-fedora-33-amd64: pkg-fedora-34-amd64: <<: *pkg-rpm needs: [build-fedora-34-amd64] - image: registry.nic.cz/labs/bird:fedora-34-amd64 - -pkg-centos-7-amd64: - <<: *pkg-rpm-wa - variables: - LC_ALL: en_US.UTF-8 - needs: [build-centos-7-amd64] - image: registry.nic.cz/labs/bird:centos-7-amd64 + image: registry.labs.nic.cz/labs/bird:fedora-34-amd64 pkg-centos-8-amd64: <<: *pkg-rpm-wa @@ -27,9 +27,9 @@ Requirements For compiling BIRD you need these programs and libraries: - - GNU C Compiler (or LLVM Clang) + - GNU C Compiler (or LLVM Clang) capable of compiling C11 code - GNU Make - - GNU Bison + - GNU Bison (at least 3.0) - GNU M4 - Flex diff --git a/Makefile.in b/Makefile.in index e0ff4a1d..0d55807b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -82,6 +82,9 @@ conf-lex-targets := $(addprefix $(objdir)/conf/,cf-lex.o) conf-y-targets := $(addprefix $(objdir)/conf/,cf-parse.y keywords.h commands.h) cf-local = $(conf-y-targets): $(s)config.Y +# nest/Makefile declarations needed for all other modules +proto-build-c := $(addprefix $(objdir)/nest/,proto-build.c) + src-o-files = $(patsubst %.c,$(o)%.o,$(src)) tests-target-files = $(patsubst %.c,$(o)%,$(tests_src)) @@ -95,6 +98,13 @@ else o = $(patsubst $(srcdir)%,$(objdir)%,$(s)) endif +define proto-build_in = +PROTO_BUILD += $(1) +$(proto-build-c): $(lastword $(MAKEFILE_LIST)) +endef + +proto-build = $(eval $(call proto-build_in,$(1))) + define clean_in = clean:: rm -f $(addprefix $(o),$(1)) @@ -1,5 +1,32 @@ dnl ** Additional Autoconf tests for BIRD configure script dnl ** (c) 1999 Martin Mares <mj@ucw.cz> +dnl ** (c) 2021 Maria Matejka <mq@jmq.cz> + +AC_DEFUN([BIRD_CHECK_POINTER_ALIGNMENT], +[ + AC_CACHE_CHECK( + [how pointers are aligned], + [bird_cv_pointer_alignment], + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM( + [ + _Static_assert(_Alignof(void *) == 8, "bad"); + ], [] + ) + ], + [bird_cv_pointer_alignment=8], + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM( + [ + _Static_assert(_Alignof(void *) == 4, "bad"); + ], [] + ) + ], + [bird_cv_pointer_alignment=4], + [bird_cv_pointer_alignment=unknown] + )) + ) +]) AC_DEFUN([BIRD_CHECK_THREAD_LOCAL], [ @@ -9,14 +36,23 @@ AC_DEFUN([BIRD_CHECK_THREAD_LOCAL], AC_COMPILE_IFELSE([ AC_LANG_PROGRAM( [ - _Thread_local static int x = 42; + static _Thread_local int x = 42; ], [] ) ], [bird_cv_thread_local=yes], + [AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM( + [ + static __thread int x = 42; + ], + [] + ) + ], + [bird_cv_thread_local=__thread], [bird_cv_thread_local=no] - ) + )]) ) ]) diff --git a/bird-gdb.py b/bird-gdb.py index 3cf65a9c..077703f9 100644 --- a/bird-gdb.py +++ b/bird-gdb.py @@ -25,7 +25,6 @@ class BIRDFValPrinter(BIRDPrinter): "T_ENUM_RTS": "i", "T_ENUM_BGP_ORIGIN": "i", "T_ENUM_SCOPE": "i", - "T_ENUM_RTC": "i", "T_ENUM_RTD": "i", "T_ENUM_ROA": "i", "T_ENUM_NETTYPE": "i", @@ -123,7 +122,7 @@ class BIRDFLinePrinter(BIRDPrinter): "n": n, "code": str(self.val['items'][n]['fi_code']), } if n % 8 == 0 else str(self.val['items'][n]['fi_code']) for n in range(cnt)])) - + class BIRDFExecStackPrinter(BIRDPrinter): "Print BIRD's struct f_exec_stack" @@ -141,6 +140,118 @@ class BIRDFExecStackPrinter(BIRDPrinter): "n": n } for n in range(cnt-1, -1, -1) ]) + +class BIRD: + def skip_back(t, i, v): + if isinstance(t, str): + t = gdb.lookup_type(t) + elif isinstance(t, gdb.Value): + t = gdb.lookup_type(t.string()) + elif not isinstance(t, gdb.Type): + raise Exception(f"First argument of skip_back(t, i, v) must be a type, got {type(t)}") + + t = t.strip_typedefs() + nullptr = gdb.Value(0).cast(t.pointer()) + + if isinstance(i, gdb.Value): + i = i.string() + elif not isinstance(i, str): + raise Exception(f"Second argument of skip_back(t, i, v) must be a item name, got {type(i)}") + + if not isinstance(v, gdb.Value): + raise Exception(f"Third argument of skip_back(t, i, v) must be a value, got {type(v)}") + if v.type.code != gdb.TYPE_CODE_PTR and v.type.code != gdb.TYPE_CODE_REF: + raise Exception(f"Third argument of skip_back(t, i, v) must be a pointer, is {v.type} ({v.type.code})") + if v.type.target().strip_typedefs() != nullptr[i].type: + raise Exception(f"Third argument of skip_back(t, i, v) points to type {v.type.target().strip_typedefs()}, should be {nullptr[i].type}") + + uintptr_t = gdb.lookup_type("uintptr_t") + taddr = v.dereference().address.cast(uintptr_t) - nullptr[i].address.cast(uintptr_t) + return gdb.Value(taddr).cast(t.pointer()) + + class skip_back_gdb(gdb.Function): + "Given address of a structure item, returns address of the structure, as the SKIP_BACK macro does" + def __init__(self): + gdb.Function.__init__(self, "SKIP_BACK") + + def invoke(self, t, i, v): + return BIRD.skip_back(t, i, v) + + +BIRD.skip_back_gdb() + + +class BIRDList: + def __init__(self, val): + ltype = val.type.strip_typedefs() + if ltype.code != gdb.TYPE_CODE_UNION or ltype.tag != "list": + raise Exception(f"Not a list, is type {ltype}") + + self.head = val["head"] + self.tail_node = val["tail_node"] + + if str(self.head.address) == '0x0': + raise Exception("List head is NULL") + + if str(self.tail_node["prev"].address) == '0x0': + raise Exception("List tail is NULL") + + def walk(self, do): + cur = self.head + while cur.dereference() != self.tail_node: + do(cur) + cur = cur.dereference()["next"] + + +class BIRDListLength(gdb.Function): + """Returns length of the list, as in + print $list_length(routing_tables)""" + def __init__(self): + super(BIRDListLength, self).__init__("list_length") + + def count(self, _): + self.cnt += 1 + + def invoke(self, l): + self.cnt = 0 + BIRDList(l).walk(self.count) + return self.cnt + +BIRDListLength() + +class BIRDListItem(gdb.Function): + """Returns n-th item of the list.""" + def __init__(self): + super(BIRDListItem, self).__init__("list_item") + + class BLException(Exception): + def __init__(self, node, msg): + Exception.__init__(self, msg) + self.node = node + + def count(self, node): + if self.cnt == self.pos: + raise self.BLException(node, "Node found") + + self.cnt += 1 + + def invoke(self, l, n, t=None, item="n"): + self.cnt = 0 + self.pos = n + bl = BIRDList(l) + try: + bl.walk(self.count) + except self.BLException as e: + if t is None: + return e.node + else: + return BIRD.skip_back(t, item, e.node) + + raise Exception("List too short") + +BIRDListItem() + + def register_printers(objfile): objfile.pretty_printers.append(BIRDFInstPrinter.lookup) objfile.pretty_printers.append(BIRDFValPrinter.lookup) diff --git a/conf/cf-lex.l b/conf/cf-lex.l index d6f46223..04e0b3a5 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -42,7 +42,7 @@ #define PARSER 1 #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "filter/f-inst.h" @@ -77,19 +77,22 @@ static uint cf_hash(const byte *c); #define SYM_NEXT(n) n->next #define SYM_EQ(a,s1,b,s2) !strcmp(a,b) && s1 == s2 #define SYM_FN(k,s) cf_hash(k) -#define SYM_ORDER 6 /* Initial */ +#define SYM_ORDER 4 /* Initial */ #define SYM_REHASH sym_rehash -#define SYM_PARAMS /8, *1, 2, 2, 6, 20 +#define SYM_PARAMS /8, *1, 2, 2, 4, 20 HASH_DEFINE_REHASH_FN(SYM, struct symbol) HASH(struct keyword) kw_hash; - +HASH(struct ea_class) ea_name_hash; struct sym_scope *conf_this_scope; +static struct sym_scope global_root_scope__init = { .active = 1, }; +struct sym_scope *global_root_scope = &global_root_scope__init; + linpool *cfg_mem; int (*cf_read_hook)(byte *buf, unsigned int max, int fd); @@ -347,7 +350,7 @@ else: { return DDOT; } -[={}:;,.()+*/%<>~\[\]?!\|-] { +[={}:;,.()+*/%<>~\[\]?!\|&-] { return yytext[0]; } @@ -591,41 +594,58 @@ cf_new_symbol(const byte *c) *s = (struct symbol) { .scope = conf_this_scope, .class = SYM_VOID, }; strcpy(s->name, c); - if (!new_config->sym_hash.data) - HASH_INIT(new_config->sym_hash, new_config->pool, SYM_ORDER); + if (!conf_this_scope->hash.data) + HASH_INIT(conf_this_scope->hash, new_config->pool, SYM_ORDER); + + HASH_INSERT2(conf_this_scope->hash, SYM, new_config->pool, s); + + if (conf_this_scope == new_config->root_scope) + add_tail(&(new_config->symbols), &(s->n)); + + return s; +} + +static struct symbol * +cf_root_symbol(const byte *c) +{ + uint l = strlen(c); + if (l > SYM_MAX_LEN) + bug("Root symbol %s too long", c); - HASH_INSERT2(new_config->sym_hash, SYM, new_config->pool, s); + struct symbol *s = mb_alloc(&root_pool, sizeof(struct symbol) + l + 1); + *s = (struct symbol) { .scope = global_root_scope, .class = SYM_VOID, }; + memcpy(s->name, c, l+1); - add_tail(&(new_config->symbols), &(s->n)); + if (!global_root_scope->hash.data) + HASH_INIT(global_root_scope->hash, &root_pool, SYM_ORDER); + HASH_INSERT2(global_root_scope->hash, SYM, &root_pool, s); return s; } + /** - * cf_find_symbol - find a symbol by name - * @cfg: specificed config + * cf_find_symbol_scope - find a symbol by name + * @scope: config scope * @c: symbol name * - * This functions searches the symbol table in the config @cfg for a symbol of - * given name. First it examines the current scope, then the second recent one + * This functions searches the symbol table in the scope @scope for a symbol of + * given name. First it examines the current scope, then the underlying one * and so on until it either finds the symbol and returns a pointer to its * &symbol structure or reaches the end of the scope chain and returns %NULL to * signify no match. */ struct symbol * -cf_find_symbol(const struct config *cfg, const byte *c) +cf_find_symbol_scope(const struct sym_scope *scope, const byte *c) { struct symbol *s; - if (cfg->sym_hash.data && - (s = HASH_FIND(cfg->sym_hash, SYM, c, 1))) - return s; - - /* In CLI command parsing, fallback points to the current config, otherwise it is NULL. */ - if (cfg->fallback && - cfg->fallback->sym_hash.data && - (s = HASH_FIND(cfg->fallback->sym_hash, SYM, c, 1))) - return s; + /* Find the symbol here or anywhere below */ + while (scope) + if (scope->hash.data && (s = HASH_FIND(scope->hash, SYM, c, 1))) + return s; + else + scope = scope->next; return NULL; } @@ -642,7 +662,7 @@ cf_find_symbol(const struct config *cfg, const byte *c) struct symbol * cf_get_symbol(const byte *c) { - return cf_find_symbol(new_config, c) ?: cf_new_symbol(c); + return cf_find_symbol_scope(conf_this_scope, c) ?: cf_new_symbol(c); } /** @@ -693,10 +713,7 @@ cf_lex_symbol(const char *data) struct symbol *sym = cf_get_symbol(data); cf_lval.s = sym; - if (sym->class != SYM_VOID) - return CF_SYM_KNOWN; - - /* Is it a keyword? */ + /* Is it a keyword? Prefer the keyword. */ struct keyword *k = HASH_FIND(kw_hash, KW, data); if (k) { @@ -709,9 +726,11 @@ cf_lex_symbol(const char *data) } } - /* OK, undefined symbol */ - cf_lval.s = sym; - return CF_SYM_UNDEFINED; + /* OK, only a symbol. */ + if (sym->class == SYM_VOID) + return CF_SYM_UNDEFINED; + else + return CF_SYM_KNOWN; } static void @@ -724,6 +743,34 @@ cf_lex_init_kh(void) HASH_INSERT(kw_hash, KW, k); } +void +ea_lex_register(struct ea_class *def) +{ + struct symbol *sym = cf_root_symbol(def->name); + sym->class = SYM_ATTRIBUTE; + sym->attribute = def; + def->sym = sym; +} + +void +ea_lex_unregister(struct ea_class *def) +{ + struct symbol *sym = def->sym; + HASH_REMOVE2(global_root_scope->hash, SYM, &root_pool, sym); + mb_free(sym); + def->sym = NULL; +} + +struct ea_class * +ea_class_find_by_name(const char *name) +{ + struct symbol *sym = cf_find_symbol(global_root_scope, name); + if (!sym || (sym->class != SYM_ATTRIBUTE)) + return NULL; + else + return sym->attribute; +} + /** * cf_lex_init - initialize the lexer * @is_cli: true if we're going to parse CLI command, false for configuration @@ -757,6 +804,11 @@ cf_lex_init(int is_cli, struct config *c) c->root_scope = cfg_allocz(sizeof(struct sym_scope)); conf_this_scope = c->root_scope; conf_this_scope->active = 1; + + if (is_cli) + conf_this_scope->next = config->root_scope; + else + conf_this_scope->next = global_root_scope; } /** diff --git a/conf/conf.c b/conf/conf.c index 025c040e..17424402 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -46,7 +46,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -169,7 +169,6 @@ int cli_parse(struct config *c) { int done = 0; - c->fallback = config; new_config = c; cfg_mem = c->mem; if (setjmp(conf_jmpbuf)) @@ -180,7 +179,6 @@ cli_parse(struct config *c) done = 1; cleanup: - c->fallback = NULL; new_config = NULL; cfg_mem = NULL; return done; @@ -523,7 +521,6 @@ order_shutdown(int gr) init_list(&c->tables); init_list(&c->symbols); memset(c->def_tables, 0, sizeof(c->def_tables)); - HASH_INIT(c->sym_hash, c->pool, 4); c->shutdown = 1; c->gr_down = gr; diff --git a/conf/conf.h b/conf/conf.h index be46f172..50e01bf5 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -16,7 +16,6 @@ #include "lib/timer.h" /* Configuration structure */ - struct config { pool *pool; /* Pool the configuration is stored in */ linpool *mem; /* Linear pool containing configuration data */ @@ -35,6 +34,7 @@ struct config { u32 proto_default_debug; /* Default protocol debug mask */ u32 proto_default_mrtdump; /* Default protocol mrtdump mask */ u32 channel_default_debug; /* Default channel debug mask */ + u16 filter_vstk, filter_estk; /* Filter stack depth */ struct timeformat tf_route; /* Time format for 'show route' */ struct timeformat tf_proto; /* Time format for 'show protocol' */ struct timeformat tf_log; /* Time format for the logfile */ @@ -44,6 +44,7 @@ struct config { int cli_debug; /* Tracing of CLI connections and commands */ int latency_debug; /* I/O loop tracks duration of each event */ + int table_debug; /* Track route propagation through tables */ u32 latency_limit; /* Events with longer duration are logged (us) */ u32 watchdog_warning; /* I/O loop watchdog limit for warning (us) */ u32 watchdog_timeout; /* Watchdog timeout (in seconds, 0 = disabled) */ @@ -53,8 +54,7 @@ struct config { char *err_file_name; /* File name containing error */ char *file_name; /* Name of main configuration file */ int file_fd; /* File descriptor of main configuration file */ - HASH(struct symbol) sym_hash; /* Lexer: symbol hash table */ - struct config *fallback; /* Link to regular config for CLI parsing */ + struct sym_scope *root_scope; /* Scope for root symbols */ int obstacle_count; /* Number of items blocking freeing of this config */ int shutdown; /* This is a pseudo-config for daemon shutdown */ @@ -121,7 +121,7 @@ struct symbol { const struct f_line *function; /* For SYM_FUNCTION */ const struct filter *filter; /* For SYM_FILTER */ struct rtable_config *table; /* For SYM_TABLE */ - struct f_dynamic_attr *attribute; /* For SYM_ATTRIBUTE */ + struct ea_class *attribute; /* For SYM_ATTRIBUTE */ struct f_val *val; /* For SYM_CONSTANT */ uint offset; /* For SYM_VARIABLE */ }; @@ -132,11 +132,16 @@ struct symbol { struct sym_scope { struct sym_scope *next; /* Next on scope stack */ struct symbol *name; /* Name of this scope */ + + HASH(struct symbol) hash; /* Local symbol hash */ + uint slots; /* Variable slots */ byte active; /* Currently entered */ byte soft_scopes; /* Number of soft scopes above */ }; +extern struct sym_scope *global_root_scope; + struct bytestring { size_t length; byte data[]; @@ -185,7 +190,14 @@ int cf_lex(void); void cf_lex_init(int is_cli, struct config *c); void cf_lex_unwind(void); -struct symbol *cf_find_symbol(const struct config *cfg, const byte *c); +struct symbol *cf_find_symbol_scope(const struct sym_scope *scope, const byte *c); +static inline struct symbol *cf_find_symbol_cfg(const struct config *cfg, const byte *c) +{ return cf_find_symbol_scope(cfg->root_scope, c); } + +#define cf_find_symbol(where, what) _Generic(*(where), \ + struct config: cf_find_symbol_cfg, \ + struct sym_scope: cf_find_symbol_scope \ + )((where), (what)) struct symbol *cf_get_symbol(const byte *c); struct symbol *cf_default_name(char *template, int *counter); diff --git a/conf/confbase.Y b/conf/confbase.Y index 1d5738ff..241c332d 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -18,7 +18,7 @@ CF_HDR #include "lib/string.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" #include "nest/cli.h" #include "filter/filter.h" @@ -71,8 +71,9 @@ CF_DECLS } xp; enum filter_return fret; enum ec_subtype ecs; - struct f_dynamic_attr fda; + struct ea_class *ea_class; struct f_static_attr fsa; + struct f_attr_bit fab; struct f_lval flv; struct f_line *fl; struct f_arg *fa; @@ -92,7 +93,7 @@ CF_DECLS struct proto_spec ps; struct channel_limit cl; struct timeformat *tf; - mpls_label_stack *mls; + struct adata *ad; struct bytestring *bs; } @@ -113,14 +114,15 @@ CF_DECLS %type <a> ipa %type <net> net_ip4_ net_ip4 net_ip6_ net_ip6 net_ip_ net_ip net_or_ipa %type <net_ptr> net_ net_any net_vpn4_ net_vpn6_ net_vpn_ net_roa4_ net_roa6_ net_roa_ net_ip6_sadr_ net_mpls_ -%type <mls> label_stack_start label_stack +%type <ad> label_stack_start label_stack %type <t> text opttext -%type <s> symbol +%type <s> symbol symbol_known toksym %nonassoc PREFIX_DUMMY %left AND OR %nonassoc '=' '<' '>' '~' GEQ LEQ NEQ NMA PO PC +%left '|' '&' %left '+' '-' %left '*' '/' '%' %left '!' @@ -152,16 +154,16 @@ conf: definition ; definition: DEFINE symbol '=' term ';' { - struct f_val *val = cfg_allocz(sizeof(struct f_val)); - if (f_eval(f_linearize($4, 1), cfg_mem, val) > F_RETURN) cf_error("Runtime error"); - cf_define_symbol($2, SYM_CONSTANT | val->type, val, val); + struct f_val val; + if (f_eval(f_linearize($4, 1), &val) > F_RETURN) cf_error("Runtime error"); + cf_define_symbol($2, SYM_CONSTANT | val.type, val, lp_val_copy(cfg_mem, &val)); } ; expr: NUM | '(' term ')' { $$ = f_eval_int(f_linearize($2, 1)); } - | CF_SYM_KNOWN { + | symbol_known { if ($1->class != (SYM_CONSTANT | T_INT)) cf_error("Number constant expected"); $$ = SYM_VAL($1).i; } ; @@ -172,7 +174,9 @@ expr_us: | expr US { $$ = $1 US_; } ; -symbol: CF_SYM_UNDEFINED | CF_SYM_KNOWN ; +toksym: FROM | PREFERENCE ; +symbol: CF_SYM_UNDEFINED | CF_SYM_KNOWN | toksym ; +symbol_known: CF_SYM_KNOWN | toksym ; /* Switches */ @@ -357,17 +361,19 @@ net_or_ipa: label_stack_start: NUM { - $$ = cfg_allocz(sizeof(mpls_label_stack)); - $$->len = 1; - $$->stack[0] = $1; + $$ = cfg_allocz(ADATA_SIZE(MPLS_MAX_LABEL_STACK * sizeof(u32))); + $$->length = sizeof(u32); + *((u32 *)$$->data) = $1; }; label_stack: label_stack_start | label_stack '/' NUM { - if ($1->len >= MPLS_MAX_LABEL_STACK) + if ($1->length >= MPLS_MAX_LABEL_STACK * sizeof(u32)) cf_error("Too many labels in stack"); - $1->stack[$1->len++] = $3; + + *((u32 *)($$->data + $1->length)) = $3; + $1->length += sizeof(u32); $$ = $1; } ; diff --git a/conf/gen_keywords.m4 b/conf/gen_keywords.m4 index 0c1dc545..53226e4d 100644 --- a/conf/gen_keywords.m4 +++ b/conf/gen_keywords.m4 @@ -26,8 +26,7 @@ m4_define(CF_DEFINES, `m4_divert(-1)') m4_define(CF_handle_kw, `m4_divert(1){ "m4_translit($1,[[A-Z]],[[a-z]])", $1, NULL }, m4_divert(-1)') m4_define(CF_keywd, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)CF_handle_kw($1)]])') -m4_define(CF_KEYWORDS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token[[]]CF_toks -)DNL') +m4_define(CF_KEYWORDS, `CF_iterate([[CF_keywd]], [[$@]])DNL') # CLI commands generate keywords as well m4_define(CF_CLI, `CF_KEYWORDS(m4_translit($1, [[ ]], [[,]])) diff --git a/conf/gen_parser.m4 b/conf/gen_parser.m4 index 5b378a93..af4b1455 100644 --- a/conf/gen_parser.m4 +++ b/conf/gen_parser.m4 @@ -31,7 +31,7 @@ m4_define(CF_iterate, `m4_define([[CF_iter]], m4_defn([[$1]]))CF_itera($2)') # Keywords act as untyped %token m4_define(CF_keywd, `m4_ifdef([[CF_tok_$1]],,[[m4_define([[CF_tok_$1]],1)m4_define([[CF_toks]],CF_toks $1)]])') -m4_define(CF_KEYWORDS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token[[]]CF_toks +m4_define(CF_KEYWORDS, `m4_define([[CF_toks]],[[]])CF_iterate([[CF_keywd]], [[$@]])m4_ifelse(CF_toks,,,%token<s>[[]]CF_toks )DNL') # CLI commands diff --git a/configure.ac b/configure.ac index 64181d29..330add87 100644 --- a/configure.ac +++ b/configure.ac @@ -36,12 +36,6 @@ AC_ARG_ENABLE([memcheck], [enable_memcheck=yes] ) -AC_ARG_ENABLE([pthreads], - [AS_HELP_STRING([--enable-pthreads], [enable POSIX threads support @<:@try@:>@])], - [], - [enable_pthreads=try] -) - AC_ARG_ENABLE([libssh], [AS_HELP_STRING([--enable-libssh], [enable LibSSH support in RPKI @<:@try@:>@])], [], @@ -125,25 +119,19 @@ if test -z "$GCC" ; then fi BIRD_CHECK_THREAD_LOCAL -if test "$bird_cv_thread_local" = yes ; then - AC_DEFINE([HAVE_THREAD_LOCAL], [1], [Define to 1 if _Thread_local is available]) +if test "$bird_cv_thread_local" = no ; then + AC_MSG_ERROR([This program requires thread local storage.]) +elif test "$bird_cv_thread_local" != yes ; then + AC_DEFINE_UNQUOTED([_Thread_local], [$bird_cv_thread_local], [Legacy _Thread_local]) fi -if test "$enable_pthreads" != no ; then - BIRD_CHECK_PTHREADS +BIRD_CHECK_PTHREADS - if test "$bird_cv_lib_pthreads" = yes ; then - AC_DEFINE([USE_PTHREADS], [1], [Define to 1 if pthreads are enabled]) - CFLAGS="$CFLAGS -pthread" - LDFLAGS="$LDFLAGS -pthread" - proto_bfd=bfd - elif test "$enable_pthreads" = yes ; then - AC_MSG_ERROR([POSIX threads not available.]) - fi - - if test "$enable_pthreads" = try ; then - enable_pthreads="$bird_cv_lib_pthreads" - fi +if test "$bird_cv_lib_pthreads" = yes ; then + CFLAGS="$CFLAGS -pthread" + LDFLAGS="$LDFLAGS -pthread" +else + AC_MSG_ERROR([POSIX threads not available.]) fi # This is assumed to be necessary for proper BIRD build @@ -304,8 +292,7 @@ if test "$enable_mpls_kernel" != no ; then fi fi -all_protocols="$proto_bfd babel bgp mrt ospf perf pipe radv rip rpki static" - +all_protocols="bfd babel bgp mrt ospf perf pipe radv rip rpki static" all_protocols=`echo $all_protocols | sed 's/ /,/g'` if test "$with_protocols" = all ; then @@ -350,16 +337,29 @@ case $sysdesc in ;; esac -AC_CHECK_HEADERS_ONCE([alloca.h syslog.h]) -AC_CHECK_HEADER([sys/mman.h], [AC_DEFINE([HAVE_MMAP], [1], [Define to 1 if mmap() is available.])]) +AC_CHECK_HEADERS_ONCE([alloca.h syslog.h stdatomic.h]) +AC_CHECK_HEADER([sys/mman.h], [AC_DEFINE([HAVE_MMAP], [1], [Define to 1 if mmap() is available.])], have_mman=no) +AC_CHECK_FUNC([aligned_alloc], [AC_DEFINE([HAVE_ALIGNED_ALLOC], [1], [Define to 1 if aligned_alloc() is available.])], have_aligned_alloc=no) AC_CHECK_MEMBERS([struct sockaddr.sa_len], [], [], [#include <sys/socket.h>]) +if test "$have_aligned_alloc" = "no" && test "$have_mman" = "no" ; then + AC_MSG_ERROR([No means of aligned alloc found. Need mmap() or aligned_alloc().]) +fi + + AC_C_BIGENDIAN( [AC_DEFINE([CPU_BIG_ENDIAN], [1], [Define to 1 if cpu is big endian])], [AC_DEFINE([CPU_LITTLE_ENDIAN], [1], [Define to 1 if cpu is little endian])], [AC_MSG_ERROR([Cannot determine CPU endianity.])] ) +BIRD_CHECK_POINTER_ALIGNMENT +if test "$bird_cv_pointer_alignment" = "unknown" ; then + AC_MSG_ERROR([Couldn't determine pointer alignment]) +else + AC_DEFINE_UNQUOTED([CPU_POINTER_ALIGNMENT], [$bird_cv_pointer_alignment], [Pointer alignment for macro usage]) +fi + BIRD_CHECK_ANDROID_GLOB if test "$bird_cv_lib_glob" = no ; then AC_MSG_ERROR([glob.h not found.]) @@ -402,7 +402,7 @@ if test "$enable_debug" = yes ; then fi fi - if test "enable_debug_expensive" = yes ; then + if test "$enable_debug_expensive" = yes ; then AC_DEFINE([ENABLE_EXPENSIVE_CHECKS], [1], [Define to 1 if you want to run expensive consistency checks.]) fi fi @@ -467,7 +467,6 @@ AC_MSG_RESULT([ Object directory: $objdir]) AC_MSG_RESULT([ Iproute2 directory: $iproutedir]) AC_MSG_RESULT([ System configuration: $sysdesc]) AC_MSG_RESULT([ Debugging: $enable_debug]) -AC_MSG_RESULT([ POSIX threads: $enable_pthreads]) AC_MSG_RESULT([ Routing protocols: $protocols]) AC_MSG_RESULT([ LibSSH support in RPKI: $enable_libssh]) AC_MSG_RESULT([ Kernel MPLS support: $enable_mpls_kernel]) diff --git a/doc/bird.sgml b/doc/bird.sgml index a0d9c405..c29353fc 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -699,6 +699,15 @@ to set options. periods. Default: adaptive, based on number of routing tables in the configuration. From 10 s (with <= 25 routing tables) up to 600 s (with >= 1500 routing tables). + + <tag><label id="rtable-cork-threshold">cork threshold <m/number/ <m/number/</tag> + Too many pending exports may lead to memory bloating. In such cases, + BIRD tries to relieve the memory pressure by pausing some routines until + the queue sizes get low enough. This option allows the user to set the + thresholds; first value is the low threshold (when to resume), the + second one is the high threshold (when to pause). The higher is the + threshold, the more memory can get used. In most cases, the defaults + should work for you. Default: 128, 512. </descrip> @@ -929,10 +938,12 @@ inherited from templates can be updated by new definitions. <cf/none/ is for dropping all routes. Default: <cf/all/ (except for EBGP). - <tag><label id="proto-export">export <m/filter/</tag> + <tag><label id="proto-export">export [ in <m/prefix/ ] <m/filter/</tag> This is similar to the <cf>import</cf> keyword, except that it works in - the direction from the routing table to the protocol. Default: <cf/none/ - (except for EBGP). + the direction from the routing table to the protocol. If <cf/in/ keyword is used, + only routes inside the given prefix are exported. Other routes are completely + ignored (e.g. no logging and no statistics). + Default: <cf/none/ (except for EBGP). <tag><label id="proto-import-keep-filtered">import keep filtered <m/switch/</tag> Usually, if an import filter rejects a route, the route is forgotten. @@ -1755,17 +1766,8 @@ Common route attributes are: primary key of the routing table. Read-only. (See the <ref id="routes" name="chapter about routes">.) - <tag><label id="rta-scope"><m/enum/ scope</tag> - The scope of the route. Possible values: <cf/SCOPE_HOST/ for routes - local to this host, <cf/SCOPE_LINK/ for those specific for a physical - link, <cf/SCOPE_SITE/ and <cf/SCOPE_ORGANIZATION/ for private routes and - <cf/SCOPE_UNIVERSE/ for globally visible routes. This attribute is not - interpreted by BIRD and can be used to mark routes in filters. The - default value for new routes is <cf/SCOPE_UNIVERSE/. - <tag><label id="rta-preference"><m/int/ preference</tag> - Preference of the route. Valid values are 0-65535. (See the chapter - about routing tables.) + Preference of the route. <tag><label id="rta-from"><m/ip/ from</tag> The router which the route has originated from. @@ -1779,7 +1781,7 @@ Common route attributes are: <tag><label id="rta-source"><m/enum/ source</tag> what protocol has told me about this route. Possible values: - <cf/RTS_DUMMY/, <cf/RTS_STATIC/, <cf/RTS_INHERIT/, <cf/RTS_DEVICE/, + <cf/RTS_STATIC/, <cf/RTS_INHERIT/, <cf/RTS_DEVICE/, <cf/RTS_RIP/, <cf/RTS_OSPF/, <cf/RTS_OSPF_IA/, <cf/RTS_OSPF_EXT1/, <cf/RTS_OSPF_EXT2/, <cf/RTS_BGP/, <cf/RTS_PIPE/, <cf/RTS_BABEL/. @@ -4279,6 +4281,14 @@ include standard channel config options; see the example below. <tag><label id="pipe-peer-table">peer table <m/table/</tag> Defines secondary routing table to connect to. The primary one is selected by the <cf/table/ keyword. + + <tag><label id="pipe-max-generation">max generation <m/expr/</tag> + Sets maximal generation of route that may pass through this pipe. + The generation value is increased by one by each pipe on its path. + Not meeting this requirement causes an error message complaining about + an overpiped route. If you have long chains of pipes, you probably want + to raise this value; anyway the default of 16 should be enough for even + most strange uses. Maximum is 254. </descrip> <sect1>Attributes diff --git a/filter/config.Y b/filter/config.Y index c7c82068..5ba4f7e6 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -22,6 +22,16 @@ static inline u32 pair_b(u32 p) { return p & 0xFFFF; } #define f_generate_complex(fi_code, da, arg) \ f_new_inst(FI_EA_SET, f_new_inst(fi_code, f_new_inst(FI_EA_GET, da), arg), da) +#define f_generate_complex_sym(fi_code, sym, arg) ({ \ + if (sym->class != SYM_ATTRIBUTE) \ + cf_error("Can't empty %s: not an attribute", sym->name); \ + f_generate_complex(fi_code, sym->attribute, arg); \ +}) + +#define f_generate_complex_default(fi_code, da, arg, def) \ + f_new_inst(FI_EA_SET, f_new_inst(fi_code, f_new_inst(FI_DEFAULT, f_new_inst(FI_EA_GET, da), f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = def })), arg), da) + + static int f_new_var(struct sym_scope *s) { @@ -191,28 +201,32 @@ f_new_lc_item(u32 f1, u32 t1, u32 f2, u32 t2, u32 f3, u32 t3) } static inline struct f_inst * -f_generate_empty(struct f_dynamic_attr dyn) +f_generate_empty(const struct symbol *sym) { - struct f_val empty; + if (sym->class != SYM_ATTRIBUTE) + cf_error("Can't empty %s: not an attribute", sym->name); - switch (dyn.type & EAF_TYPE_MASK) { - case EAF_TYPE_AS_PATH: - empty = f_const_empty_path; - break; - case EAF_TYPE_INT_SET: - empty = f_const_empty_clist; - break; - case EAF_TYPE_EC_SET: - empty = f_const_empty_eclist; - break; - case EAF_TYPE_LC_SET: - empty = f_const_empty_lclist; - break; - default: - cf_error("Can't empty that attribute"); - } + const struct ea_class *def = sym->attribute; + const struct f_val *empty = f_get_empty(def->type); + if (!empty) + cf_error("Can't empty attribute %s", def->name); - return f_new_inst(FI_EA_SET, f_new_inst(FI_CONSTANT, empty), dyn); + return f_new_inst(FI_EA_SET, f_new_inst(FI_CONSTANT, *empty), def); +} + +static inline struct f_inst * +f_implicit_roa_check(struct rtable_config *tab) +{ + const struct ea_class *def = ea_class_find("bgp_path"); + if (!def) + cf_error("Fatal: Couldn't find BGP path attribute definition."); + + struct f_static_attr fsa = f_new_static_attr(T_NET, SA_NET, 1); + + return f_new_inst(FI_ROA_CHECK, + f_new_inst(FI_RTA_GET, fsa), + f_new_inst(FI_AS_PATH_LAST, f_new_inst(FI_EA_GET, def)), + tab); } /* @@ -278,10 +292,6 @@ assert_assign(struct f_lval *lval, struct f_inst *expr, const char *start, const setter = f_new_inst(FI_VAR_SET, expr, lval->sym); getter = f_new_inst(FI_VAR_GET, lval->sym); break; - case F_LVAL_PREFERENCE: - setter = f_new_inst(FI_PREF_SET, expr); - getter = f_new_inst(FI_PREF_GET); - break; case F_LVAL_SA: setter = f_new_inst(FI_RTA_SET, expr, lval->sa); getter = f_new_inst(FI_RTA_GET, lval->sa); @@ -309,27 +319,26 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, IF, THEN, ELSE, CASE, FOR, IN, DO, TRUE, FALSE, RT, RO, UNKNOWN, GENERIC, - FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, DEST, IFNAME, IFINDEX, WEIGHT, GW_MPLS, - PREFERENCE, + FROM, GW, NET, MASK, PROTO, SCOPE, DEST, IFNAME, IFINDEX, WEIGHT, GW_MPLS, ROA_CHECK, ASN, SRC, DST, IS_V4, IS_V6, LEN, MAXLEN, DATA, DATA1, DATA2, DEFINED, - ADD, DELETE, CONTAINS, RESET, - PREPEND, FIRST, LAST, LAST_NONAGGREGATED, MATCH, + ADD, DELETE, RESET, + PREPEND, FIRST, LAST, LAST_NONAGGREGATED, MIN, MAX, EMPTY, FILTER, WHERE, EVAL, ATTRIBUTE, - BT_ASSERT, BT_TEST_SUITE, BT_CHECK_ASSIGN, BT_TEST_SAME, FORMAT) + BT_ASSERT, BT_TEST_SUITE, BT_CHECK_ASSIGN, BT_TEST_SAME, FORMAT, STACKS) %nonassoc THEN %nonassoc ELSE %type <xp> cmds_int cmd_prep %type <x> term cmd cmd_var cmds cmds_scoped constant constructor print_list var var_init var_list function_call symbol_value bgp_path_expr bgp_path bgp_path_tail -%type <fda> dynamic_attr %type <fsa> static_attr +%type <fab> attr_bit %type <f> filter where_filter %type <fl> filter_body function_body %type <flv> lvalue @@ -347,6 +356,12 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, CF_GRAMMAR +conf: FILTER STACKS expr expr ';' { + new_config->filter_vstk = $3; + new_config->filter_estk = $4; + } + ; + conf: filter_def ; filter_def: FILTER symbol { $2 = cf_define_symbol($2, SYM_FILTER, filter, NULL); cf_push_scope( $2 ); } @@ -366,12 +381,19 @@ filter_eval: conf: custom_attr ; custom_attr: ATTRIBUTE type symbol ';' { - cf_define_symbol($3, SYM_ATTRIBUTE, attribute, ca_lookup(new_config->pool, $3->name, $2)->fda); + if (($3->class == SYM_ATTRIBUTE) && ($3->scope == new_config->root_scope)) + cf_error("Duplicate attribute %s definition", $3->name); + + cf_define_symbol($3, SYM_ATTRIBUTE, attribute, + ea_register_alloc(new_config->pool, (struct ea_class) { + .name = $3->name, + .type = $2, + })->class); }; conf: bt_test_suite ; bt_test_suite: - BT_TEST_SUITE '(' CF_SYM_KNOWN ',' text ')' { + BT_TEST_SUITE '(' symbol_known ',' text ')' { cf_assert_symbol($3, SYM_FUNCTION); struct f_bt_test_suite *t = cfg_allocz(sizeof(struct f_bt_test_suite)); t->fn = $3->function; @@ -384,7 +406,7 @@ bt_test_suite: conf: bt_test_same ; bt_test_same: - BT_TEST_SAME '(' CF_SYM_KNOWN ',' CF_SYM_KNOWN ',' NUM ')' { + BT_TEST_SAME '(' symbol_known ',' symbol_known ',' NUM ')' { cf_assert_symbol($3, SYM_FUNCTION); cf_assert_symbol($5, SYM_FUNCTION); struct f_bt_test_suite *t = cfg_allocz(sizeof(struct f_bt_test_suite)); @@ -465,7 +487,7 @@ function_vars: filter_body: function_body ; filter: - CF_SYM_KNOWN { + symbol_known { cf_assert_symbol($1, SYM_FILTER); $$ = $1->filter; } @@ -573,10 +595,10 @@ set_atom: | VPN_RD { $$.type = T_RD; $$.val.ec = $1; } | ENUM { $$.type = pair_a($1); $$.val.i = pair_b($1); } | '(' term ')' { - if (f_eval(f_linearize($2, 1), cfg_mem, &($$)) > F_RETURN) cf_error("Runtime error"); + if (f_eval(f_linearize($2, 1), &($$)) > F_RETURN) cf_error("Runtime error"); if (!f_valid_set_type($$.type)) cf_error("Set-incompatible type"); } - | CF_SYM_KNOWN { + | symbol_known { cf_assert_symbol($1, SYM_CONSTANT); if (!f_valid_set_type(SYM_TYPE($1))) cf_error("%s: set-incompatible type", $1->name); $$ = *$1->val; @@ -748,7 +770,7 @@ var_list: /* EMPTY */ { $$ = NULL; } | var_list ',' term { $$ = $3; $$->next = $1; } function_call: - CF_SYM_KNOWN '(' var_list ')' + symbol_known '(' var_list ')' { if ($1->class != SYM_FUNCTION) cf_error("You can't call something which is not a function. Really."); @@ -767,7 +789,7 @@ function_call: } ; -symbol_value: CF_SYM_KNOWN +symbol_value: symbol_known { switch ($1->class) { case SYM_CONSTANT_RANGE: @@ -777,7 +799,7 @@ symbol_value: CF_SYM_KNOWN $$ = f_new_inst(FI_VAR_GET, $1); break; case SYM_ATTRIBUTE: - $$ = f_new_inst(FI_EA_GET, *$1->attribute); + $$ = f_new_inst(FI_EA_GET, $1->attribute); break; default: cf_error("Can't get value of symbol %s", $1->name); @@ -786,12 +808,9 @@ symbol_value: CF_SYM_KNOWN ; static_attr: - FROM { $$ = f_new_static_attr(T_IP, SA_FROM, 0); } - | GW { $$ = f_new_static_attr(T_IP, SA_GW, 0); } + GW { $$ = f_new_static_attr(T_IP, SA_GW, 0); } | NET { $$ = f_new_static_attr(T_NET, SA_NET, 1); } | PROTO { $$ = f_new_static_attr(T_STRING, SA_PROTO, 1); } - | SOURCE { $$ = f_new_static_attr(T_ENUM_RTS, SA_SOURCE, 1); } - | SCOPE { $$ = f_new_static_attr(T_ENUM_SCOPE, SA_SCOPE, 0); } | DEST { $$ = f_new_static_attr(T_ENUM_RTD, SA_DEST, 0); } | IFNAME { $$ = f_new_static_attr(T_STRING, SA_IFNAME, 0); } | IFINDEX { $$ = f_new_static_attr(T_INT, SA_IFINDEX, 1); } @@ -805,6 +824,8 @@ term: | term '-' term { $$ = f_new_inst(FI_SUBTRACT, $1, $3); } | term '*' term { $$ = f_new_inst(FI_MULTIPLY, $1, $3); } | term '/' term { $$ = f_new_inst(FI_DIVIDE, $1, $3); } + | term '&' term { $$ = f_new_inst(FI_BITAND, $1, $3); } + | term '|' term { $$ = f_new_inst(FI_BITOR, $1, $3); } | term AND term { $$ = f_new_inst(FI_AND, $1, $3); } | term OR term { $$ = f_new_inst(FI_OR, $1, $3); } | term '=' term { $$ = f_new_inst(FI_EQ, $1, $3); } @@ -822,11 +843,11 @@ term: | constant { $$ = $1; } | constructor { $$ = $1; } - | PREFERENCE { $$ = f_new_inst(FI_PREF_GET); } - | static_attr { $$ = f_new_inst(FI_RTA_GET, $1); } - - | dynamic_attr { $$ = f_new_inst(FI_EA_GET, $1); } + | attr_bit { + struct f_inst *c = f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << $1.bit)}); + $$ = f_new_inst(FI_EQ, c, f_new_inst(FI_BITAND, f_new_inst(FI_EA_GET, $1.class), c)); + } | term '.' IS_V4 { $$ = f_new_inst(FI_IS_V4, $1); } | term '.' TYPE { $$ = f_new_inst(FI_TYPE, $1); } @@ -864,13 +885,11 @@ term: | DELETE '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_DEL, $3, $5); } | FILTER '(' term ',' term ')' { $$ = f_new_inst(FI_CLIST_FILTER, $3, $5); } - | ROA_CHECK '(' rtable ')' { $$ = f_new_inst(FI_ROA_CHECK_IMPLICIT, $3); } - | ROA_CHECK '(' rtable ',' term ',' term ')' { $$ = f_new_inst(FI_ROA_CHECK_EXPLICIT, $5, $7, $3); } + | ROA_CHECK '(' rtable ')' { $$ = f_implicit_roa_check($3); } + | ROA_CHECK '(' rtable ',' term ',' term ')' { $$ = f_new_inst(FI_ROA_CHECK, $5, $7, $3); } | FORMAT '(' term ')' { $$ = f_new_inst(FI_FORMAT, $3); } -/* | term '.' LEN { $$->code = P('P','l'); } */ - | function_call ; @@ -928,13 +947,15 @@ cmd: $$ = f_new_inst(FI_FOR_INIT, $6, $3); $$->next = f_new_inst(FI_FOR_NEXT, $3, $9); } - | CF_SYM_KNOWN '=' term ';' { + | symbol_known '=' term ';' { switch ($1->class) { case SYM_VARIABLE_RANGE: $$ = f_new_inst(FI_VAR_SET, $3, $1); break; case SYM_ATTRIBUTE: - $$ = f_new_inst(FI_EA_SET, $3, *$1->attribute); + if ($1->attribute->readonly) + cf_error("Attribute %s is read-only", $1->attribute->name); + $$ = f_new_inst(FI_EA_SET, $3, $1->attribute); break; default: cf_error("Can't assign to symbol %s", $1->name); @@ -944,19 +965,25 @@ cmd: DBG( "Ook, we'll return the value\n" ); $$ = f_new_inst(FI_RETURN, $2); } - | dynamic_attr '=' term ';' { - $$ = f_new_inst(FI_EA_SET, $3, $1); - } | static_attr '=' term ';' { if ($1.readonly) cf_error( "This static attribute is read-only."); $$ = f_new_inst(FI_RTA_SET, $3, $1); } - | PREFERENCE '=' term ';' { - $$ = f_new_inst(FI_PREF_SET, $3); + | UNSET '(' symbol_known ')' ';' { + if ($3->class != SYM_ATTRIBUTE) + cf_error("Can't unset %s", $3->name); + if ($3->attribute->readonly) + cf_error("Attribute %s is read-only", $3->attribute->name); + $$ = f_new_inst(FI_EA_UNSET, $3->attribute); } - | UNSET '(' dynamic_attr ')' ';' { - $$ = f_new_inst(FI_EA_UNSET, $3); + | attr_bit '=' term ';' { + $$ = f_new_inst(FI_CONDITION, $3, + f_generate_complex_default(FI_BITOR, $1.class, + f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << $1.bit)}), 0), + f_generate_complex_default(FI_BITAND, $1.class, + f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = ~(1U << $1.bit)}), 0) + ); } | break_command print_list ';' { struct f_inst *breaker = f_new_inst(FI_DIE, $1); @@ -981,11 +1008,11 @@ cmd: $$ = f_new_inst(FI_SWITCH, $2, build_tree($4)); } - | dynamic_attr '.' EMPTY ';' { $$ = f_generate_empty($1); } - | dynamic_attr '.' PREPEND '(' term ')' ';' { $$ = f_generate_complex( FI_PATH_PREPEND, $1, $5 ); } - | dynamic_attr '.' ADD '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_ADD, $1, $5 ); } - | dynamic_attr '.' DELETE '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_DEL, $1, $5 ); } - | dynamic_attr '.' FILTER '(' term ')' ';' { $$ = f_generate_complex( FI_CLIST_FILTER, $1, $5 ); } + | symbol_known '.' EMPTY ';' { $$ = f_generate_empty($1); } + | symbol_known '.' PREPEND '(' term ')' ';' { $$ = f_generate_complex_sym( FI_PATH_PREPEND, $1, $5 ); } + | symbol_known '.' ADD '(' term ')' ';' { $$ = f_generate_complex_sym( FI_CLIST_ADD, $1, $5 ); } + | symbol_known '.' DELETE '(' term ')' ';' { $$ = f_generate_complex_sym( FI_CLIST_DEL, $1, $5 ); } + | symbol_known '.' FILTER '(' term ')' ';' { $$ = f_generate_complex_sym( FI_CLIST_FILTER, $1, $5 ); } | BT_ASSERT '(' get_cf_position term get_cf_position ')' ';' { $$ = assert_done($4, $3 + 1, $5 - 1); } | BT_CHECK_ASSIGN '(' get_cf_position lvalue get_cf_position ',' term ')' ';' { $$ = assert_assign(&$4, $7, $3 + 1, $5 - 1); } ; @@ -996,9 +1023,17 @@ get_cf_position: }; lvalue: - CF_SYM_KNOWN { cf_assert_symbol($1, SYM_VARIABLE); $$ = (struct f_lval) { .type = F_LVAL_VARIABLE, .sym = $1 }; } - | PREFERENCE { $$ = (struct f_lval) { .type = F_LVAL_PREFERENCE }; } + symbol_known { + switch ($1->class) { + case SYM_VARIABLE_RANGE: + $$ = (struct f_lval) { .type = F_LVAL_VARIABLE, .sym = $1 }; + break; + case SYM_ATTRIBUTE: + $$ = (struct f_lval) { .type = F_LVAL_EA, .da = $1->attribute }; + break; + } + } | static_attr { $$ = (struct f_lval) { .type = F_LVAL_SA, .sa = $1 }; } - | dynamic_attr { $$ = (struct f_lval) { .type = F_LVAL_EA, .da = $1 }; }; + ; CF_END diff --git a/filter/data.c b/filter/data.c index 87c438fc..d26b07f5 100644 --- a/filter/data.c +++ b/filter/data.c @@ -16,10 +16,10 @@ #include "lib/unaligned.h" #include "lib/net.h" #include "lib/ip.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "filter/filter.h" #include "filter/f-inst.h" @@ -27,6 +27,8 @@ static const char * const f_type_str[] = { [T_VOID] = "void", + [T_OPAQUE] = "opaque byte string", + [T_IFACE] = "interface", [T_INT] = "int", [T_BOOL] = "bool", @@ -36,7 +38,6 @@ static const char * const f_type_str[] = { [T_ENUM_RTS] = "enum rts", [T_ENUM_BGP_ORIGIN] = "enum bgp_origin", [T_ENUM_SCOPE] = "enum scope", - [T_ENUM_RTC] = "enum rtc", [T_ENUM_RTD] = "enum rtd", [T_ENUM_ROA] = "enum roa", [T_ENUM_NETTYPE] = "enum nettype", @@ -47,6 +48,7 @@ static const char * const f_type_str[] = { [T_NET] = "prefix", [T_STRING] = "string", [T_PATH_MASK] = "bgpmask", + [T_PATH_MASK_ITEM] = "bgpmask item", [T_PATH] = "bgppath", [T_CLIST] = "clist", [T_EC] = "ec", @@ -54,22 +56,19 @@ static const char * const f_type_str[] = { [T_LC] = "lc", [T_LCLIST] = "lclist", [T_RD] = "rd", + + [T_SET] = "set", + [T_PREFIX_SET] = "prefix set", }; const char * -f_type_name(enum f_type t) +f_type_name(btype t) { - if (t < ARRAY_SIZE(f_type_str)) - return f_type_str[t] ?: "?"; - - if ((t == T_SET) || (t == T_PREFIX_SET)) - return "set"; - - return "?"; + return (t < ARRAY_SIZE(f_type_str)) ? (f_type_str[t] ?: "?") : "?"; } -enum f_type -f_type_element_type(enum f_type t) +btype +f_type_element_type(btype t) { switch(t) { case T_PATH: return T_INT; @@ -99,14 +98,6 @@ const struct f_val f_const_empty_path = { .val.ti = &f_const_empty_trie, }; -static struct adata * -adata_empty(struct linpool *pool, int l) -{ - struct adata *res = lp_alloc(pool, sizeof(struct adata) + l); - res->length = l; - return res; -} - static void pm_format(const struct f_path_mask *p, buffer *buf) { @@ -435,7 +426,7 @@ clist_filter(struct linpool *pool, const struct adata *list, const struct f_val if (nl == list->length) return list; - struct adata *res = adata_empty(pool, nl); + struct adata *res = lp_alloc_adata(pool, nl); memcpy(res->data, tmp, nl); return res; } @@ -469,7 +460,7 @@ eclist_filter(struct linpool *pool, const struct adata *list, const struct f_val if (nl == list->length) return list; - struct adata *res = adata_empty(pool, nl); + struct adata *res = lp_alloc_adata(pool, nl); memcpy(res->data, tmp, nl); return res; } @@ -501,7 +492,7 @@ lclist_filter(struct linpool *pool, const struct adata *list, const struct f_val if (nl == list->length) return list; - struct adata *res = adata_empty(pool, nl); + struct adata *res = lp_alloc_adata(pool, nl); memcpy(res->data, tmp, nl); return res; } @@ -625,3 +616,75 @@ val_dump(const struct f_val *v) { return val_dump_buffer; } + +struct f_val * +lp_val_copy(struct linpool *lp, const struct f_val *v) +{ + switch (v->type) + { + case T_VOID: + case T_BOOL: + case T_INT: + case T_IP: + case T_PAIR: + case T_QUAD: + case T_EC: + case T_LC: + case T_RD: + case T_ENUM: + case T_PATH_MASK_ITEM: + /* These aren't embedded but there is no need to copy them */ + case T_SET: + case T_PREFIX_SET: + case T_PATH_MASK: + case T_IFACE: + { + struct f_val *out = lp_alloc(lp, sizeof(*out)); + *out = *v; + return out; + } + + case T_NET: + { + struct { + struct f_val val; + net_addr net[0]; + } *out = lp_alloc(lp, sizeof(*out) + v->val.net->length); + out->val = *v; + out->val.val.net = out->net; + net_copy(out->net, v->val.net); + return &out->val; + } + + case T_STRING: + { + uint len = strlen(v->val.s); + struct { + struct f_val val; + char buf[0]; + } *out = lp_alloc(lp, sizeof(*out) + len + 1); + out->val = *v; + out->val.val.s = out->buf; + memcpy(out->buf, v->val.s, len+1); + return &out->val; + } + + case T_PATH: + case T_CLIST: + case T_ECLIST: + case T_LCLIST: + { + struct { + struct f_val val; + struct adata ad; + } *out = lp_alloc(lp, sizeof(*out) + v->val.ad->length); + out->val = *v; + out->val.val.ad = &out->ad; + memcpy(&out->ad, v->val.ad, v->val.ad->length); + return &out->val; + } + + default: + bug("Unknown type in value copy: %d", v->type); + } +} diff --git a/filter/data.h b/filter/data.h index 221e4229..c1e7c736 100644 --- a/filter/data.h +++ b/filter/data.h @@ -11,91 +11,20 @@ #define _BIRD_FILTER_DATA_H_ #include "nest/bird.h" - -/* Type numbers must be in 0..0xff range */ -#define T_MASK 0xff - -/* Internal types */ -enum f_type { -/* Nothing. Simply nothing. */ - T_VOID = 0, - -/* User visible types, which fit in int */ - T_INT = 0x10, - T_BOOL = 0x11, - T_PAIR = 0x12, /* Notice that pair is stored as integer: first << 16 | second */ - T_QUAD = 0x13, - -/* Put enumerational types in 0x30..0x3f range */ - T_ENUM_LO = 0x30, - T_ENUM_HI = 0x3f, - - T_ENUM_RTS = 0x30, - T_ENUM_BGP_ORIGIN = 0x31, - T_ENUM_SCOPE = 0x32, - T_ENUM_RTC = 0x33, - T_ENUM_RTD = 0x34, - T_ENUM_ROA = 0x35, - T_ENUM_NETTYPE = 0x36, - T_ENUM_RA_PREFERENCE = 0x37, - T_ENUM_AF = 0x38, - -/* new enums go here */ - T_ENUM_EMPTY = 0x3f, /* Special hack for atomic_aggr */ - -#define T_ENUM T_ENUM_LO ... T_ENUM_HI - -/* Bigger ones */ - T_IP = 0x20, - T_NET = 0x21, - T_STRING = 0x22, - T_PATH_MASK = 0x23, /* mask for BGP path */ - T_PATH = 0x24, /* BGP path */ - T_CLIST = 0x25, /* Community list */ - T_EC = 0x26, /* Extended community value, u64 */ - T_ECLIST = 0x27, /* Extended community list */ - T_LC = 0x28, /* Large community value, lcomm */ - T_LCLIST = 0x29, /* Large community list */ - T_RD = 0x2a, /* Route distinguisher for VPN addresses */ - T_PATH_MASK_ITEM = 0x2b, /* Path mask item for path mask constructors */ - - T_SET = 0x80, - T_PREFIX_SET = 0x81, -} PACKED; +#include "lib/type.h" /* Filter value; size of this affects filter memory consumption */ struct f_val { - enum f_type type; /* T_* */ - union { - uint i; - u64 ec; - lcomm lc; - ip_addr ip; - const net_addr *net; - const char *s; - const struct f_tree *t; - const struct f_trie *ti; - const struct adata *ad; - const struct f_path_mask *path_mask; - struct f_path_mask_item pmi; - } val; + btype type; /* T_* */ + union bval_long val; }; -/* Dynamic attribute definition (eattrs) */ -struct f_dynamic_attr { - u8 type; /* EA type (EAF_*) */ - u8 bit; /* For bitfield accessors */ - enum f_type f_type; /* Filter type */ - uint ea_code; /* EA code */ -}; +#define fputip(a) ({ ip_addr *ax = falloc(sizeof(*ax)); *ax = (a); ax; }) enum f_sa_code { - SA_FROM = 1, - SA_GW, + SA_GW = 1, SA_NET, SA_PROTO, - SA_SOURCE, - SA_SCOPE, SA_DEST, SA_IFNAME, SA_IFINDEX, @@ -105,15 +34,14 @@ enum f_sa_code { /* Static attribute definition (members of struct rta) */ struct f_static_attr { - enum f_type f_type; /* Filter type */ + btype type; /* Data type */ enum f_sa_code sa_code; /* Static attribute id */ - int readonly:1; /* Don't allow writing */ + int readonly:1; /* Don't allow writing */ }; /* Filter l-value type */ enum f_lval_type { F_LVAL_VARIABLE, - F_LVAL_PREFERENCE, F_LVAL_SA, F_LVAL_EA, }; @@ -123,7 +51,7 @@ struct f_lval { enum f_lval_type type; union { struct symbol *sym; - struct f_dynamic_attr da; + const struct ea_class *da; struct f_static_attr sa; }; }; @@ -263,9 +191,9 @@ trie_match_next_longest_ip6(net_addr_ip6 *n, ip6_addr *found) #define F_CMP_ERROR 999 -const char *f_type_name(enum f_type t); +const char *f_type_name(btype t); -enum f_type f_type_element_type(enum f_type t); +enum btype f_type_element_type(btype t); int val_same(const struct f_val *v1, const struct f_val *v2); int val_compare(const struct f_val *v1, const struct f_val *v2); @@ -273,6 +201,8 @@ void val_format(const struct f_val *v, buffer *buf); char *val_format_str(struct linpool *lp, const struct f_val *v); const char *val_dump(const struct f_val *v); +struct f_val *lp_val_copy(struct linpool *lp, const struct f_val *v); + static inline int val_is_ip4(const struct f_val *v) { return (v->type == T_IP) && ipa_is_ip4(v->val.ip); } int val_in_range(const struct f_val *v1, const struct f_val *v2); @@ -300,7 +230,17 @@ undef_value(struct f_val v) } extern const struct f_val f_const_empty_path, f_const_empty_clist, f_const_empty_eclist, f_const_empty_lclist, f_const_empty_prefix_set; +static inline const struct f_val *f_get_empty(btype t) +{ + switch (t) { + case T_PATH: return &f_const_empty_path; + case T_CLIST: return &f_const_empty_clist; + case T_ECLIST: return &f_const_empty_eclist; + case T_LCLIST: return &f_const_empty_lclist; + default: return NULL; + } +} -enum filter_return f_eval(const struct f_line *expr, struct linpool *tmp_pool, struct f_val *pres); +enum filter_return f_eval(const struct f_line *expr, struct f_val *pres); #endif diff --git a/filter/decl.m4 b/filter/decl.m4 index 4c56cd9c..e2472127 100644 --- a/filter/decl.m4 +++ b/filter/decl.m4 @@ -32,6 +32,7 @@ m4_divert(-1)m4_dnl # # 101 content of per-inst struct # 102 constructor arguments +# 110 constructor attributes # 103 constructor body # 104 dump line item content # (there may be nothing in dump-line content and @@ -45,6 +46,7 @@ m4_divert(-1)m4_dnl # Here are macros to allow you to _divert to the right directions. m4_define(FID_STRUCT_IN, `m4_divert(101)') m4_define(FID_NEW_ARGS, `m4_divert(102)') +m4_define(FID_NEW_ATTRIBUTES, `m4_divert(110)') m4_define(FID_NEW_BODY, `m4_divert(103)') m4_define(FID_DUMP_BODY, `m4_divert(104)m4_define([[FID_DUMP_BODY_EXISTS]])') m4_define(FID_LINEARIZE_BODY, `m4_divert(105)') @@ -92,7 +94,7 @@ FID_DUMP_BODY()m4_dnl debug("%s" $4 "\n", INDENT, $5); ]]) FID_INTERPRET_EXEC()m4_dnl -const $1 $2 = whati->$2 +$1 $2 = whati->$2 FID_INTERPRET_BODY') # Instruction arguments are needed only until linearization is done. @@ -106,15 +108,18 @@ FID_STRUCT_IN()m4_dnl struct f_inst * f$1; FID_NEW_ARGS()m4_dnl , struct f_inst * f$1 +FID_NEW_ATTRIBUTES()m4_dnl +NONNULL(m4_eval($1+1)) FID_NEW_BODY()m4_dnl whati->f$1 = f$1; -for (const struct f_inst *child = f$1; child; child = child->next) { - what->size += child->size; +const struct f_inst *child$1 = f$1; +do { + what->size += child$1->size; FID_IFCONST([[ - if (child->fi_code != FI_CONSTANT) + if (child$1->fi_code != FI_CONSTANT) constargs = 0; ]]) -} +} while (child$1 = child$1->next); FID_LINEARIZE_BODY pos = linearize(dest, whati->f$1, pos); FID_INTERPRET_BODY()') @@ -196,6 +201,7 @@ FID_INTERPRET_BODY()') # that was needed in the former implementation. m4_define(LINEX, `FID_INTERPRET_EXEC()LINEX_($1)FID_INTERPRET_NEW()return $1 FID_INTERPRET_BODY()') m4_define(LINEX_, `do { + if (fstk->ecnt + 1 >= fstk->elen) runtime("Filter execution stack overflow"); fstk->estk[fstk->ecnt].pos = 0; fstk->estk[fstk->ecnt].line = $1; fstk->estk[fstk->ecnt].ventry = fstk->vcnt; @@ -233,7 +239,7 @@ FID_INTERPRET_BODY()') # state the result and put it to the right place. m4_define(RESULT, `RESULT_TYPE([[$1]]) RESULT_([[$1]],[[$2]],[[$3]])') m4_define(RESULT_, `RESULT_VAL([[ (struct f_val) { .type = $1, .val.$2 = $3 } ]])') -m4_define(RESULT_VAL, `FID_HIC(, [[do { res = $1; fstk->vcnt++; } while (0)]], +m4_define(RESULT_VAL, `FID_HIC(, [[do { res = $1; f_vcnt_check_overflow(1); fstk->vcnt++; } while (0)]], [[return fi_constant(what, $1)]])') m4_define(RESULT_VOID, `RESULT_VAL([[ (struct f_val) { .type = T_VOID } ]])') @@ -260,7 +266,7 @@ FID_INTERPRET_BODY()') m4_define(SYMBOL, `FID_MEMBER(struct symbol *, sym, [[strcmp(f1->sym->name, f2->sym->name) || (f1->sym->class != f2->sym->class)]], "symbol %s", item->sym->name)') m4_define(RTC, `FID_MEMBER(struct rtable_config *, rtc, [[strcmp(f1->rtc->name, f2->rtc->name)]], "route table %s", item->rtc->name)') m4_define(STATIC_ATTR, `FID_MEMBER(struct f_static_attr, sa, f1->sa.sa_code != f2->sa.sa_code,,)') -m4_define(DYNAMIC_ATTR, `FID_MEMBER(struct f_dynamic_attr, da, f1->da.ea_code != f2->da.ea_code,,)') +m4_define(DYNAMIC_ATTR, `FID_MEMBER(const struct ea_class *, da, f1->da != f2->da,,)') m4_define(ACCESS_RTE, `FID_HIC(,[[do { if (!fs->rte) runtime("No route to access"); } while (0)]],NEVER_CONSTANT())') # 2) Code wrapping @@ -320,7 +326,9 @@ m4_undivert(107)m4_dnl FID_NEW()m4_dnl Constructor and interpreter code together FID_HIC( [[m4_dnl Public declaration of constructor in H file -struct f_inst *f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code +struct f_inst * +m4_undivert(110)m4_dnl +f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code m4_undivert(102)m4_dnl );]], [[m4_dnl The one case in The Big Switch inside interpreter @@ -332,7 +340,9 @@ m4_undivert(102)m4_dnl break; ]], [[m4_dnl Constructor itself -struct f_inst *f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code +struct f_inst * +m4_undivert(110)m4_dnl +f_new_inst_]]INST_NAME()[[(enum f_instruction_code fi_code m4_undivert(102)m4_dnl ) { @@ -493,7 +503,7 @@ fi_constant(struct f_inst *what, struct f_val val) } static int -f_const_promotion(struct f_inst *arg, enum f_type want) +f_const_promotion(struct f_inst *arg, btype want) { if (arg->fi_code != FI_CONSTANT) return 0; @@ -651,7 +661,7 @@ struct f_inst { struct f_inst *next; /* Next instruction */ enum f_instruction_code fi_code; /* Instruction code */ enum f_instruction_flags flags; /* Flags, instruction-specific */ - enum f_type type; /* Type of returned value, if known */ + btype type; /* Type of returned value, if known */ int size; /* How many instructions are underneath */ int lineno; /* Line number */ union { diff --git a/filter/f-inst.c b/filter/f-inst.c index 30db96f2..60042476 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -238,7 +238,6 @@ * m4_dnl NEVER_CONSTANT-> don't generate pre-interpretation code at all * m4_dnl ACCESS_RTE -> check that route is available, also NEVER_CONSTANT * m4_dnl ACCESS_EATTRS -> pre-cache the eattrs; use only with ACCESS_RTE - * m4_dnl f_rta_cow(fs) -> function to call before any change to route should be done * * m4_dnl If you are stymied, see FI_CALL or FI_CONSTANT or just search for * m4_dnl the mentioned macros in this file to see what is happening there in wild. @@ -297,6 +296,16 @@ if (v2.val.i == 0) runtime( "Mother told me not to divide by 0" ); RESULT(T_INT, i, v1.val.i / v2.val.i); } + INST(FI_BITOR, 2, 1) { + ARG(1,T_INT); + ARG(2,T_INT); + RESULT(T_INT, i, v1.val.i | v2.val.i); + } + INST(FI_BITAND, 2, 1) { + ARG(1,T_INT); + ARG(2,T_INT); + RESULT(T_INT, i, v1.val.i & v2.val.i); + } INST(FI_AND, 1, 1) { ARG(1,T_BOOL); ARG_TYPE_STATIC(2,T_BOOL); @@ -559,8 +568,8 @@ /* Static type check */ if (f1->type) { - enum f_type t_var = (sym->class & 0xff); - enum f_type t_arg = f_type_element_type(f1->type); + enum btype t_var = (sym->class & 0xff); + enum btype t_arg = f_type_element_type(f1->type); if (!t_arg) cf_error("Value of expression in FOR must be iterable, got %s", f_type_name(f1->type)); @@ -678,77 +687,99 @@ { STATIC_ATTR; ACCESS_RTE; - struct rta *rta = (*fs->rte)->attrs; + ACCESS_EATTRS; switch (sa.sa_code) { - case SA_FROM: RESULT(sa.f_type, ip, rta->from); break; - case SA_GW: RESULT(sa.f_type, ip, rta->nh.gw); break; - case SA_NET: RESULT(sa.f_type, net, (*fs->rte)->net->n.addr); break; - case SA_PROTO: RESULT(sa.f_type, s, rta->src->proto->name); break; - case SA_SOURCE: RESULT(sa.f_type, i, rta->source); break; - case SA_SCOPE: RESULT(sa.f_type, i, rta->scope); break; - case SA_DEST: RESULT(sa.f_type, i, rta->dest); break; - case SA_IFNAME: RESULT(sa.f_type, s, rta->nh.iface ? rta->nh.iface->name : ""); break; - case SA_IFINDEX: RESULT(sa.f_type, i, rta->nh.iface ? rta->nh.iface->index : 0); break; - case SA_WEIGHT: RESULT(sa.f_type, i, rta->nh.weight + 1); break; - case SA_GW_MPLS: RESULT(sa.f_type, i, rta->nh.labels ? rta->nh.label[0] : MPLS_NULL); break; - + case SA_NET: RESULT(sa.type, net, fs->rte->net); break; + case SA_PROTO: RESULT(sa.type, s, fs->rte->src->owner->name); break; default: - bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code); + { + struct eattr *nhea = ea_find(*fs->eattrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + struct nexthop *nh = nhad ? &nhad->nh : NULL; + + switch (sa.sa_code) + { + case SA_DEST: + RESULT(sa.type, i, nhad ? + (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) + : RTD_NONE); + break; + case SA_GW: + RESULT(sa.type, ip, nh ? nh->gw : IPA_NONE); + break; + case SA_IFNAME: + RESULT(sa.type, s, (nh && nh->iface) ? nh->iface->name : ""); + break; + case SA_IFINDEX: + RESULT(sa.type, i, (nh && nh->iface) ? nh->iface->index : 0); + break; + case SA_WEIGHT: + RESULT(sa.type, i, (nh ? nh->weight : 0) + 1); + break; + case SA_GW_MPLS: + RESULT(sa.type, i, (nh && nh->labels) ? nh->label[0] : MPLS_NULL); + break; + default: + bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code); + } + } } } } INST(FI_RTA_SET, 1, 0) { ACCESS_RTE; + ACCESS_EATTRS; ARG_ANY(1); STATIC_ATTR; - ARG_TYPE(1, sa.f_type); - - f_rta_cow(fs); + ARG_TYPE(1, sa.type); { - struct rta *rta = (*fs->rte)->attrs; + union { + struct nexthop_adata nha; + struct { + struct adata ad; + struct nexthop nh; + u32 label; + }; + } nha; + + nha.ad = (struct adata) { + .length = sizeof (struct nexthop_adata) - sizeof (struct adata), + }; + + eattr *a = NULL; switch (sa.sa_code) { - case SA_FROM: - rta->from = v1.val.ip; - break; + case SA_DEST: + { + int i = v1.val.i; + if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT)) + runtime( "Destination can be changed only to blackhole, unreachable or prohibit" ); + nha.nha.dest = i; + nha.ad.length = NEXTHOP_DEST_SIZE; + break; + } case SA_GW: { + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + ip_addr ip = v1.val.ip; - struct iface *ifa = ipa_is_link_local(ip) ? rta->nh.iface : NULL; - neighbor *n = neigh_find(rta->src->proto, ip, ifa, 0); + struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ? + ((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL; + + /* XXX this code supposes that every owner is a protocol XXX */ + neighbor *n = neigh_find(SKIP_BACK(struct proto, sources, fs->rte->src->owner), ip, ifa, 0); if (!n || (n->scope == SCOPE_HOST)) runtime( "Invalid gw address" ); - rta->dest = RTD_UNICAST; - rta->nh.gw = ip; - rta->nh.iface = n->iface; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; - } - break; - - case SA_SCOPE: - rta->scope = v1.val.i; - break; - - case SA_DEST: - { - int i = v1.val.i; - if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT)) - runtime( "Destination can be changed only to blackhole, unreachable or prohibit" ); - - rta->dest = i; - rta->nh.gw = IPA_NONE; - rta->nh.iface = NULL; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; + nha.nh = (struct nexthop) { + .gw = ip, + .iface = n->iface, + }; } break; @@ -758,12 +789,9 @@ if (!ifa) runtime( "Invalid iface name" ); - rta->dest = RTD_UNICAST; - rta->nh.gw = IPA_NONE; - rta->nh.iface = ifa; - rta->nh.next = NULL; - rta->hostentry = NULL; - rta->nh.labels = 0; + nha.nh = (struct nexthop) { + .iface = ifa, + }; } break; @@ -772,13 +800,20 @@ if (v1.val.i >= 0x100000) runtime( "Invalid MPLS label" ); + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + if (!nh_ea) + runtime( "No nexthop to add a MPLS label to" ); + + nha.nh = ((struct nexthop_adata *) nh_ea->u.ptr)->nh; + if (v1.val.i != MPLS_NULL) { - rta->nh.label[0] = v1.val.i; - rta->nh.labels = 1; + nha.nh.label[0] = v1.val.i; + nha.nh.labels = 1; + nha.ad.length = sizeof nha - sizeof (struct adata); } else - rta->nh.labels = 0; + nha.nh.labels = 0; } break; @@ -787,18 +822,36 @@ int i = v1.val.i; if (i < 1 || i > 256) runtime( "Setting weight value out of bounds" ); - if (rta->dest != RTD_UNICAST) + + struct eattr *nh_ea = ea_find(*fs->eattrs, &ea_gen_nexthop); + if (!nh_ea) + runtime( "No nexthop to set weight on" ); + + struct nexthop_adata *nhad = (struct nexthop_adata *) nh_ea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad)) runtime( "Setting weight needs regular nexthop " ); + struct nexthop_adata *nhax = (struct nexthop_adata *) tmp_copy_adata(&nhad->ad); + /* Set weight on all next hops */ - for (struct nexthop *nh = &rta->nh; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhax) nh->weight = i - 1; + + a = ea_set_attr(fs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhax->ad)); } break; default: - bug("Invalid static attribute access (%u/%u)", sa.f_type, sa.sa_code); + bug("Invalid static attribute access (%u/%u)", sa.type, sa.sa_code); } + + if (!a) + a = ea_set_attr(fs->eattrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nha.ad))); + + a->originated = 1; + a->fresh = 1; } } @@ -806,74 +859,30 @@ DYNAMIC_ATTR; ACCESS_RTE; ACCESS_EATTRS; - RESULT_TYPE(da.f_type); + RESULT_TYPE(da->type); { - eattr *e = ea_find(*fs->eattrs, da.ea_code); - - if (!e) { - /* A special case: undefined as_path looks like empty as_path */ - if (da.type == EAF_TYPE_AS_PATH) { - RESULT_(T_PATH, ad, &null_adata); - break; - } - - /* The same special case for int_set */ - if (da.type == EAF_TYPE_INT_SET) { - RESULT_(T_CLIST, ad, &null_adata); - break; - } + const struct f_val *empty; + const eattr *e = ea_find(*fs->eattrs, da->id); - /* The same special case for ec_set */ - if (da.type == EAF_TYPE_EC_SET) { - RESULT_(T_ECLIST, ad, &null_adata); - break; - } + if (e) + { + ASSERT_DIE(e->type == da->type); - /* The same special case for lc_set */ - if (da.type == EAF_TYPE_LC_SET) { - RESULT_(T_LCLIST, ad, &null_adata); - break; + switch (e->type) { + case T_IP: + RESULT_(T_IP, ip, *((const ip_addr *) e->u.ptr->data)); + break; + default: + RESULT_VAL([[(struct f_val) { + .type = e->type, + .val.bval = e->u, + }]]); } - - /* Undefined value */ - RESULT_VOID; - break; } - - switch (e->type & EAF_TYPE_MASK) { - case EAF_TYPE_INT: - RESULT_(da.f_type, i, e->u.data); - break; - case EAF_TYPE_ROUTER_ID: - RESULT_(T_QUAD, i, e->u.data); - break; - case EAF_TYPE_OPAQUE: - RESULT_(T_ENUM_EMPTY, i, 0); - break; - case EAF_TYPE_IP_ADDRESS: - RESULT_(T_IP, ip, *((ip_addr *) e->u.ptr->data)); - break; - case EAF_TYPE_AS_PATH: - RESULT_(T_PATH, ad, e->u.ptr); - break; - case EAF_TYPE_BITFIELD: - RESULT_(T_BOOL, i, !!(e->u.data & (1u << da.bit))); - break; - case EAF_TYPE_INT_SET: - RESULT_(T_CLIST, ad, e->u.ptr); - break; - case EAF_TYPE_EC_SET: - RESULT_(T_ECLIST, ad, e->u.ptr); - break; - case EAF_TYPE_LC_SET: - RESULT_(T_LCLIST, ad, e->u.ptr); - break; - case EAF_TYPE_UNDEF: + else if (empty = f_get_empty(da->type)) + RESULT_VAL(*empty); + else RESULT_VOID; - break; - default: - bug("Unknown dynamic attribute type"); - } } } @@ -882,62 +891,32 @@ ACCESS_EATTRS; ARG_ANY(1); DYNAMIC_ATTR; - ARG_TYPE(1, da.f_type); + ARG_TYPE(1, da->type); { - struct ea_list *l = lp_alloc(fs->pool, sizeof(struct ea_list) + sizeof(eattr)); - - l->next = NULL; - l->flags = EALF_SORTED; - l->count = 1; - l->attrs[0].id = da.ea_code; - l->attrs[0].flags = 0; - l->attrs[0].type = da.type | EAF_ORIGINATED | EAF_FRESH; - - switch (da.type) { - case EAF_TYPE_INT: - case EAF_TYPE_ROUTER_ID: - l->attrs[0].u.data = v1.val.i; - break; + struct eattr *a; - case EAF_TYPE_OPAQUE: - runtime( "Setting opaque attribute is not allowed" ); - break; + if (da->type >= EAF_TYPE__MAX) + bug("Unsupported attribute type"); - case EAF_TYPE_IP_ADDRESS:; - int len = sizeof(ip_addr); - struct adata *ad = lp_alloc(fs->pool, sizeof(struct adata) + len); - ad->length = len; - (* (ip_addr *) ad->data) = v1.val.ip; - l->attrs[0].u.ptr = ad; - break; - - case EAF_TYPE_AS_PATH: - case EAF_TYPE_INT_SET: - case EAF_TYPE_EC_SET: - case EAF_TYPE_LC_SET: - l->attrs[0].u.ptr = v1.val.ad; + switch (da->type) { + case T_OPAQUE: + case T_IFACE: + runtime( "Setting opaque attribute is not allowed" ); break; - case EAF_TYPE_BITFIELD: - { - /* First, we have to find the old value */ - eattr *e = ea_find(*fs->eattrs, da.ea_code); - u32 data = e ? e->u.data : 0; - - if (v1.val.i) - l->attrs[0].u.data = data | (1u << da.bit); - else - l->attrs[0].u.data = data & ~(1u << da.bit); - } + case T_IP: + a = ea_set_attr(fs->eattrs, + EA_LITERAL_STORE_ADATA(da, 0, &v1.val.ip, sizeof(ip_addr))); break; default: - bug("Unknown dynamic attribute type"); + a = ea_set_attr(fs->eattrs, + EA_LITERAL_GENERIC(da->id, da->type, 0, .u = v1.val.bval)); + break; } - f_rta_cow(fs); - l->next = *fs->eattrs; - *fs->eattrs = l; + a->originated = 1; + a->fresh = 1; } } @@ -946,35 +925,20 @@ ACCESS_RTE; ACCESS_EATTRS; - { - struct ea_list *l = lp_alloc(fs->pool, sizeof(struct ea_list) + sizeof(eattr)); - - l->next = NULL; - l->flags = EALF_SORTED; - l->count = 1; - l->attrs[0].id = da.ea_code; - l->attrs[0].flags = 0; - l->attrs[0].type = EAF_TYPE_UNDEF | EAF_ORIGINATED | EAF_FRESH; - l->attrs[0].u.data = 0; - - f_rta_cow(fs); - l->next = *fs->eattrs; - *fs->eattrs = l; - } + ea_unset_attr(fs->eattrs, 1, da); } - INST(FI_PREF_GET, 0, 1) { - ACCESS_RTE; - RESULT(T_INT, i, (*fs->rte)->pref); - } + INST(FI_DEFAULT, 2, 1) { + ARG_ANY(1); + ARG_ANY(2); + RESULT_TYPE(f_type_element_type(v2.type)); - INST(FI_PREF_SET, 1, 0) { - ACCESS_RTE; - ARG(1,T_INT); - if (v1.val.i > 0xFFFF) - runtime( "Setting preference value out of bounds" ); - f_rte_cow(fs); - (*fs->rte)->pref = v1.val.i; + log(L_INFO "Type of arg 1 is: %d", v1.type); + + if (v1.type == T_VOID) + RESULT_VAL(v2); + else + RESULT_VAL(v1); } INST(FI_LENGTH, 1, 1) { /* Get length of */ @@ -1256,7 +1220,7 @@ struct f_arg *b = sym->function->arg_list; for (uint i = 1; a && b; a = a->next, b = b->next, i++) { - enum f_type b_type = b->arg->class & 0xff; + enum btype b_type = b->arg->class & 0xff; if (a->type && (a->type != b_type) && !f_const_promotion(a, b_type)) cf_error("Argument %u of '%s' must be %s, got %s", @@ -1293,6 +1257,7 @@ fstk->vcnt += sym->function->args; /* Storage for local variables */ + f_vcnt_check_overflow(sym->function->vars); memset(&(fstk->vstk[fstk->vcnt]), 0, sizeof(struct f_val) * sym->function->vars); fstk->vcnt += sym->function->vars; } @@ -1497,37 +1462,7 @@ runtime("Can't filter non-[e|l]clist"); } - INST(FI_ROA_CHECK_IMPLICIT, 0, 1) { /* ROA Check */ - NEVER_CONSTANT; - RTC(1); - struct rtable *table = rtc->table; - ACCESS_RTE; - ACCESS_EATTRS; - const net_addr *net = (*fs->rte)->net->n.addr; - - /* We ignore temporary attributes, probably not a problem here */ - /* 0x02 is a value of BA_AS_PATH, we don't want to include BGP headers */ - eattr *e = ea_find(*fs->eattrs, EA_CODE(PROTOCOL_BGP, 0x02)); - - if (!e || ((e->type & EAF_TYPE_MASK) != EAF_TYPE_AS_PATH)) - runtime("Missing AS_PATH attribute"); - - u32 as = 0; - as_path_get_last(e->u.ptr, &as); - - if (!table) - runtime("Missing ROA table"); - - if (table->addr_type != NET_ROA4 && table->addr_type != NET_ROA6) - runtime("Table type must be either ROA4 or ROA6"); - - if (table->addr_type != (net->type == NET_IP4 ? NET_ROA4 : NET_ROA6)) - RESULT(T_ENUM_ROA, i, ROA_UNKNOWN); /* Prefix and table type mismatch */ - else - RESULT(T_ENUM_ROA, i, [[ net_roa_check(table, net, as) ]]); - } - - INST(FI_ROA_CHECK_EXPLICIT, 2, 1) { /* ROA Check */ + INST(FI_ROA_CHECK, 2, 1) { /* ROA Check */ NEVER_CONSTANT; ARG(1, T_NET); ARG(2, T_INT); diff --git a/filter/f-inst.h b/filter/f-inst.h index e35f71c6..fbc59de7 100644 --- a/filter/f-inst.h +++ b/filter/f-inst.h @@ -94,15 +94,17 @@ void f_add_lines(const struct f_line_item *what, struct filter_iterator *fit); struct filter *f_new_where(struct f_inst *); -static inline struct f_dynamic_attr f_new_dynamic_attr(u8 type, enum f_type f_type, uint code) /* Type as core knows it, type as filters know it, and code of dynamic attribute */ -{ return (struct f_dynamic_attr) { .type = type, .f_type = f_type, .ea_code = code }; } /* f_type currently unused; will be handy for static type checking */ -static inline struct f_dynamic_attr f_new_dynamic_attr_bit(u8 bit, enum f_type f_type, uint code) /* Type as core knows it, type as filters know it, and code of dynamic attribute */ -{ return (struct f_dynamic_attr) { .type = EAF_TYPE_BITFIELD, .bit = bit, .f_type = f_type, .ea_code = code }; } /* f_type currently unused; will be handy for static type checking */ -static inline struct f_static_attr f_new_static_attr(int f_type, int code, int readonly) -{ return (struct f_static_attr) { .f_type = f_type, .sa_code = code, .readonly = readonly }; } -struct f_inst *f_generate_complex(enum f_instruction_code fi_code, struct f_dynamic_attr da, struct f_inst *argument); +static inline struct f_static_attr f_new_static_attr(btype type, int code, int readonly) +{ return (struct f_static_attr) { .type = type, .sa_code = code, .readonly = readonly }; } struct f_inst *f_generate_roa_check(struct rtable_config *table, struct f_inst *prefix, struct f_inst *asn); +struct f_attr_bit { + const struct ea_class *class; + uint bit; +}; + +#define f_new_dynamic_attr_bit(_bit, _name) ((struct f_attr_bit) { .bit = _bit, .class = ea_class_find(_name) }) + /* Hook for call bt_assert() function in configuration */ extern void (*bt_assert_hook)(int result, const struct f_line_item *assert); diff --git a/filter/f-util.c b/filter/f-util.c index fdb314b5..82a06bdd 100644 --- a/filter/f-util.c +++ b/filter/f-util.c @@ -2,7 +2,7 @@ * Filters: utility functions * * Copyright 1998 Pavel Machek <pavel@ucw.cz> - * 2017 Jan Maria Matejka <mq@ucw.cz> + * 2017 Maria Matejka <mq@ucw.cz> * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -13,7 +13,7 @@ #include "filter/f-inst.h" #include "lib/idm.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #define P(a,b) ((a<<8) | b) @@ -40,144 +40,3 @@ struct filter *f_new_where(struct f_inst *where) f->root = f_linearize(cond, 0); return f; } - -#define CA_KEY(n) n->name, n->fda.type -#define CA_NEXT(n) n->next -#define CA_EQ(na,ta,nb,tb) (!strcmp(na,nb) && (ta == tb)) -#define CA_FN(n,t) (mem_hash(n, strlen(n)) ^ (t*0xaae99453U)) -#define CA_ORDER 8 /* Fixed */ - -struct ca_storage { - struct ca_storage *next; - struct f_dynamic_attr fda; - u32 uc; - char name[0]; -}; - -HASH(struct ca_storage) ca_hash; - -static struct idm ca_idm; -static struct ca_storage **ca_storage; -static uint ca_storage_max; - -static void -ca_free(resource *r) -{ - struct custom_attribute *ca = (void *) r; - struct ca_storage *cas = HASH_FIND(ca_hash, CA, ca->name, ca->fda->type); - ASSERT(cas); - - ca->name = NULL; - ca->fda = NULL; - if (!--cas->uc) { - uint id = EA_CUSTOM_ID(cas->fda.ea_code); - idm_free(&ca_idm, id); - HASH_REMOVE(ca_hash, CA, cas); - ca_storage[id] = NULL; - mb_free(cas); - } -} - -static void -ca_dump(resource *r) -{ - struct custom_attribute *ca = (void *) r; - debug("name \"%s\" id 0x%04x ea_type 0x%02x f_type 0x%02x\n", - ca->name, ca->fda->ea_code, ca->fda->type, ca->fda->f_type); -} - -static struct resclass ca_class = { - .name = "Custom attribute", - .size = sizeof(struct custom_attribute), - .free = ca_free, - .dump = ca_dump, - .lookup = NULL, - .memsize = NULL, -}; - -struct custom_attribute * -ca_lookup(pool *p, const char *name, int f_type) -{ - int ea_type; - - switch (f_type) { - case T_INT: - ea_type = EAF_TYPE_INT; - break; - case T_IP: - ea_type = EAF_TYPE_IP_ADDRESS; - break; - case T_QUAD: - ea_type = EAF_TYPE_ROUTER_ID; - break; - case T_PATH: - ea_type = EAF_TYPE_AS_PATH; - break; - case T_CLIST: - ea_type = EAF_TYPE_INT_SET; - break; - case T_ECLIST: - ea_type = EAF_TYPE_EC_SET; - break; - case T_LCLIST: - ea_type = EAF_TYPE_LC_SET; - break; - default: - cf_error("Custom route attribute of unsupported type"); - } - - static int inited = 0; - if (!inited) { - idm_init(&ca_idm, &root_pool, 8); - HASH_INIT(ca_hash, &root_pool, CA_ORDER); - - ca_storage_max = 256; - ca_storage = mb_allocz(&root_pool, sizeof(struct ca_storage *) * ca_storage_max); - - inited++; - } - - struct ca_storage *cas = HASH_FIND(ca_hash, CA, name, ea_type); - if (cas) { - cas->uc++; - } else { - - uint id = idm_alloc(&ca_idm); - - if (id >= EA_CUSTOM_BIT) - cf_error("Too many custom attributes."); - - if (id >= ca_storage_max) { - ca_storage_max *= 2; - ca_storage = mb_realloc(ca_storage, sizeof(struct ca_storage *) * ca_storage_max * 2); - } - - cas = mb_allocz(&root_pool, sizeof(struct ca_storage) + strlen(name) + 1); - cas->fda = f_new_dynamic_attr(ea_type, f_type, EA_CUSTOM(id)); - cas->uc = 1; - - strcpy(cas->name, name); - ca_storage[id] = cas; - - HASH_INSERT(ca_hash, CA, cas); - } - - struct custom_attribute *ca = ralloc(p, &ca_class); - ca->fda = &(cas->fda); - ca->name = cas->name; - return ca; -} - -const char * -ea_custom_name(uint ea) -{ - uint id = EA_CUSTOM_ID(ea); - if (id >= ca_storage_max) - return NULL; - - if (!ca_storage[id]) - return NULL; - - return ca_storage[id]->name; -} - diff --git a/filter/filter.c b/filter/filter.c index 20a380dc..9a94545c 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -35,10 +35,10 @@ #include "lib/ip.h" #include "lib/net.h" #include "lib/flowspec.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "filter/filter.h" #include "filter/f-inst.h" @@ -50,43 +50,35 @@ enum f_exception { FE_RETURN = 0x1, }; - -struct filter_stack { - /* Value stack for execution */ -#define F_VAL_STACK_MAX 4096 - uint vcnt; /* Current value stack size; 0 for empty */ - uint ecnt; /* Current execute stack size; 0 for empty */ - - struct f_val vstk[F_VAL_STACK_MAX]; /* The stack itself */ - - /* Instruction stack for execution */ -#define F_EXEC_STACK_MAX 4096 - struct { - const struct f_line *line; /* The line that is being executed */ - uint pos; /* Instruction index in the line */ - uint ventry; /* Value stack depth on entry */ - uint vbase; /* Where to index variable positions from */ - enum f_exception emask; /* Exception mask */ - } estk[F_EXEC_STACK_MAX]; +struct filter_exec_stack { + const struct f_line *line; /* The line that is being executed */ + uint pos; /* Instruction index in the line */ + uint ventry; /* Value stack depth on entry */ + uint vbase; /* Where to index variable positions from */ + enum f_exception emask; /* Exception mask */ }; /* Internal filter state, to be allocated on stack when executing filters */ struct filter_state { /* Stacks needed for execution */ - struct filter_stack *stack; + struct filter_stack { + /* Current filter stack depth */ - /* The route we are processing. This may be NULL to indicate no route available. */ - struct rte **rte; + /* Value stack */ + uint vcnt, vlen; + struct f_val *vstk; + + /* Instruction stack for execution */ + uint ecnt, elen; + struct filter_exec_stack *estk; + } stack; - /* The old rta to be freed after filters are done. */ - struct rta *old_rta; + /* The route we are processing. This may be NULL to indicate no route available. */ + struct rte *rte; /* Cached pointer to ea_list */ struct ea_list **eattrs; - /* Linpool for adata allocation */ - struct linpool *pool; - /* Buffer for log output */ struct buffer buf; @@ -95,48 +87,16 @@ struct filter_state { }; _Thread_local static struct filter_state filter_state; -_Thread_local static struct filter_stack filter_stack; void (*bt_assert_hook)(int result, const struct f_line_item *assert); -static inline void f_cache_eattrs(struct filter_state *fs) -{ - fs->eattrs = &((*fs->rte)->attrs->eattrs); -} - -static inline void f_rte_cow(struct filter_state *fs) -{ - if (!((*fs->rte)->flags & REF_COW)) - return; +#define _f_stack_init(fs, px, def) ((fs).stack.px##stk = alloca(sizeof(*(fs).stack.px##stk) * ((fs).stack.px##len = (config && config->filter_##px##stk) ? config->filter_##px##stk : (def)))) - *fs->rte = rte_cow(*fs->rte); -} +#define f_stack_init(fs) ( _f_stack_init(fs, v, 128), _f_stack_init(fs, e, 128) ) -/* - * rta_cow - prepare rta for modification by filter - */ -static void -f_rta_cow(struct filter_state *fs) +static inline void f_cache_eattrs(struct filter_state *fs) { - if (!rta_is_cached((*fs->rte)->attrs)) - return; - - /* Prepare to modify rte */ - f_rte_cow(fs); - - /* Store old rta to free it later, it stores reference from rte_cow() */ - fs->old_rta = (*fs->rte)->attrs; - - /* - * Get shallow copy of rta. Fields eattrs and nexthops of rta are shared - * with fs->old_rta (they will be copied when the cached rta will be obtained - * at the end of f_run()), also the lock of hostentry is inherited (we - * suppose hostentry is not changed by filters). - */ - (*fs->rte)->attrs = rta_do_cow((*fs->rte)->attrs, fs->pool); - - /* Re-cache the ea_list */ - f_cache_eattrs(fs); + fs->eattrs = &(fs->rte->attrs); } static struct tbf rl_runtime_err = TBF_DEFAULT_LOG_LIMITS; @@ -163,15 +123,17 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) ASSERT(line->args == 0); /* Initialize the filter stack */ - struct filter_stack *fstk = fs->stack; + struct filter_stack *fstk = &fs->stack; fstk->vcnt = line->vars; memset(fstk->vstk, 0, sizeof(struct f_val) * line->vars); /* The same as with the value stack. Not resetting the stack for performance reasons. */ fstk->ecnt = 1; - fstk->estk[0].line = line; - fstk->estk[0].pos = 0; + fstk->estk[0] = (struct filter_exec_stack) { + .line = line, + .pos = 0, + }; #define curline fstk->estk[fstk->ecnt-1] @@ -191,14 +153,16 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) #define v2 vv(1) #define v3 vv(2) +#define f_vcnt_check_overflow(n) do { if (fstk->vcnt + n >= fstk->vlen) runtime("Filter execution stack overflow"); } while (0) + #define runtime(fmt, ...) do { \ if (!(fs->flags & FF_SILENT)) \ log_rl(&rl_runtime_err, L_ERR "filters, line %d: " fmt, what->lineno, ##__VA_ARGS__); \ return F_ERROR; \ } while(0) -#define falloc(size) lp_alloc(fs->pool, size) -#define fpool fs->pool +#define falloc(size) tmp_alloc(size) +#define fpool tmp_linpool #define ACCESS_EATTRS do { if (!fs->eattrs) f_cache_eattrs(fs); } while (0) @@ -240,29 +204,15 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) /** * f_run - run a filter for a route * @filter: filter to run - * @rte: route being filtered, may be modified + * @rte: route being filtered, must be write-able * @tmp_pool: all filter allocations go from this pool * @flags: flags * - * If filter needs to modify the route, there are several - * posibilities. @rte might be read-only (with REF_COW flag), in that - * case rw copy is obtained by rte_cow() and @rte is replaced. If - * @rte is originally rw, it may be directly modified (and it is never - * copied). - * - * The returned rte may reuse the (possibly cached, cloned) rta, or - * (if rta was modified) contains a modified uncached rta, which - * uses parts allocated from @tmp_pool and parts shared from original - * rta. There is one exception - if @rte is rw but contains a cached - * rta and that is modified, rta in returned rte is also cached. - * - * Ownership of cached rtas is consistent with rte, i.e. - * if a new rte is returned, it has its own clone of cached rta - * (and cached rta of read-only source rte is intact), if rte is - * modified in place, old cached rta is possibly freed. + * If @rte->attrs is cached, the returned rte allocates a new rta on + * tmp_pool, otherwise the filters may modify it. */ enum filter_return -f_run(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, int flags) +f_run(const struct filter *filter, struct rte *rte, int flags) { if (filter == FILTER_ACCEPT) return F_ACCEPT; @@ -270,48 +220,21 @@ f_run(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, i if (filter == FILTER_REJECT) return F_REJECT; - int rte_cow = ((*rte)->flags & REF_COW); DBG( "Running filter `%s'...", filter->name ); /* Initialize the filter state */ filter_state = (struct filter_state) { - .stack = &filter_stack, .rte = rte, - .pool = tmp_pool, .flags = flags, }; + f_stack_init(filter_state); + LOG_BUFFER_INIT(filter_state.buf); /* Run the interpreter itself */ enum filter_return fret = interpret(&filter_state, filter->root, NULL); - if (filter_state.old_rta) { - /* - * Cached rta was modified and filter_state->rte contains now an uncached one, - * sharing some part with the cached one. The cached rta should - * be freed (if rte was originally COW, filter_state->old_rta is a clone - * obtained during rte_cow()). - * - * This also implements the exception mentioned in f_run() - * description. The reason for this is that rta reuses parts of - * filter_state->old_rta, and these may be freed during rta_free(filter_state->old_rta). - * This is not the problem if rte was COW, because original rte - * also holds the same rta. - */ - if (!rte_cow) { - /* Cache the new attrs */ - (*filter_state.rte)->attrs = rta_lookup((*filter_state.rte)->attrs); - - /* Drop cached ea_list pointer */ - filter_state.eattrs = NULL; - } - - /* Uncache the old attrs and drop the pointer as it is invalid now. */ - rta_free(filter_state.old_rta); - filter_state.old_rta = NULL; - } - /* Process the filter output, log it and return */ if (fret < F_ACCEPT) { if (!(filter_state.flags & FF_SILENT)) @@ -336,18 +259,17 @@ f_run(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, i */ enum filter_return -f_eval_rte(const struct f_line *expr, struct rte **rte, struct linpool *tmp_pool) +f_eval_rte(const struct f_line *expr, struct rte *rte) { filter_state = (struct filter_state) { - .stack = &filter_stack, .rte = rte, - .pool = tmp_pool, }; + f_stack_init(filter_state); + LOG_BUFFER_INIT(filter_state.buf); - ASSERT(!((*rte)->flags & REF_COW)); - ASSERT(!rta_is_cached((*rte)->attrs)); + ASSERT(!rta_is_cached(rte->attrs)); return interpret(&filter_state, expr, NULL); } @@ -359,12 +281,11 @@ f_eval_rte(const struct f_line *expr, struct rte **rte, struct linpool *tmp_pool * @pres: here the output will be stored */ enum filter_return -f_eval(const struct f_line *expr, struct linpool *tmp_pool, struct f_val *pres) +f_eval(const struct f_line *expr, struct f_val *pres) { - filter_state = (struct filter_state) { - .stack = &filter_stack, - .pool = tmp_pool, - }; + filter_state = (struct filter_state) {}; + + f_stack_init(filter_state); LOG_BUFFER_INIT(filter_state.buf); @@ -381,10 +302,9 @@ uint f_eval_int(const struct f_line *expr) { /* Called independently in parse-time to eval expressions */ - filter_state = (struct filter_state) { - .stack = &filter_stack, - .pool = cfg_mem, - }; + filter_state = (struct filter_state) {}; + + f_stack_init(filter_state); struct f_val val; @@ -403,10 +323,10 @@ f_eval_int(const struct f_line *expr) * f_eval_buf - get a value of a term and print it to the supplied buffer */ enum filter_return -f_eval_buf(const struct f_line *expr, struct linpool *tmp_pool, buffer *buf) +f_eval_buf(const struct f_line *expr, buffer *buf) { struct f_val val; - enum filter_return fret = f_eval(expr, tmp_pool, &val); + enum filter_return fret = f_eval(expr, &val); if (fret <= F_RETURN) val_format(&val, buf); return fret; @@ -474,6 +394,23 @@ filter_commit(struct config *new, struct config *old) } } +void channel_filter_dump(const struct filter *f) +{ + if (f == FILTER_ACCEPT) + debug(" ALL"); + else if (f == FILTER_REJECT) + debug(" NONE"); + else if (f == FILTER_UNDEF) + debug(" UNDEF"); + else if (f->sym) { + ASSERT(f->sym->filter == f); + debug(" named filter %s", f->sym->name); + } else { + debug("\n"); + f_dump_line(f->root, 2); + } +} + void filters_dump_all(void) { struct symbol *sym; @@ -493,19 +430,10 @@ void filters_dump_all(void) struct channel *c; WALK_LIST(c, sym->proto->proto->channels) { debug(" Channel %s (%s) IMPORT", c->name, net_label[c->net_type]); - if (c->in_filter == FILTER_ACCEPT) - debug(" ALL\n"); - else if (c->in_filter == FILTER_REJECT) - debug(" NONE\n"); - else if (c->in_filter == FILTER_UNDEF) - debug(" UNDEF\n"); - else if (c->in_filter->sym) { - ASSERT(c->in_filter->sym->filter == c->in_filter); - debug(" named filter %s\n", c->in_filter->sym->name); - } else { - debug("\n"); - f_dump_line(c->in_filter->root, 2); - } + channel_filter_dump(c->in_filter); + debug(" EXPORT", c->name, net_label[c->net_type]); + channel_filter_dump(c->out_filter); + debug("\n"); } } } diff --git a/filter/filter.h b/filter/filter.h index 26c1037b..3f2e62eb 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -13,8 +13,8 @@ #include "lib/resource.h" #include "lib/ip.h" #include "lib/macro.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" /* Possible return values of filter execution */ enum filter_return { @@ -51,10 +51,10 @@ struct filter { struct rte; -enum filter_return f_run(const struct filter *filter, struct rte **rte, struct linpool *tmp_pool, int flags); -enum filter_return f_eval_rte(const struct f_line *expr, struct rte **rte, struct linpool *tmp_pool); +enum filter_return f_run(const struct filter *filter, struct rte *rte, int flags); +enum filter_return f_eval_rte(const struct f_line *expr, struct rte *rte); uint f_eval_int(const struct f_line *expr); -enum filter_return f_eval_buf(const struct f_line *expr, struct linpool *tmp_pool, buffer *buf); +enum filter_return f_eval_buf(const struct f_line *expr, buffer *buf); const char *filter_name(const struct filter *filter); int filter_same(const struct filter *new, const struct filter *old); @@ -70,13 +70,4 @@ void filters_dump_all(void); #define FF_SILENT 2 /* Silent filter execution */ -/* Custom route attributes */ -struct custom_attribute { - resource r; - struct f_dynamic_attr *fda; - const char *name; -}; - -struct custom_attribute *ca_lookup(pool *p, const char *name, int ea_type); - #endif diff --git a/filter/filter_test.c b/filter/filter_test.c index 7e4af092..5b24a765 100644 --- a/filter/filter_test.c +++ b/filter/filter_test.c @@ -46,9 +46,7 @@ run_function(const void *arg) if (t->cmp) return t->result == f_same(t->fn, t->cmp); - linpool *tmp = lp_new_default(&root_pool); - enum filter_return fret = f_eval(t->fn, tmp, NULL); - rfree(tmp); + enum filter_return fret = f_eval(t->fn, NULL); return (fret < F_REJECT); } @@ -72,6 +70,7 @@ int main(int argc, char *argv[]) { bt_init(argc, argv); + bt_bird_init(); bt_assert_hook = bt_assert_filter; diff --git a/filter/test.conf b/filter/test.conf index bc5f898e..22f5dea3 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -9,7 +9,109 @@ router id 62.168.0.1; /* We have to setup any protocol */ protocol device { } - +/* Setting some custom attributes, enough to force BIRD to reallocate the attribute idmap */ +attribute int test_ca_int1; +attribute int test_ca_int2; +attribute int test_ca_int3; +attribute int test_ca_int4; +attribute int test_ca_int5; +attribute int test_ca_int6; +attribute int test_ca_int7; +attribute int test_ca_int8; +attribute int test_ca_int9; +attribute int test_ca_int10; + +attribute ip test_ca_ip1; +attribute ip test_ca_ip2; +attribute ip test_ca_ip3; +attribute ip test_ca_ip4; +attribute ip test_ca_ip5; +attribute ip test_ca_ip6; +attribute ip test_ca_ip7; +attribute ip test_ca_ip8; +attribute ip test_ca_ip9; +attribute ip test_ca_ip10; + +attribute quad test_ca_quad1; +attribute quad test_ca_quad2; +attribute quad test_ca_quad3; +attribute quad test_ca_quad4; +attribute quad test_ca_quad5; +attribute quad test_ca_quad6; +attribute quad test_ca_quad7; +attribute quad test_ca_quad8; +attribute quad test_ca_quad9; +attribute quad test_ca_quad10; + +attribute bgppath test_ca_bgppath1; +attribute bgppath test_ca_bgppath2; +attribute bgppath test_ca_bgppath3; +attribute bgppath test_ca_bgppath4; +attribute bgppath test_ca_bgppath5; +attribute bgppath test_ca_bgppath6; +attribute bgppath test_ca_bgppath7; +attribute bgppath test_ca_bgppath8; +attribute bgppath test_ca_bgppath9; +attribute bgppath test_ca_bgppath10; + +attribute clist test_ca_clist1; +attribute clist test_ca_clist2; +attribute clist test_ca_clist3; +attribute clist test_ca_clist4; +attribute clist test_ca_clist5; +attribute clist test_ca_clist6; +attribute clist test_ca_clist7; +attribute clist test_ca_clist8; +attribute clist test_ca_clist9; +attribute clist test_ca_clist10; + +attribute eclist test_ca_eclist1; +attribute eclist test_ca_eclist2; +attribute eclist test_ca_eclist3; +attribute eclist test_ca_eclist4; +attribute eclist test_ca_eclist5; +attribute eclist test_ca_eclist6; +attribute eclist test_ca_eclist7; +attribute eclist test_ca_eclist8; +attribute eclist test_ca_eclist9; +attribute eclist test_ca_eclist10; + +attribute lclist test_ca_lclist1; +attribute lclist test_ca_lclist2; +attribute lclist test_ca_lclist3; +attribute lclist test_ca_lclist4; +attribute lclist test_ca_lclist5; +attribute lclist test_ca_lclist6; +attribute lclist test_ca_lclist7; +attribute lclist test_ca_lclist8; +attribute lclist test_ca_lclist9; +attribute lclist test_ca_lclist10; + +attribute lclist test_ca_lclist_max1; +attribute lclist test_ca_lclist_max2; +attribute lclist test_ca_lclist_max3; +attribute lclist test_ca_lclist_max4; +attribute lclist test_ca_lclist_max5; +attribute lclist test_ca_lclist_max6; +attribute lclist test_ca_lclist_max7; +attribute lclist test_ca_lclist_max8; +attribute lclist test_ca_lclist_max9; +attribute lclist test_ca_lclist_max10; +attribute lclist test_ca_lclist_max11; +attribute lclist test_ca_lclist_max12; +attribute lclist test_ca_lclist_max13; +attribute lclist test_ca_lclist_max14; +attribute lclist test_ca_lclist_max15; +attribute lclist test_ca_lclist_max16; +attribute lclist test_ca_lclist_max17; +attribute lclist test_ca_lclist_max18; +attribute lclist test_ca_lclist_max19; +attribute lclist test_ca_lclist_max20; +attribute lclist test_ca_lclist_max21; + + +/* Uncomment this to get an error */ +#attribute int bgp_path; /* * Common definitions and functions @@ -109,6 +211,14 @@ function t_int() bt_assert(!(i = 4)); bt_assert(1 <= 1); bt_assert(!(1234 < 1234)); + + bt_assert(10 - 5 = 5); + bt_assert(4294967295 + 1 = 0); + bt_assert(6*9=54); + bt_assert(984/41 = 24); + bt_assert(123/45 = 2); + bt_assert(0xfee1a | 0xbeef = 0xffeff); + bt_assert(0xfee1a & 0xbeef = 0xae0a); } bt_test_suite(t_int, "Testing integers"); @@ -404,10 +514,9 @@ bt_test_suite(t_ip_set, "Testing sets of ip address"); function t_enum() { - bt_assert(format(RTS_DUMMY) = "(enum 30)0"); - bt_assert(format(RTS_STATIC) = "(enum 30)1"); - bt_assert(format(NET_IP4) = "(enum 36)1"); - bt_assert(format(NET_VPN6) = "(enum 36)4"); + bt_assert(format(RTS_STATIC) = "(enum 31)1"); + bt_assert(format(NET_IP4) = "(enum 3b)1"); + bt_assert(format(NET_VPN6) = "(enum 3b)4"); bt_assert(RTS_STATIC ~ [RTS_STATIC, RTS_DEVICE]); bt_assert(RTS_BGP !~ [RTS_STATIC, RTS_DEVICE]); @@ -1505,6 +1614,7 @@ function __test2() filter testf int j; +bool t; { print "Heya, filtering route to ", net.ip, " prefixlen ", net.len, " source ", source; print "This route was from ", from; @@ -1516,6 +1626,54 @@ int j; rip_metric = 14; unset(rip_metric); + preference = 1234; + + test_ca_int1 = 42; + test_ca_ip2 = 1.3.5.7; + test_ca_quad3 = 2.4.6.8; + test_ca_bgppath4 = +empty+; + test_ca_clist5 = -empty-; + test_ca_eclist6 = --empty--; + test_ca_lclist7 = ---empty---; + + igp_metric = 53; + babel_metric = 64; + t = defined(babel_router_id); + j = babel_seqno; + + bgp_origin = ORIGIN_IGP; + bgp_path = +empty+; + bgp_next_hop = 3456:789a:bcde:f012::3456:789a; + bgp_med = 71; + bgp_local_pref = 942; + t = defined(bgp_atomic_aggr); + t = defined(bgp_aggregator); + bgp_community = -empty-; + bgp_originator_id = 9.7.5.3; + bgp_cluster_list = -empty-; + bgp_ext_community = --empty--; + t = defined(bgp_aigp); + bgp_large_community = ---empty---; + t = defined(bgp_mpls_label_stack); + + ospf_metric1 = 64; + ospf_metric2 = 111; + ospf_tag = 654432; + + radv_preference = RA_PREF_LOW; + radv_lifetime = 28; + + rip_metric = 2; + rip_tag = 4; + t = defined(rip_from); + + krt_source = 17; + krt_metric = 19; + +# krt_lock_mtu = false; +# krt_lock_window = true; +# krt_lock_rtt = krt_lock_rttvar && krt_lock_sstresh || krt_lock_cwnd; + accept "ok I take that"; } diff --git a/filter/test.conf2 b/filter/test.conf2 index e95f9563..9fc8330f 100644 --- a/filter/test.conf2 +++ b/filter/test.conf2 @@ -38,12 +38,6 @@ protocol static { print from; from = 1.2.3.4; print from; - print scope; - scope = SCOPE_HOST; - print scope; - if !(scope ~ [ SCOPE_HOST, SCOPE_SITE ]) then { - print "Failed in test"; - } preference = 15; print preference; diff --git a/filter/tree_test.c b/filter/tree_test.c index 6472d17e..05702f81 100644 --- a/filter/tree_test.c +++ b/filter/tree_test.c @@ -19,10 +19,7 @@ static void start_conf_env(void) { bt_bird_init(); - - pool *p = rp_new(&root_pool, "helper_pool"); - linpool *l = lp_new_default(p); - cfg_mem = l; + cfg_mem = tmp_linpool; } static struct f_tree * diff --git a/filter/trie_test.c b/filter/trie_test.c index cae86995..dc791280 100644 --- a/filter/trie_test.c +++ b/filter/trie_test.c @@ -249,14 +249,14 @@ get_outer_net(net_addr *net, const struct f_prefix *src) } static list * -make_random_prefix_list(linpool *lp, int num, int v6, int tight) +make_random_prefix_list(int num, int v6, int tight) { - list *prefixes = lp_allocz(lp, sizeof(struct f_prefix_node)); + list *prefixes = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); init_list(prefixes); for (int i = 0; i < num; i++) { - struct f_prefix_node *px = lp_allocz(lp, sizeof(struct f_prefix_node)); + struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); get_random_prefix(&px->prefix, v6, tight); add_tail(prefixes, &px->n); @@ -269,9 +269,9 @@ make_random_prefix_list(linpool *lp, int num, int v6, int tight) } static struct f_trie * -make_trie_from_prefix_list(linpool *lp, list *prefixes) +make_trie_from_prefix_list(list *prefixes) { - struct f_trie *trie = f_new_trie(lp, 0); + struct f_trie *trie = f_new_trie(tmp_linpool, 0); struct f_prefix_node *n; WALK_LIST(n, *prefixes) @@ -286,7 +286,7 @@ make_trie_from_prefix_list(linpool *lp, list *prefixes) * Arg @plus means prefix should include all longer ones. */ static list * -read_prefix_list(linpool *lp, FILE *f, int v6, int plus) +read_prefix_list(FILE *f, int v6, int plus) { ASSERT(!v6); @@ -294,7 +294,7 @@ read_prefix_list(linpool *lp, FILE *f, int v6, int plus) char s[32]; int n; - list *pxlist = lp_allocz(lp, sizeof(struct f_prefix_node)); + list *pxlist = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); init_list(pxlist); errno = 0; @@ -308,7 +308,7 @@ read_prefix_list(linpool *lp, FILE *f, int v6, int plus) if (n != 5) bt_abort_msg("Invalid content of trie_data"); - struct f_prefix_node *px = lp_allocz(lp, sizeof(struct f_prefix_node)); + struct f_prefix_node *px = lp_allocz(tmp_linpool, sizeof(struct f_prefix_node)); net_fill_ip4(&px->prefix.net, ip4_build(a0, a1, a2, a3), pl); px->prefix.lo = pl; px->prefix.hi = plus ? IP4_MAX_PREFIX_LENGTH : pl; @@ -331,7 +331,6 @@ read_prefix_list(linpool *lp, FILE *f, int v6, int plus) */ static int read_prefix_file(const char *filename, int plus, - linpool *lp0, linpool *lp1, list *data[], struct f_trie *trie[]) { FILE *f = fopen(filename, "r"); @@ -339,10 +338,10 @@ read_prefix_file(const char *filename, int plus, int n = 0; list *pxlist; - while (pxlist = read_prefix_list(lp0, f, 0, plus)) + while (pxlist = read_prefix_list(f, 0, plus)) { data[n] = pxlist; - trie[n] = make_trie_from_prefix_list(lp1, pxlist); + trie[n] = make_trie_from_prefix_list(pxlist); bt_debug("NEXT\n"); n++; } @@ -437,11 +436,10 @@ t_match_random_net(void) bt_config_parse(BT_CONFIG_SIMPLE); int v6 = 0; - linpool *lp = lp_new_default(&root_pool); for (int round = 0; round < TESTS_NUM; round++) { - list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6, 0); - struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + list *prefixes = make_random_prefix_list(PREFIXES_NUM, v6, 0); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); for (int i = 0; i < PREFIX_TESTS_NUM; i++) { @@ -451,7 +449,7 @@ t_match_random_net(void) } v6 = !v6; - lp_flush(lp); + tmp_flush(); } bt_bird_cleanup(); @@ -465,11 +463,10 @@ t_match_inner_net(void) bt_config_parse(BT_CONFIG_SIMPLE); int v6 = 0; - linpool *lp = lp_new_default(&root_pool); for (int round = 0; round < TESTS_NUM; round++) { - list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6, 0); - struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + list *prefixes = make_random_prefix_list(PREFIXES_NUM, v6, 0); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); struct f_prefix_node *n = HEAD(*prefixes); for (int i = 0; i < PREFIX_TESTS_NUM; i++) @@ -482,7 +479,7 @@ t_match_inner_net(void) } v6 = !v6; - lp_flush(lp); + tmp_flush(); } bt_bird_cleanup(); @@ -496,11 +493,10 @@ t_match_outer_net(void) bt_config_parse(BT_CONFIG_SIMPLE); int v6 = 0; - linpool *lp = lp_new_default(&root_pool); for (int round = 0; round < TESTS_NUM; round++) { - list *prefixes = make_random_prefix_list(lp, PREFIXES_NUM, v6, 0); - struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + list *prefixes = make_random_prefix_list(PREFIXES_NUM, v6, 0); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); struct f_prefix_node *n = HEAD(*prefixes); for (int i = 0; i < PREFIX_TESTS_NUM; i++) @@ -513,7 +509,7 @@ t_match_outer_net(void) } v6 = !v6; - lp_flush(lp); + tmp_flush(); } v6 = !v6; @@ -531,24 +527,22 @@ static int benchmark_trie_dataset(const char *filename, int plus) { int n = 0; - linpool *lp0 = lp_new_default(&root_pool); - linpool *lp1 = lp_new_default(&root_pool); list *data[TRIE_BUFFER_SIZE]; struct f_trie *trie[TRIE_BUFFER_SIZE]; net_addr *nets; bt_reset_suite_case_timer(); bt_log_suite_case_result(1, "Reading %s", filename, n); - n = read_prefix_file(filename, plus, lp0, lp1, data, trie); + n = read_prefix_file(filename, plus, data, trie); bt_log_suite_case_result(1, "Read prefix data, %d lists, ", n); - size_t trie_size = rmemsize(lp1).effective * 1000 / (1024*1024); + size_t trie_size = rmemsize(tmp_linpool).effective * 1000 / (1024*1024); bt_log_suite_case_result(1, "Trie size %u.%03u MB", (uint) (trie_size / 1000), (uint) (trie_size % 1000)); int t = PREFIX_BENCH_NUM / n; int tb = MIN(t, TEST_BUFFER_SIZE); - nets = lp_alloc(lp0, tb * sizeof(net_addr)); + nets = tmp_alloc(tb * sizeof(net_addr)); if (!plus) select_random_prefix_subset(data, nets, n, tb); @@ -573,9 +567,7 @@ benchmark_trie_dataset(const char *filename, int plus) bt_log_suite_case_result(1, "Matching done, %d / %d matches", match, t * n); - rfree(lp0); - rfree(lp1); - + tmp_flush(); return 1; } @@ -621,12 +613,11 @@ t_trie_same(void) bt_config_parse(BT_CONFIG_SIMPLE); int v6 = 0; - linpool *lp = lp_new_default(&root_pool); for (int round = 0; round < TESTS_NUM*4; round++) { - list *prefixes = make_random_prefix_list(lp, 100 * PREFIXES_NUM, v6, 0); - struct f_trie *trie1 = f_new_trie(lp, 0); - struct f_trie *trie2 = f_new_trie(lp, 0); + list *prefixes = make_random_prefix_list(100 * PREFIXES_NUM, v6, 0); + struct f_trie *trie1 = f_new_trie(tmp_linpool, 0); + struct f_trie *trie2 = f_new_trie(tmp_linpool, 0); struct f_prefix_node *n; WALK_LIST(n, *prefixes) @@ -638,7 +629,7 @@ t_trie_same(void) bt_assert(trie_same(trie1, trie2)); v6 = !v6; - lp_flush(lp); + tmp_flush(); } bt_bird_cleanup(); @@ -664,15 +655,14 @@ t_trie_walk(void) bt_bird_init(); bt_config_parse(BT_CONFIG_SIMPLE); - linpool *lp = lp_new_default(&root_pool); for (int round = 0; round < TESTS_NUM*8; round++) { int level = round / TESTS_NUM; int v6 = level % 2; int num = PREFIXES_NUM * (int[]){1, 10, 100, 1000}[level / 2]; int pos = 0, end = 0; - list *prefixes = make_random_prefix_list(lp, num, v6, 1); - struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + list *prefixes = make_random_prefix_list(num, v6, 1); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); struct f_prefix *pxset = malloc((num + 1) * sizeof(struct f_prefix)); struct f_prefix_node *n; @@ -770,7 +760,7 @@ t_trie_walk(void) bt_assert((pos == num) || !net_in_netX(&pxset[pos].net, &from.net)); bt_debug("Subnet walk done for %s (found %d nets)\n", buf0, pos - p0); - lp_flush(lp); + tmp_flush(); } bt_bird_cleanup(); @@ -815,7 +805,6 @@ t_trie_walk_to_root(void) bt_bird_init(); bt_config_parse(BT_CONFIG_SIMPLE); - linpool *lp = lp_new_default(&root_pool); for (int round = 0; round < TESTS_NUM * 4; round++) { int level = round / TESTS_NUM; @@ -824,8 +813,8 @@ t_trie_walk_to_root(void) int pos = 0; int st = 0, sn = 0, sm = 0; - list *prefixes = make_random_prefix_list(lp, num, v6, 1); - struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes); + list *prefixes = make_random_prefix_list(num, v6, 1); + struct f_trie *trie = make_trie_from_prefix_list(prefixes); struct f_prefix *pxset = malloc((num + 1) * sizeof(struct f_prefix)); struct f_prefix_node *pxn; @@ -884,7 +873,7 @@ t_trie_walk_to_root(void) bt_debug("Success in %d / %d, sum %d, max %d\n", sn, i, st, sm); - lp_flush(lp); + tmp_flush(); } bt_bird_cleanup(); diff --git a/lib/Makefile b/lib/Makefile index 4378a7bd..f4ade9a6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,7 +1,7 @@ -src := bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c +src := a-path.c a-set.c bitmap.c bitops.c blake2s.c blake2b.c checksum.c event.c flowspec.c idm.c ip.c lists.c mac.c md5.c mempool.c net.c patmatch.c printf.c rcu.c resource.c sha1.c sha256.c sha512.c slab.c slists.c strtoul.c tbf.c timer.c xmalloc.c obj := $(src-o-files) $(all-daemon) -tests_src := bitmap_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c +tests_src := a-set_test.c a-path_test.c bitmap_test.c heap_test.c buffer_test.c event_test.c flowspec_test.c bitops_test.c patmatch_test.c fletcher16_test.c slist_test.c checksum_test.c lists_test.c mac_test.c ip_test.c hash_test.c printf_test.c slab_test.c type_test.c tests_targets := $(tests_targets) $(tests-target-files) tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/a-path.c b/lib/a-path.c index 6bb18285..a7a22e40 100644 --- a/nest/a-path.c +++ b/lib/a-path.c @@ -8,8 +8,8 @@ */ #include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #include "lib/unaligned.h" #include "lib/string.h" @@ -591,7 +591,7 @@ as_path_match_set(const struct adata *path, const struct f_tree *set) p += 2; for (i=0; i<n; i++) { - struct f_val v = {T_INT, .val.i = get_as(p)}; + struct f_val v = { .type = T_INT, .val.i = get_as(p)}; if (find_tree(set, &v)) return 1; p += BS; @@ -633,7 +633,7 @@ as_path_filter(struct linpool *pool, const struct adata *path, const struct f_va if (set->type == T_SET) { - struct f_val v = {T_INT, .val.i = as}; + struct f_val v = { .type = T_INT, .val.i = as}; match = !!find_tree(set->val.t, &v); } else /* T_INT */ diff --git a/nest/a-path_test.c b/lib/a-path_test.c index 97924c00..c6f8ce8b 100644 --- a/nest/a-path_test.c +++ b/lib/a-path_test.c @@ -9,8 +9,8 @@ #include "test/birdtest.h" #include "test/bt-utils.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #include "filter/data.h" @@ -24,8 +24,6 @@ static int t_as_path_match(void) { - resource_init(); - int round; for (round = 0; round < TESTS_NUM; round++) { @@ -33,14 +31,13 @@ t_as_path_match(void) struct adata *as_path = &empty_as_path; u32 first_prepended, last_prepended; first_prepended = last_prepended = 0; - struct linpool *lp = lp_new_default(&root_pool); struct f_path_mask *mask = alloca(sizeof(struct f_path_mask) + AS_PATH_LENGTH * sizeof(struct f_path_mask_item)); mask->len = AS_PATH_LENGTH; for (int i = AS_PATH_LENGTH - 1; i >= 0; i--) { u32 val = bt_random(); - as_path = as_path_prepend(lp, as_path, val); + as_path = as_path_prepend(tmp_linpool, as_path, val); bt_debug("Prepending ASN: %10u \n", val); if (i == 0) @@ -62,7 +59,7 @@ t_as_path_match(void) bt_assert(as_path_get_last(as_path, &asn)); bt_assert_msg(asn == first_prepended, "as_path_get_last() should return the first prepended ASN"); - rfree(lp); + tmp_flush(); } return 1; @@ -71,16 +68,13 @@ t_as_path_match(void) static int t_path_format(void) { - resource_init(); - struct adata empty_as_path = {}; struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); uint i; for (i = 4294967285; i <= 4294967294; i++) { - as_path = as_path_prepend(lp, as_path, i); + as_path = as_path_prepend(tmp_linpool, as_path, i); bt_debug("Prepending ASN: %10u \n", i); } @@ -98,7 +92,7 @@ t_path_format(void) as_path_format(as_path, buf2, SMALL_BUFFER_SIZE); bt_assert_msg(strcmp(buf2, "4294967294 42...") == 0, "Small Buffer(%zu): '%s'", strlen(buf2), buf2); - rfree(lp); + tmp_flush(); return 1; } @@ -117,11 +111,8 @@ count_asn_in_array(const u32 *array, u32 asn) static int t_path_include(void) { - resource_init(); - struct adata empty_as_path = {}; struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); u32 as_nums[AS_PATH_LENGTH] = {}; int i; @@ -129,7 +120,7 @@ t_path_include(void) { u32 val = bt_random(); as_nums[i] = val; - as_path = as_path_prepend(lp, as_path, val); + as_path = as_path_prepend(tmp_linpool, as_path, val); } for (i = 0; i < AS_PATH_LENGTH; i++) @@ -138,8 +129,8 @@ t_path_include(void) bt_assert_msg(as_path_contains(as_path, as_nums[i], counts_of_contains), "AS Path should contains %d-times number %d", counts_of_contains, as_nums[i]); struct f_val v = { .type = T_INT, .val.i = as_nums[i] }; - bt_assert(as_path_filter(lp, as_path, &v, 0) != NULL); - bt_assert(as_path_filter(lp, as_path, &v, 1) != NULL); + bt_assert(as_path_filter(tmp_linpool, as_path, &v, 0) != NULL); + bt_assert(as_path_filter(tmp_linpool, as_path, &v, 1) != NULL); } for (i = 0; i < 10000; i++) @@ -154,7 +145,7 @@ t_path_include(void) bt_assert_msg(result == 0, "As path should not contain the number %u", test_val); } - rfree(lp); + tmp_flush(); return 1; } @@ -163,16 +154,13 @@ t_path_include(void) static int t_as_path_converting(void) { - resource_init(); - struct adata empty_as_path = {}; struct adata *as_path = &empty_as_path; - struct linpool *lp = lp_new_default(&root_pool); #define AS_PATH_LENGTH_FOR_CONVERTING_TEST 10 int i; for (i = 0; i < AS_PATH_LENGTH_FOR_CONVERTING_TEST; i++) - as_path = as_path_prepend(lp, as_path, i); + as_path = as_path_prepend(tmp_linpool, as_path, i); bt_debug("data length: %u \n", as_path->length); diff --git a/nest/a-set.c b/lib/a-set.c index 40ed573c..dcb86058 100644 --- a/nest/a-set.c +++ b/lib/a-set.c @@ -10,8 +10,8 @@ #include <stdlib.h> #include "nest/bird.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #include "lib/string.h" diff --git a/nest/a-set_test.c b/lib/a-set_test.c index 96b6a727..693b8f08 100644 --- a/nest/a-set_test.c +++ b/lib/a-set_test.c @@ -10,8 +10,8 @@ #include "test/bt-utils.h" #include "lib/net.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "lib/resource.h" #define SET_SIZE 10 @@ -25,8 +25,6 @@ static byte buf[BUFFER_SIZE] = {}; #define SET_SIZE_FOR_FORMAT_OUTPUT 10 -struct linpool *lp; - enum set_type { SET_TYPE_INT, @@ -38,24 +36,23 @@ generate_set_sequence(enum set_type type, int len) { struct adata empty_as_path = {}; set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); int i; for (i = 0; i < len; i++) { if (type == SET_TYPE_INT) { - set_sequence = int_set_add(lp, set_sequence, i); - set_sequence_same = int_set_add(lp, set_sequence_same, i); - set_sequence_higher = int_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = int_set_add(lp, set_random, bt_random()); + set_sequence = int_set_add(tmp_linpool, set_sequence, i); + set_sequence_same = int_set_add(tmp_linpool, set_sequence_same, i); + set_sequence_higher = int_set_add(tmp_linpool, set_sequence_higher, i + SET_SIZE); + set_random = int_set_add(tmp_linpool, set_random, bt_random()); } else if (type == SET_TYPE_EC) { - set_sequence = ec_set_add(lp, set_sequence, i); - set_sequence_same = ec_set_add(lp, set_sequence_same, i); - set_sequence_higher = ec_set_add(lp, set_sequence_higher, i + SET_SIZE); - set_random = ec_set_add(lp, set_random, (bt_random() << 32 | bt_random())); + set_sequence = ec_set_add(tmp_linpool, set_sequence, i); + set_sequence_same = ec_set_add(tmp_linpool, set_sequence_same, i); + set_sequence_higher = ec_set_add(tmp_linpool, set_sequence_higher, i + SET_SIZE); + set_random = ec_set_add(tmp_linpool, set_random, (bt_random() << 32 | bt_random())); } else bt_abort_msg("This should be unreachable"); @@ -71,7 +68,6 @@ t_set_int_contains(void) { int i; - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE); bt_assert(int_set_get_size(set_sequence) == SET_SIZE); @@ -85,33 +81,29 @@ t_set_int_contains(void) for (i = 0; i < SET_SIZE; i++) bt_assert_msg(data[i] == i, "(data[i] = %d) == i = %d)", data[i], i); - rfree(lp); return 1; } static int t_set_int_union(void) { - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE); const struct adata *set_union; - set_union = int_set_union(lp, set_sequence, set_sequence_same); + set_union = int_set_union(tmp_linpool, set_sequence, set_sequence_same); bt_assert(int_set_get_size(set_union) == SET_SIZE); bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - set_union = int_set_union(lp, set_sequence, set_sequence_higher); + set_union = int_set_union(tmp_linpool, set_sequence, set_sequence_higher); bt_assert_msg(int_set_get_size(set_union) == SET_SIZE*2, "int_set_get_size(set_union) %d, SET_SIZE*2 %d", int_set_get_size(set_union), SET_SIZE*2); bt_assert(int_set_format(set_union, 0, 2, buf, BUFFER_SIZE) == 0); - rfree(lp); return 1; } static int t_set_int_format(void) { - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE_FOR_FORMAT_OUTPUT); bt_assert(int_set_format(set_sequence, 0, 0, buf, BUFFER_SIZE) == 0); @@ -125,21 +117,19 @@ t_set_int_format(void) bt_assert(int_set_format(set_sequence, 1, 0, buf, BUFFER_SIZE) == 0); bt_assert(strcmp(buf, "(0,0) (0,1) (0,2) (0,3) (0,4) (0,5) (0,6) (0,7) (0,8) (0,9)") == 0); - rfree(lp); return 1; } static int t_set_int_delete(void) { - resource_init(); generate_set_sequence(SET_TYPE_INT, SET_SIZE); const struct adata *deleting_sequence = set_sequence; u32 i; for (i = 0; i < SET_SIZE; i++) { - deleting_sequence = int_set_del(lp, deleting_sequence, i); + deleting_sequence = int_set_del(tmp_linpool, deleting_sequence, i); bt_assert_msg(int_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), "int_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", int_set_get_size(deleting_sequence), @@ -160,7 +150,6 @@ t_set_ec_contains(void) { u32 i; - resource_init(); generate_set_sequence(SET_TYPE_EC, SET_SIZE); bt_assert(ec_set_get_size(set_sequence) == SET_SIZE); @@ -174,62 +163,54 @@ t_set_ec_contains(void) // for (i = 0; i < SET_SIZE; i++) // bt_assert_msg(data[i] == (SET_SIZE-1-i), "(data[i] = %d) == ((SET_SIZE-1-i) = %d)", data[i], SET_SIZE-1-i); - rfree(lp); return 1; } static int t_set_ec_union(void) { - resource_init(); generate_set_sequence(SET_TYPE_EC, SET_SIZE); const struct adata *set_union; - set_union = ec_set_union(lp, set_sequence, set_sequence_same); + set_union = ec_set_union(tmp_linpool, set_sequence, set_sequence_same); bt_assert(ec_set_get_size(set_union) == SET_SIZE); bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - set_union = ec_set_union(lp, set_sequence, set_sequence_higher); + set_union = ec_set_union(tmp_linpool, set_sequence, set_sequence_higher); bt_assert_msg(ec_set_get_size(set_union) == SET_SIZE*2, "ec_set_get_size(set_union) %d, SET_SIZE*2 %d", ec_set_get_size(set_union), SET_SIZE*2); bt_assert(ec_set_format(set_union, 0, buf, BUFFER_SIZE) == 0); - rfree(lp); return 1; } static int t_set_ec_format(void) { - resource_init(); - const struct adata empty_as_path = {}; set_sequence = set_sequence_same = set_sequence_higher = set_random = &empty_as_path; - lp = lp_new_default(&root_pool); u64 i = 0; - set_sequence = ec_set_add(lp, set_sequence, i); + set_sequence = ec_set_add(tmp_linpool, set_sequence, i); for (i = 1; i < SET_SIZE_FOR_FORMAT_OUTPUT; i++) - set_sequence = ec_set_add(lp, set_sequence, i + ((i%2) ? ((u64)EC_RO << 48) : ((u64)EC_RT << 48))); + set_sequence = ec_set_add(tmp_linpool, set_sequence, i + ((i%2) ? ((u64)EC_RO << 48) : ((u64)EC_RT << 48))); bt_assert(ec_set_format(set_sequence, 0, buf, BUFFER_SIZE) == 0); bt_assert_msg(strcmp(buf, "(unknown 0x0, 0, 0) (ro, 0, 1) (rt, 0, 2) (ro, 0, 3) (rt, 0, 4) (ro, 0, 5) (rt, 0, 6) (ro, 0, 7) (rt, 0, 8) (ro, 0, 9)") == 0, "ec_set_format() returns '%s'", buf); - rfree(lp); return 1; } static int t_set_ec_delete(void) { - resource_init(); generate_set_sequence(SET_TYPE_EC, SET_SIZE); const struct adata *deleting_sequence = set_sequence; u32 i; for (i = 0; i < SET_SIZE; i++) { - deleting_sequence = ec_set_del(lp, deleting_sequence, i); + deleting_sequence = ec_set_del(tmp_linpool, deleting_sequence, i); bt_assert_msg(ec_set_get_size(deleting_sequence) == (int) (SET_SIZE-1-i), "ec_set_get_size(deleting_sequence) %d == SET_SIZE-1-i %d", ec_set_get_size(deleting_sequence), SET_SIZE-1-i); @@ -240,6 +221,7 @@ t_set_ec_delete(void) return 1; } + int main(int argc, char *argv[]) { diff --git a/nest/attrs.h b/lib/attrs.h index 9412439b..af2f1036 100644 --- a/nest/attrs.h +++ b/lib/attrs.h @@ -11,7 +11,39 @@ #include <stdint.h> #include "lib/unaligned.h" -#include "nest/route.h" + +typedef struct adata { + uint length; /* Length of data */ + byte data[0]; +} adata; + +#define ADATA_SIZE(s) BIRD_CPU_ALIGN(sizeof(struct adata) + s) + +extern const adata null_adata; /* adata of length 0 */ + +static inline struct adata * +lp_alloc_adata(struct linpool *pool, uint len) +{ + struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); + ad->length = len; + return ad; +} + +static inline struct adata * +lp_store_adata(struct linpool *pool, const void *buf, uint len) +{ + struct adata *ad = lp_alloc_adata(pool, len); + memcpy(ad->data, buf, len); + return ad; +} + +#define tmp_alloc_adata(len) lp_alloc_adata(tmp_linpool, len) +#define tmp_store_adata(buf, len) lp_store_adata(tmp_linpool, buf, len) +#define tmp_copy_adata(ad) tmp_store_adata((ad)->data, (ad)->length) + +static inline int adata_same(const struct adata *a, const struct adata *b) +{ return (!a && !b) || (a->length == b->length && !memcmp(a->data, b->data, a->length)); } + /* a-path.c */ diff --git a/lib/birdlib.h b/lib/birdlib.h index 81d4908a..d55b1a44 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -9,16 +9,29 @@ #ifndef _BIRD_BIRDLIB_H_ #define _BIRD_BIRDLIB_H_ +#include "sysdep/config.h" #include "lib/alloca.h" /* Ugly structure offset handling macros */ -struct align_probe { char x; long int y; }; - #define OFFSETOF(s, i) ((size_t) &((s *)0)->i) -#define SKIP_BACK(s, i, p) ((s *)((char *)p - OFFSETOF(s, i))) +#define SKIP_BACK(s, i, p) ({ s *_ptr = ((s *)((char *)p - OFFSETOF(s, i))); ASSERT_DIE(&_ptr->i == p); _ptr; }) #define BIRD_ALIGN(s, a) (((s)+a-1)&~(a-1)) -#define CPU_STRUCT_ALIGN (sizeof(struct align_probe)) +#define CPU_STRUCT_ALIGN (MAX_(_Alignof(void*), _Alignof(u64))) +#define BIRD_CPU_ALIGN(s) BIRD_ALIGN((s), CPU_STRUCT_ALIGN) + +/* Structure item alignment macros */ + +#define PADDING_NAME(id) _padding_##id +#define PADDING_(id, sz) u8 PADDING_NAME(id)[sz] + +#if CPU_POINTER_ALIGNMENT == 4 +#define PADDING(id, n32, n64) PADDING_(id, n32) +#elif CPU_POINTER_ALIGNMENT == 8 +#define PADDING(id, n32, n64) PADDING_(id, n64) +#else +#error "Strange CPU pointer alignment: " CPU_POINTER_ALIGNMENT +#endif /* Utility macros */ @@ -73,6 +86,7 @@ static inline int u64_cmp(u64 i1, u64 i2) /* Macros for gcc attributes */ #define NORET __attribute__((noreturn)) +#define USE_RESULT __atribute__((warn_unused_result)) #define UNUSED __attribute__((unused)) #define PACKED __attribute__((packed)) #define NONNULL(...) __attribute__((nonnull((__VA_ARGS__)))) @@ -80,10 +94,6 @@ static inline int u64_cmp(u64 i1, u64 i2) #define STATIC_ASSERT(EXP) _Static_assert(EXP, #EXP) #define STATIC_ASSERT_MSG(EXP,MSG) _Static_assert(EXP, MSG) -#ifndef HAVE_THREAD_LOCAL -#define _Thread_local -#endif - /* Microsecond time */ typedef s64 btime; @@ -165,8 +175,13 @@ void debug(const char *msg, ...); /* Printf to debug output */ #if defined(LOCAL_DEBUG) || defined(GLOBAL_DEBUG) #define DBG(x, y...) debug(x, ##y) +#define DBGL(x, y...) debug(x "\n", ##y) +#elif defined(DEBUG_TO_LOG) +#define DBG(...) do { } while (0) +#define DBGL(...) log(L_DEBUG __VA_ARGS__) #else -#define DBG(x, y...) do { } while(0) +#define DBG(...) do { } while(0) +#define DBGL(...) do { } while (0) #endif #define ASSERT_DIE(x) do { if (!(x)) bug("Assertion '%s' failed at %s:%d", #x, __FILE__, __LINE__); } while(0) diff --git a/lib/bitmap_test.c b/lib/bitmap_test.c index 0595a4d0..07860c94 100644 --- a/lib/bitmap_test.c +++ b/lib/bitmap_test.c @@ -24,7 +24,6 @@ t_bmap_set_clear_random(void) { struct bmap b; - resource_init(); bmap_init(&b, &root_pool, 1024); char expected[MAX_NUM] = {}; @@ -60,7 +59,6 @@ t_hmap_set_clear_random(void) { struct hmap b; - resource_init(); hmap_init(&b, &root_pool, 1024); char expected[MAX_NUM] = {}; @@ -119,7 +117,6 @@ t_hmap_set_clear_fill(void) { struct hmap b; - resource_init(); hmap_init(&b, &root_pool, 1024); char expected[MAX_NUM] = {}; diff --git a/lib/buffer_test.c b/lib/buffer_test.c index 5b7de330..0629e901 100644 --- a/lib/buffer_test.c +++ b/lib/buffer_test.c @@ -41,7 +41,6 @@ fill_expected_array(void) static void init_buffer(void) { - resource_init(); buffer_pool = &root_pool; BUFFER_INIT(buf, buffer_pool, MAX_NUM); } diff --git a/lib/event.c b/lib/event.c index 273447e0..07d7dc53 100644 --- a/lib/event.c +++ b/lib/event.c @@ -19,20 +19,95 @@ * events in them and explicitly ask to run them. */ +#undef LOCAL_DEBUG + #include "nest/bird.h" #include "lib/event.h" +#include "lib/io-loop.h" event_list global_event_list; event_list global_work_list; -inline void -ev_postpone(event *e) +STATIC_ASSERT(OFFSETOF(event_list, _sentinel.next) >= OFFSETOF(event_list, _end[0])); + +void +ev_init_list(event_list *el, struct birdloop *loop, const char *name) +{ + el->name = name; + el->loop = loop; + + atomic_store_explicit(&el->receiver, &el->_sentinel, memory_order_relaxed); + atomic_store_explicit(&el->_executor, &el->_sentinel, memory_order_relaxed); + atomic_store_explicit(&el->_sentinel.next, NULL, memory_order_relaxed); +} + +/* + * The event list should work as a message passing point. Sending a message + * must be a fairly fast process with no locks and low waiting times. OTOH, + * processing messages always involves running the assigned code and the + * receiver is always a single one thread with no concurrency at all. There is + * also a postponing requirement to synchronously remove an event from a queue, + * yet we allow this only when the caller has its receiver event loop locked. + * It still means that the event may get postponed from other event in the same + * list, therefore we have to be careful. + */ + +static inline int +ev_remove_from(event *e, event * _Atomic * head) { - if (ev_active(e)) + /* The head pointer stores where cur is pointed to from */ + event * _Atomic *prev = head; + + /* The current event in queue to check */ + event *cur = atomic_load_explicit(prev, memory_order_acquire); + + /* Pre-loaded next pointer; if NULL, this is sentinel */ + event *next = atomic_load_explicit(&cur->next, memory_order_acquire); + + while (next) + { + if (e == cur) { - rem_node(&e->n); - e->n.next = NULL; + /* Check whether we have collided with somebody else + * adding an item to the queue. */ + if (!atomic_compare_exchange_strong_explicit( + prev, &cur, next, + memory_order_acq_rel, memory_order_acquire)) + { + /* This may happen only on list head */ + ASSERT_DIE(prev == head); + + /* Restart. The collision should never happen again. */ + return ev_remove_from(e, head); + } + + /* Successfully removed from the list; inactivate this event. */ + atomic_store_explicit(&cur->next, NULL, memory_order_release); + return 1; } + + /* Go to the next event. */ + prev = &cur->next; + cur = next; + next = atomic_load_explicit(&cur->next, memory_order_acquire); + } + + return 0; +} + +inline void +ev_postpone(event *e) +{ + /* Find the list to remove the event from */ + event_list *sl = ev_get_list(e); + if (!sl) + return; + + /* Postponing allowed only from the target loop */ + ASSERT_DIE(birdloop_inside(sl->loop)); + + /* Remove from one of these lists. */ + ASSERT(ev_remove_from(e, &sl->_executor) || ev_remove_from(e, &sl->receiver)); } static void @@ -43,7 +118,7 @@ ev_dump(resource *r) debug("(code %p, data %p, %s)\n", e->hook, e->data, - e->n.next ? "scheduled" : "inactive"); + atomic_load_explicit(&e->next, memory_order_relaxed) ? "scheduled" : "inactive"); } static struct resclass ev_class = { @@ -95,40 +170,21 @@ ev_run(event *e) * list @l which can be run by calling ev_run_list(). */ inline void -ev_enqueue(event_list *l, event *e) +ev_send(event_list *l, event *e) { - ev_postpone(e); - add_tail(l, &e->n); -} - -/** - * ev_schedule - schedule an event - * @e: an event - * - * This function schedules an event by enqueueing it to a system-wide - * event list which is run by the platform dependent code whenever - * appropriate. - */ -void -ev_schedule(event *e) -{ - ev_enqueue(&global_event_list, e); -} - -/** - * ev_schedule_work - schedule a work-event. - * @e: an event - * - * This function schedules an event by enqueueing it to a system-wide work-event - * list which is run by the platform dependent code whenever appropriate. This - * is designated for work-events instead of regular events. They are executed - * less often in order to not clog I/O loop. - */ -void -ev_schedule_work(event *e) -{ - if (!ev_active(e)) - add_tail(&global_work_list, &e->n); + event_list *sl = ev_get_list(e); + if (sl == l) + return; + if (sl) + bug("Queuing an already queued event to another queue is not supported."); + + event *next = atomic_load_explicit(&l->receiver, memory_order_acquire); + do atomic_store_explicit(&e->next, next, memory_order_relaxed); + while (!atomic_compare_exchange_strong_explicit( + &l->receiver, &next, e, + memory_order_acq_rel, memory_order_acquire)); + + birdloop_ping(l->loop); } void io_log_event(void *hook, void *data); @@ -140,60 +196,56 @@ void io_log_event(void *hook, void *data); * This function calls ev_run() for all events enqueued in the list @l. */ int -ev_run_list(event_list *l) +ev_run_list_limited(event_list *l, uint limit) { - node *n; - list tmp_list; + event * _Atomic *ep = &l->_executor; - init_list(&tmp_list); - add_tail_list(&tmp_list, l); - init_list(l); - WALK_LIST_FIRST(n, tmp_list) - { - event *e = SKIP_BACK(event, n, n); - - /* This is ugly hack, we want to log just events executed from the main I/O loop */ - if ((l == &global_event_list) || (l == &global_work_list)) - io_log_event(e->hook, e->data); + /* No pending events, refill the queue. */ + if (atomic_load_explicit(ep, memory_order_relaxed) == &l->_sentinel) + { + /* Move the current event list aside and create a new one. */ + event *received = atomic_exchange_explicit( + &l->receiver, &l->_sentinel, memory_order_acq_rel); - ev_run(e); - } + /* No event to run. */ + if (received == &l->_sentinel) + return 0; - return !EMPTY_LIST(*l); -} + /* Setup the executor queue */ + event *head = &l->_sentinel; -int -ev_run_list_limited(event_list *l, uint limit) -{ - node *n; - list tmp_list; + /* Flip the order of the events by relinking them one by one (push-pop) */ + while (received != &l->_sentinel) + { + event *cur = received; + received = atomic_exchange_explicit(&cur->next, head, memory_order_relaxed); + head = cur; + } - init_list(&tmp_list); - add_tail_list(&tmp_list, l); - init_list(l); + /* Store the executor queue to its designated place */ + atomic_store_explicit(ep, head, memory_order_relaxed); + } - WALK_LIST_FIRST(n, tmp_list) + /* Run the events in order. */ + event *e; + while ((e = atomic_load_explicit(ep, memory_order_relaxed)) != &l->_sentinel) { - event *e = SKIP_BACK(event, n, n); - - if (!limit) - break; + /* Check limit */ + if (!--limit) + return 1; /* This is ugly hack, we want to log just events executed from the main I/O loop */ if ((l == &global_event_list) || (l == &global_work_list)) io_log_event(e->hook, e->data); - ev_run(e); - limit--; - } + /* Inactivate the event */ + atomic_store_explicit(ep, atomic_load_explicit(&e->next, memory_order_relaxed), memory_order_relaxed); + atomic_store_explicit(&e->next, NULL, memory_order_relaxed); - if (!EMPTY_LIST(tmp_list)) - { - /* Attach new items after the unprocessed old items */ - add_tail_list(&tmp_list, l); - init_list(l); - add_tail_list(l, &tmp_list); - } + /* Run the event */ + e->hook(e->data); + tmp_flush(); + } - return !EMPTY_LIST(*l); + return atomic_load_explicit(&l->receiver, memory_order_relaxed) != &l->_sentinel; } diff --git a/lib/event.h b/lib/event.h index 5f3b78d8..9773c3a9 100644 --- a/lib/event.h +++ b/lib/event.h @@ -10,33 +10,69 @@ #define _BIRD_EVENT_H_ #include "lib/resource.h" +#include "lib/locking.h" + +#include <stdatomic.h> + +struct birdloop; typedef struct event { resource r; void (*hook)(void *); void *data; - node n; /* Internal link */ + struct event * _Atomic next; } event; -typedef list event_list; +typedef union event_list { + struct { + event * _Atomic receiver; /* Event receive list */ + event * _Atomic _executor; /* Event execute list */ + const char *name; + struct birdloop *loop; /* The executor loop */ + char _end[0]; + }; + event _sentinel; /* Sentinel node to actively detect list end */ +} event_list; extern event_list global_event_list; extern event_list global_work_list; event *ev_new(pool *); void ev_run(event *); -#define ev_init_list(el) init_list(el) +void ev_init_list(event_list *, struct birdloop *loop, const char *name); void ev_enqueue(event_list *, event *); -void ev_schedule(event *); -void ev_schedule_work(event *); +#define ev_send ev_enqueue +#define ev_send_loop(l, e) ev_send(birdloop_event_list((l)), (e)) + +#define ev_schedule(e) ({ ASSERT_THE_BIRD_LOCKED; if (!ev_active((e))) ev_send(&global_event_list, (e)); }) +#define ev_schedule_work(e) ({ ASSERT_THE_BIRD_LOCKED; if (!ev_active((e))) ev_send(&global_work_list, (e)); }) + void ev_postpone(event *); -int ev_run_list(event_list *); int ev_run_list_limited(event_list *, uint); +#define ev_run_list(l) ev_run_list_limited((l), ~0) + +#define LEGACY_EVENT_LIST(l) (((l) == &global_event_list) || ((l) == &global_work_list)) static inline int ev_active(event *e) { - return e->n.next != NULL; + return atomic_load_explicit(&e->next, memory_order_relaxed) != NULL; +} + +static inline event_list * +ev_get_list(event *e) +{ + /* We are looking for the sentinel node at the list end. + * After this, we have s->next == NULL */ + event *s = e; + for (event *sn; sn = atomic_load_explicit(&s->next, memory_order_acquire); s = sn) + ; + + /* No sentinel, no list. */ + if (s == e) + return NULL; + else + return SKIP_BACK(event_list, _sentinel, s); } static inline event* diff --git a/lib/event_test.c b/lib/event_test.c index e1215bba..612deb25 100644 --- a/lib/event_test.c +++ b/lib/event_test.c @@ -15,7 +15,7 @@ #include "nest/locks.h" #include "sysdep/unix/unix.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #define MAX_NUM 4 @@ -53,11 +53,9 @@ t_ev_run_list(void) { int i; - resource_init(); olock_init(); - timer_init(); - io_init(); rt_init(); + io_init(); if_init(); // roa_init(); config_init(); diff --git a/lib/fib.h b/lib/fib.h new file mode 100644 index 00000000..bec2a8d4 --- /dev/null +++ b/lib/fib.h @@ -0,0 +1,119 @@ +/* + * BIRD Internet Routing Daemon -- Network prefix storage + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIB_FIB_H_ +#define _BIRD_LIB_FIB_H_ + +/* + * BIRD FIBs are generic data structure for storing network prefixes. + * Also used for the master routing table. Currently implemented as + * a hash table. + * + * Available operations: + * - insertion of new entry + * - deletion of entry + * - searching for entry by network prefix + * - asynchronous retrieval of fib contents + */ + +struct fib; + +struct fib_node { + struct fib_node *next; /* Next in hash chain */ + struct fib_iterator *readers; /* List of readers of this node */ + net_addr addr[0]; +}; + +struct fib_iterator { /* See lib/slists.h for an explanation */ + struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */ + byte efef; /* 0xff to distinguish between iterator and node */ + byte pad[3]; + struct fib_node *node; /* Or NULL if freshly merged */ + uint hash; +}; + +typedef void (*fib_init_fn)(struct fib *, void *); + +struct fib { + pool *fib_pool; /* Pool holding all our data */ + slab *fib_slab; /* Slab holding all fib nodes */ + struct fib_node **hash_table; /* Node hash table */ + uint hash_size; /* Number of hash table entries (a power of two) */ + uint hash_order; /* Binary logarithm of hash_size */ + uint hash_shift; /* 32 - hash_order */ + uint addr_type; /* Type of address data stored in fib (NET_*) */ + uint node_size; /* FIB node size, 0 for nonuniform */ + uint node_offset; /* Offset of fib_node struct inside of user data */ + uint entries; /* Number of entries */ + uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ + fib_init_fn init; /* Constructor */ +}; + +static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) +{ return e ? (void *) ((char *) e - f->node_offset) : NULL; } + +static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) +{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } + +void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); +void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ +void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ +void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ +void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ +void fib_delete(struct fib *, void *); /* Remove fib entry */ +void fib_free(struct fib *); /* Destroy the fib */ +void fib_check(struct fib *); /* Consistency check for debugging */ + +void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */ +struct fib_node *fit_get(struct fib *, struct fib_iterator *); +void fit_put(struct fib_iterator *, struct fib_node *); +void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); +void fit_put_end(struct fib_iterator *i); +void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src); + + +#define FIB_WALK(fib, type, z) do { \ + struct fib_node *fn_, **ff_ = (fib)->hash_table; \ + uint count_ = (fib)->hash_size; \ + type *z; \ + while (count_--) \ + for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) + +#define FIB_WALK_END } while (0) + +#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) + +#define FIB_ITERATE_START(fib, it, type, z) do { \ + struct fib_node *fn_ = fit_get(fib, it); \ + uint count_ = (fib)->hash_size; \ + uint hpos_ = (it)->hash; \ + type *z; \ + for(;;) { \ + if (!fn_) \ + { \ + if (++hpos_ >= count_) \ + break; \ + fn_ = (fib)->hash_table[hpos_]; \ + continue; \ + } \ + z = fib_node_to_user(fib, fn_); + +#define FIB_ITERATE_END fn_ = fn_->next; } } while(0) + +#define FIB_ITERATE_PUT(it) fit_put(it, fn_) + +#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) + +#define FIB_ITERATE_PUT_END(it) fit_put_end(it) + +#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) + +#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src) + +#endif diff --git a/lib/flowspec_test.c b/lib/flowspec_test.c index ed4afe51..03649b99 100644 --- a/lib/flowspec_test.c +++ b/lib/flowspec_test.c @@ -446,10 +446,7 @@ t_validation6(void) static int t_builder4(void) { - resource_init(); - struct flow_builder *fb = flow_builder_init(&root_pool); - linpool *lp = lp_new_default(&root_pool); /* Expectation */ @@ -492,7 +489,7 @@ t_builder4(void) flow_builder_set_type(fb, FLOW_TYPE_TCP_FLAGS); flow_builder_add_op_val(fb, 0, 0x55); - net_addr_flow4 *res = flow_builder4_finalize(fb, lp); + net_addr_flow4 *res = flow_builder4_finalize(fb, tmp_linpool); bt_assert(memcmp(res, expect, expect->length) == 0); @@ -529,8 +526,6 @@ t_builder6(void) { net_addr_ip6 ip; - resource_init(); - linpool *lp = lp_new_default(&root_pool); struct flow_builder *fb = flow_builder_init(&root_pool); fb->ipv6 = 1; @@ -574,7 +569,7 @@ t_builder6(void) flow_builder_set_type(fb, FLOW_TYPE_LABEL); flow_builder_add_op_val(fb, 0, 0x55); - net_addr_flow6 *res = flow_builder6_finalize(fb, lp); + net_addr_flow6 *res = flow_builder6_finalize(fb, tmp_linpool); bt_assert(memcmp(res, expect, expect->length) == 0); /* Reverse order */ @@ -601,7 +596,7 @@ t_builder6(void) flow_builder_set_type(fb, FLOW_TYPE_DST_PREFIX); flow_builder6_add_pfx(fb, &ip, 61); - res = flow_builder6_finalize(fb, lp); + res = flow_builder6_finalize(fb, tmp_linpool); bt_assert(memcmp(res, expect, expect->length) == 0); return 1; @@ -10,7 +10,7 @@ #ifndef _BIRD_HASH_H_ #define _BIRD_HASH_H_ -#define HASH(type) struct { type **data; uint count, order; } +#define HASH(type) struct { type **data; uint count; u16 iterators; u8 order; u8 down_requested:1; } #define HASH_TYPE(v) typeof(** (v).data) #define HASH_SIZE(v) (1U << (v).order) @@ -125,20 +125,26 @@ #define HASH_MAY_STEP_DOWN_(v,pool,rehash_fn,args) \ ({ \ - if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \ - ((v).order > (REHASH_LO_BOUND(args)))) \ + if ((v).iterators) \ + (v).down_requested = 1; \ + else if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \ + ((v).order > (REHASH_LO_BOUND(args)))) \ rehash_fn(&(v), pool, -(REHASH_LO_STEP(args))); \ }) #define HASH_MAY_RESIZE_DOWN_(v,pool,rehash_fn,args) \ ({ \ - uint _o = (v).order; \ - while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \ - (_o > (REHASH_LO_BOUND(args)))) \ - _o -= (REHASH_LO_STEP(args)); \ - if (_o < (v).order) \ - rehash_fn(&(v), pool, _o - (v).order); \ - }) + if ((v).iterators) \ + (v).down_requested = 1; \ + else { \ + uint _o = (v).order; \ + while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \ + (_o > (REHASH_LO_BOUND(args)))) \ + _o -= (REHASH_LO_STEP(args)); \ + if (_o < (v).order) \ + rehash_fn(&(v), pool, _o - (v).order); \ + } \ + }) #define HASH_INSERT2(v,id,pool,node) \ @@ -195,6 +201,20 @@ #define HASH_WALK_FILTER_END } while (0) +#define HASH_WALK_ITER(v, id, n, iter) \ + do { \ + uint _hash_walk_iter_put = 0; \ + uint _shift = 32 - (v).order; \ + for ( ; !_hash_walk_iter_put; (iter) += (1U << _shift)) { \ + _hash_walk_iter_put = ((iter) + (1U << _shift) == 0); \ + for (HASH_TYPE(v) *n = (v).data[(iter) >> _shift]; n; n = id##_NEXT((n)))\ + if (HASH_FN(v, id, id##_KEY(n)) >= ((iter) >> _shift)) \ + +#define HASH_WALK_ITER_PUT (_hash_walk_iter_put = 1) + +#define HASH_WALK_ITER_END } } while (0) + + static inline void mem_hash_init(u64 *h) { @@ -215,6 +235,12 @@ mem_hash_mix(u64 *h, const void *p, uint s) *h = *h * multiplier + pp[i]; } +static inline void +mem_hash_mix_num(u64 *h, u64 val) +{ + mem_hash_mix(h, &val, sizeof(val)); +} + static inline uint mem_hash_value(u64 *h) { diff --git a/lib/hash_test.c b/lib/hash_test.c index 59beb7c0..ecfcdd66 100644 --- a/lib/hash_test.c +++ b/lib/hash_test.c @@ -61,7 +61,6 @@ dump_nodes(void) static void init_hash_(uint order) { - resource_init(); my_pool = rp_new(&root_pool, "Test pool"); HASH_INIT(hash, my_pool, order); @@ -286,6 +285,46 @@ t_walk_filter(void) return 1; } +static int +t_walk_iter(void) +{ + init_hash(); + fill_hash(); + + u32 hit = 0; + + u32 prev_hash = ~0; + for (uint cnt = 0; cnt < MAX_NUM; ) + { + u32 last_hash = ~0; +// printf("PUT!\n"); + HASH_WALK_ITER(hash, TEST, n, hit) + { + cnt++; + u32 cur_hash = HASH_FN(hash, TEST, n->key); + /* + printf("C%08x L%08x P%08x K%08x H%08x N%p S%d I%ld\n", + cur_hash, last_hash, prev_hash, n->key, hit, n, _shift, n - &nodes[0]); + */ + + if (last_hash == ~0U) + { + if (prev_hash != ~0U) + bt_assert(prev_hash < cur_hash); + last_hash = prev_hash = cur_hash; + } + else + bt_assert(last_hash == cur_hash); + + if (cnt < MAX_NUM) + HASH_WALK_ITER_PUT; + } + HASH_WALK_ITER_END; + } + + return 1; +} + int main(int argc, char *argv[]) { @@ -300,6 +339,7 @@ main(int argc, char *argv[]) bt_test_suite(t_walk_delsafe_remove, "HASH_WALK_DELSAFE and HASH_REMOVE"); bt_test_suite(t_walk_delsafe_remove2, "HASH_WALK_DELSAFE and HASH_REMOVE2. HASH_REMOVE2 is HASH_REMOVE and smart auto-resize function"); bt_test_suite(t_walk_filter, "HASH_WALK_FILTER"); + bt_test_suite(t_walk_iter, "HASH_WALK_ITER"); return bt_exit_value(); } diff --git a/lib/io-loop.h b/lib/io-loop.h new file mode 100644 index 00000000..2450a609 --- /dev/null +++ b/lib/io-loop.h @@ -0,0 +1,58 @@ +/* + * BIRD -- I/O and event loop + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_IO_LOOP_H_ +#define _BIRD_IO_LOOP_H_ + +#include "nest/bird.h" +#include "lib/lists.h" +#include "lib/locking.h" +#include "lib/resource.h" +#include "lib/event.h" +#include "lib/socket.h" + +extern struct birdloop main_birdloop; + +void sk_start(sock *s); +void sk_stop(sock *s); +void sk_reloop(sock *s, struct birdloop *loop); + +/* Start a new birdloop owned by given pool and domain */ +struct birdloop *birdloop_new(pool *p, uint order, const char *name); + +/* Stop the loop. At the end, the @stopped callback is called unlocked in tail + * position to finish cleanup. Run birdloop_free() from that callback to free + * the loop itself. */ +void birdloop_stop(struct birdloop *loop, void (*stopped)(void *data), void *data); +void birdloop_stop_self(struct birdloop *loop, void (*stopped)(void *data), void *data); +void birdloop_free(struct birdloop *loop); + +/* Get birdloop's event list */ +event_list *birdloop_event_list(struct birdloop *loop); + +/* Get birdloop's time heap */ +struct timeloop *birdloop_time_loop(struct birdloop *loop); + +/* Enter and exit the birdloop */ +void birdloop_enter(struct birdloop *loop); +void birdloop_leave(struct birdloop *loop); + +_Bool birdloop_inside(struct birdloop *loop); + +void birdloop_mask_wakeups(struct birdloop *loop); +void birdloop_unmask_wakeups(struct birdloop *loop); + +void birdloop_link(struct birdloop *loop); +void birdloop_unlink(struct birdloop *loop); + +void birdloop_ping(struct birdloop *loop); + +void birdloop_init(void); + +/* Yield for a little while. Use only in special cases. */ +void birdloop_yield(void); + +#endif /* _BIRD_IO_LOOP_H_ */ @@ -362,11 +362,7 @@ static inline ip6_addr ip6_hton(ip6_addr a) static inline ip6_addr ip6_ntoh(ip6_addr a) { return _MI6(ntohl(_I0(a)), ntohl(_I1(a)), ntohl(_I2(a)), ntohl(_I3(a))); } -#define MPLS_MAX_LABEL_STACK 8 -typedef struct mpls_label_stack { - uint len; - u32 stack[MPLS_MAX_LABEL_STACK]; -} mpls_label_stack; +#define MPLS_MAX_LABEL_STACK 16 static inline int mpls_get(const char *buf, int buflen, u32 *stack) diff --git a/lib/lists.c b/lib/lists.c index 200576cf..8f95c7c2 100644 --- a/lib/lists.c +++ b/lib/lists.c @@ -26,7 +26,7 @@ #define _BIRD_LISTS_C_ -#include "nest/bird.h" +#include "lib/birdlib.h" #include "lib/lists.h" LIST_INLINE int @@ -35,11 +35,12 @@ check_list(list *l, node *n) if (!l) { ASSERT_DIE(n); - ASSERT_DIE(n->prev); - do { n = n->prev; } while (n->prev); + node *nn = n; + while (nn->prev) + nn = nn->prev; - l = SKIP_BACK(list, head_node, n); + l = SKIP_BACK(list, head_node, nn); } int seen = 0; @@ -60,7 +61,7 @@ check_list(list *l, node *n) } ASSERT_DIE(cur == &(l->tail_node)); - ASSERT_DIE(!n || (seen == 1)); + ASSERT_DIE(!n || (seen == 1) || (n == &l->head_node) || (n == &l->tail_node)); return 1; } @@ -120,7 +121,7 @@ add_head(list *l, node *n) LIST_INLINE void insert_node(node *n, node *after) { - EXPENSIVE_CHECK(check_list(l, after)); + EXPENSIVE_CHECK(check_list(NULL, after)); ASSUME(n->prev == NULL); ASSUME(n->next == NULL); @@ -141,7 +142,7 @@ insert_node(node *n, node *after) LIST_INLINE void rem_node(node *n) { - EXPENSIVE_CHECK(check_list(NULL, n)); + EXPENSIVE_CHECK((n == n->prev) && (n == n->next) || check_list(NULL, n)); node *z = n->prev; node *x = n->next; diff --git a/lib/lists.h b/lib/lists.h index 479f4ed1..86ff59c9 100644 --- a/lib/lists.h +++ b/lib/lists.h @@ -42,6 +42,7 @@ typedef union list { /* In fact two overlayed nodes */ }; } list; +#define STATIC_LIST_INIT(name) name = { .head = &name.tail_node, .tail = &name.head_node, .null = NULL } #define NODE (node *) #define HEAD(list) ((void *)((list).head)) @@ -68,6 +69,18 @@ typedef union list { /* In fact two overlayed nodes */ #define EMPTY_LIST(list) (!(list).head->next) +static inline _Bool +enlisted(node *n) +{ + switch ((!!n->next) + (!!n->prev)) + { + case 0: return 0; + case 2: return 1; + case 1: bug("Garbled event list node"); + } + + bug("Maths is broken. And you should see a new heaven and a new earth: for the first heaven and the first earth had been passed away."); +} #ifndef _BIRD_LISTS_C_ #define LIST_INLINE static inline diff --git a/lib/locking.h b/lib/locking.h new file mode 100644 index 00000000..1df30063 --- /dev/null +++ b/lib/locking.h @@ -0,0 +1,64 @@ +/* + * BIRD Library -- Locking + * + * (c) 2020--2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LOCKING_H_ +#define _BIRD_LOCKING_H_ + +struct domain_generic; + +/* Here define the global lock order; first to last. */ +struct lock_order { + struct domain_generic *the_bird; + struct domain_generic *proto; + struct domain_generic *rtable; + struct domain_generic *attrs; + struct domain_generic *resource; +}; + +extern _Thread_local struct lock_order locking_stack; +extern _Thread_local struct domain_generic **last_locked; + +#define DOMAIN(type) struct domain__##type +#define DEFINE_DOMAIN(type) DOMAIN(type) { struct domain_generic *type; } +#define DOMAIN_ORDER(type) OFFSETOF(struct lock_order, type) + +#define DOMAIN_NEW(type, name) (DOMAIN(type)) { .type = domain_new(name, DOMAIN_ORDER(type)) } +struct domain_generic *domain_new(const char *name, uint order); + +#define DOMAIN_FREE(type, d) domain_free((d).type) +void domain_free(struct domain_generic *); + +#define DOMAIN_NULL(type) (DOMAIN(type)) {} + +#define LOCK_DOMAIN(type, d) do_lock(((d).type), &(locking_stack.type)) +#define UNLOCK_DOMAIN(type, d) do_unlock(((d).type), &(locking_stack.type)) + +#define DOMAIN_IS_LOCKED(type, d) (((d).type) == (locking_stack.type)) +#define DG_IS_LOCKED(d) ((d) == *(DG_LSP(d))) + +/* Internal for locking */ +void do_lock(struct domain_generic *dg, struct domain_generic **lsp); +void do_unlock(struct domain_generic *dg, struct domain_generic **lsp); + +uint dg_order(struct domain_generic *dg); + +#define DG_LSP(d) ((struct domain_generic **) (((void *) &locking_stack) + dg_order(d))) +#define DG_LOCK(d) do_lock(d, DG_LSP(d)) +#define DG_UNLOCK(d) do_unlock(d, DG_LSP(d)) + +/* Use with care. To be removed in near future. */ +DEFINE_DOMAIN(the_bird); +extern DOMAIN(the_bird) the_bird_domain; + +#define the_bird_lock() LOCK_DOMAIN(the_bird, the_bird_domain) +#define the_bird_unlock() UNLOCK_DOMAIN(the_bird, the_bird_domain) +#define the_bird_locked() DOMAIN_IS_LOCKED(the_bird, the_bird_domain) + +#define ASSERT_THE_BIRD_LOCKED ({ if (!the_bird_locked()) bug("The BIRD lock must be locked here: %s:%d", __FILE__, __LINE__); }) + +#endif diff --git a/lib/mempool.c b/lib/mempool.c index 90d7c774..33eaec86 100644 --- a/lib/mempool.c +++ b/lib/mempool.c @@ -27,19 +27,18 @@ struct lp_chunk { struct lp_chunk *next; - uint size; uintptr_t data_align[0]; byte data[0]; }; -const int lp_chunk_size = sizeof(struct lp_chunk); +#define LP_DATA_SIZE (page_size - OFFSETOF(struct lp_chunk, data)) struct linpool { resource r; byte *ptr, *end; struct lp_chunk *first, *current; /* Normal (reusable) chunks */ struct lp_chunk *first_large; /* Large chunks */ - uint chunk_size, threshold, total, total_large; + uint total, total_large; }; static void lp_free(resource *); @@ -59,19 +58,14 @@ static struct resclass lp_class = { /** * lp_new - create a new linear memory pool * @p: pool - * @blk: block size * * lp_new() creates a new linear memory pool resource inside the pool @p. - * The linear pool consists of a list of memory chunks of size at least - * @blk. + * The linear pool consists of a list of memory chunks of page size. */ linpool -*lp_new(pool *p, uint blk) +*lp_new(pool *p) { - linpool *m = ralloc(p, &lp_class); - m->chunk_size = blk; - m->threshold = 3*blk/4; - return m; + return ralloc(p, &lp_class); } /** @@ -102,14 +96,13 @@ lp_alloc(linpool *m, uint size) else { struct lp_chunk *c; - if (size >= m->threshold) + if (size > LP_DATA_SIZE) { /* Too large => allocate large chunk */ c = xmalloc(sizeof(struct lp_chunk) + size); m->total_large += size; c->next = m->first_large; m->first_large = c; - c->size = size; } else { @@ -121,10 +114,10 @@ lp_alloc(linpool *m, uint size) else { /* Need to allocate a new chunk */ - c = xmalloc(sizeof(struct lp_chunk) + m->chunk_size); - m->total += m->chunk_size; + c = alloc_page(); + + m->total += LP_DATA_SIZE; c->next = NULL; - c->size = m->chunk_size; if (m->current) m->current->next = c; @@ -133,7 +126,7 @@ lp_alloc(linpool *m, uint size) } m->current = c; m->ptr = c->data + size; - m->end = c->data + m->chunk_size; + m->end = c->data + LP_DATA_SIZE; } return c->data; } @@ -195,7 +188,7 @@ lp_flush(linpool *m) /* Move ptr to the first chunk and free all large chunks */ m->current = c = m->first; m->ptr = c ? c->data : NULL; - m->end = c ? c->data + m->chunk_size : NULL; + m->end = c ? c->data + LP_DATA_SIZE : NULL; while (c = m->first_large) { @@ -218,6 +211,7 @@ lp_save(linpool *m, lp_state *p) { p->current = m->current; p->large = m->first_large; + p->total_large = m->total_large; p->ptr = m->ptr; } @@ -239,12 +233,12 @@ lp_restore(linpool *m, lp_state *p) /* Move ptr to the saved pos and free all newer large chunks */ m->current = c = p->current; m->ptr = p->ptr; - m->end = c ? c->data + m->chunk_size : NULL; + m->end = c ? c->data + LP_DATA_SIZE : NULL; + m->total_large = p->total_large; while ((c = m->first_large) && (c != p->large)) { m->first_large = c->next; - m->total_large -= c->size; xfree(c); } } @@ -258,7 +252,7 @@ lp_free(resource *r) for(d=m->first; d; d = c) { c = d->next; - xfree(d); + free_page(d); } for(d=m->first_large; d; d = c) { @@ -278,9 +272,7 @@ lp_dump(resource *r) ; for(cntl=0, c=m->first_large; c; c=c->next, cntl++) ; - debug("(chunk=%d threshold=%d count=%d+%d total=%d+%d)\n", - m->chunk_size, - m->threshold, + debug("(count=%d+%d total=%d+%d)\n", cnt, cntl, m->total, @@ -291,19 +283,22 @@ static struct resmem lp_memsize(resource *r) { linpool *m = (linpool *) r; - struct lp_chunk *c; - int cnt = 0; - - for(c=m->first; c; c=c->next) - cnt++; - for(c=m->first_large; c; c=c->next) - cnt++; - - return (struct resmem) { - .effective = m->total + m->total_large, - .overhead = ALLOC_OVERHEAD + sizeof(struct linpool) + - cnt * (ALLOC_OVERHEAD + sizeof(struct lp_chunk)), + struct resmem sz = { + .overhead = sizeof(struct linpool) + ALLOC_OVERHEAD, + .effective = m->total_large, }; + + for (struct lp_chunk *c = m->first_large; c; c = c->next) + sz.overhead += sizeof(struct lp_chunk) + ALLOC_OVERHEAD; + + uint regular = 0; + for (struct lp_chunk *c = m->first; c; c = c->next) + regular++; + + sz.effective += LP_DATA_SIZE * regular; + sz.overhead += (sizeof(struct lp_chunk) + ALLOC_OVERHEAD) * regular; + + return sz; } @@ -314,10 +309,7 @@ lp_lookup(resource *r, unsigned long a) struct lp_chunk *c; for(c=m->first; c; c=c->next) - if ((unsigned long) c->data <= a && (unsigned long) c->data + c->size > a) - return r; - for(c=m->first_large; c; c=c->next) - if ((unsigned long) c->data <= a && (unsigned long) c->data + c->size > a) + if ((unsigned long) c->data <= a && (unsigned long) c->data + LP_DATA_SIZE > a) return r; return NULL; } diff --git a/lib/printf.c b/lib/printf.c index 236df427..424d545f 100644 --- a/lib/printf.c +++ b/lib/printf.c @@ -568,3 +568,51 @@ buffer_puts(buffer *buf, const char *str) buf->pos = (bp < be) ? bp : buf->end; } + +#define POOL_PRINTF_MAXBUF 1024 + +char *mb_vsprintf(pool *p, const char *fmt, va_list args) +{ + char buf[POOL_PRINTF_MAXBUF]; + int count = bvsnprintf(buf, POOL_PRINTF_MAXBUF, fmt, args); + + if (count < 0) + bug("Attempted to mb_vsprintf() a too long string"); + + char *out = mb_alloc(p, count + 1); + memcpy(out, buf, count + 1); + return out; +} + +char *mb_sprintf(pool *p, const char *fmt, ...) +{ + va_list args; + char *out; + va_start(args, fmt); + out = mb_vsprintf(p, fmt, args); + va_end(args); + return out; +} + +char *lp_vsprintf(linpool *p, const char *fmt, va_list args) +{ + char buf[POOL_PRINTF_MAXBUF]; + int count = bvsnprintf(buf, POOL_PRINTF_MAXBUF, fmt, args); + + if (count < 0) + bug("Attempted to mb_vsprintf() a too long string"); + + char *out = lp_alloc(p, count + 1); + memcpy(out, buf, count + 1); + return out; +} + +char *lp_sprintf(linpool *p, const char *fmt, ...) +{ + va_list args; + char *out; + va_start(args, fmt); + out = lp_vsprintf(p, fmt, args); + va_end(args); + return out; +} diff --git a/lib/rcu.c b/lib/rcu.c new file mode 100644 index 00000000..83fdd022 --- /dev/null +++ b/lib/rcu.c @@ -0,0 +1,79 @@ +/* + * BIRD Library -- Read-Copy-Update Basic Operations + * + * (c) 2021 Maria Matejka <mq@jmq.cz> + * (c) 2021 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + * Note: all the relevant patents shall be expired. + * + * Using the Supplementary Material for User-Level Implementations of Read-Copy-Update + * by Matthieu Desnoyers, Paul E. McKenney, Alan S. Stern, Michel R. Dagenais and Jonathan Walpole + * obtained from https://www.efficios.com/pub/rcu/urcu-supp-accepted.pdf + */ + +#include "lib/rcu.h" +#include "lib/io-loop.h" +#include "lib/locking.h" + +_Atomic uint rcu_gp_ctl = RCU_NEST_CNT; +_Thread_local struct rcu_birdloop *this_rcu_birdloop = NULL; + +static list rcu_birdloop_list; + +static struct rcu_birdloop main_rcu_birdloop; + +DEFINE_DOMAIN(resource); +static DOMAIN(resource) rcu_domain; + +static int +rcu_gp_ongoing(_Atomic uint *ctl) +{ + uint val = atomic_load(ctl); + return (val & RCU_NEST_CNT) && ((val ^ rcu_gp_ctl) & RCU_GP_PHASE); +} + +static void +update_counter_and_wait(void) +{ + atomic_fetch_xor(&rcu_gp_ctl, RCU_GP_PHASE); + struct rcu_birdloop *rc; + WALK_LIST(rc, rcu_birdloop_list) + while (rcu_gp_ongoing(&rc->ctl)) + birdloop_yield(); +} + +void +synchronize_rcu(void) +{ + LOCK_DOMAIN(resource, rcu_domain); + update_counter_and_wait(); + update_counter_and_wait(); + UNLOCK_DOMAIN(resource, rcu_domain); +} + +void +rcu_birdloop_start(struct rcu_birdloop *rc) +{ + LOCK_DOMAIN(resource, rcu_domain); + add_tail(&rcu_birdloop_list, &rc->n); + this_rcu_birdloop = rc; + UNLOCK_DOMAIN(resource, rcu_domain); +} + +void +rcu_birdloop_stop(struct rcu_birdloop *rc) +{ + LOCK_DOMAIN(resource, rcu_domain); + this_rcu_birdloop = NULL; + rem_node(&rc->n); + UNLOCK_DOMAIN(resource, rcu_domain); +} + +void +rcu_init(void) +{ + rcu_domain = DOMAIN_NEW(resource, "Read-Copy-Update"); + init_list(&rcu_birdloop_list); + rcu_birdloop_start(&main_rcu_birdloop); +} diff --git a/lib/rcu.h b/lib/rcu.h new file mode 100644 index 00000000..c537a1ef --- /dev/null +++ b/lib/rcu.h @@ -0,0 +1,55 @@ +/* + * BIRD Library -- Read-Copy-Update Basic Operations + * + * (c) 2021 Maria Matejka <mq@jmq.cz> + * (c) 2021 CZ.NIC z.s.p.o. + * + * Can be freely distributed and used under the terms of the GNU GPL. + * Note: all the relevant patents shall be expired. + */ + +#ifndef _BIRD_RCU_H_ +#define _BIRD_RCU_H_ + +#include "lib/birdlib.h" +#include "lib/lists.h" +#include <stdatomic.h> + +#define RCU_GP_PHASE 0x100000 +#define RCU_NEST_MASK 0x0fffff +#define RCU_NEST_CNT 0x000001 + +extern _Atomic uint rcu_gp_ctl; + +struct rcu_birdloop { + node n; + _Atomic uint ctl; +}; + +extern _Thread_local struct rcu_birdloop *this_rcu_birdloop; + +static inline void rcu_read_lock(void) +{ + uint cmp = atomic_load_explicit(&this_rcu_birdloop->ctl, memory_order_acquire); + + if (cmp & RCU_NEST_MASK) + atomic_store_explicit(&this_rcu_birdloop->ctl, cmp + RCU_NEST_CNT, memory_order_relaxed); + else + atomic_store(&this_rcu_birdloop->ctl, atomic_load_explicit(&rcu_gp_ctl, memory_order_acquire)); +} + +static inline void rcu_read_unlock(void) +{ + atomic_fetch_sub(&this_rcu_birdloop->ctl, RCU_NEST_CNT); +} + +void synchronize_rcu(void); + +/* Registering and unregistering a birdloop. To be called from birdloop implementation */ +void rcu_birdloop_start(struct rcu_birdloop *); +void rcu_birdloop_stop(struct rcu_birdloop *); + +/* Run this from resource init */ +void rcu_init(void); + +#endif diff --git a/lib/resource.c b/lib/resource.c index 5d4c7780..898fb533 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -14,6 +14,7 @@ #include "nest/bird.h" #include "lib/resource.h" #include "lib/string.h" +#include "lib/rcu.h" /** * DOC: Resource pools @@ -29,12 +30,6 @@ * is freed upon shutdown of the module. */ -struct pool { - resource r; - list inside; - const char *name; -}; - static void pool_dump(resource *); static void pool_free(resource *); static resource *pool_lookup(resource *, unsigned long); @@ -70,6 +65,20 @@ rp_new(pool *p, const char *name) return z; } +pool * +rp_newf(pool *p, const char *fmt, ...) +{ + pool *z = rp_new(p, NULL); + + va_list args; + va_start(args, fmt); + z->name = mb_vsprintf(p, fmt, args); + va_end(args); + + return z; +} + + static void pool_free(resource *P) { @@ -270,11 +279,34 @@ rlookup(unsigned long a) void resource_init(void) { + resource_sys_init(); + rcu_init(); + root_pool.r.class = &pool_class; root_pool.name = "Root"; init_list(&root_pool.inside); + tmp_init(&root_pool); } +_Thread_local struct tmp_resources tmp_res; + +void +tmp_init(pool *p) +{ + tmp_res.lp = lp_new_default(p); + tmp_res.parent = p; + tmp_res.pool = rp_new(p, "TMP"); +} + +void +tmp_flush(void) +{ + lp_flush(tmp_linpool); + rfree(tmp_res.pool); + tmp_res.pool = rp_new(tmp_res.parent, "TMP"); +} + + /** * DOC: Memory blocks * @@ -407,21 +439,6 @@ mb_realloc(void *m, unsigned size) return b->data; } -/** - * mb_move - move a memory block - * @m: memory block - * @p: target pool - * - * mb_move() moves the given memory block to another pool in the same way - * as rmove() moves a plain resource. - */ -void -mb_move(void *m, pool *p) -{ - struct mblock *b = SKIP_BACK(struct mblock, data, m); - rmove(b, p); -} - /** * mb_free - free a memory block diff --git a/lib/resource.h b/lib/resource.h index 9ec41ed8..5ad011ec 100644 --- a/lib/resource.h +++ b/lib/resource.h @@ -40,10 +40,16 @@ struct resclass { /* Generic resource manipulation */ -typedef struct pool pool; +typedef struct pool { + resource r; + list inside; + const char *name; +} pool; + void resource_init(void); pool *rp_new(pool *, const char *); /* Create new pool */ +pool *rp_newf(pool *, const char *, ...); /* Create a new pool with a formatted string as its name */ void rfree(void *); /* Free single resource */ void rdump(void *); /* Dump to debug output */ struct resmem rmemsize(void *res); /* Return size of memory used by the resource */ @@ -59,7 +65,6 @@ extern pool root_pool; void *mb_alloc(pool *, unsigned size); void *mb_allocz(pool *, unsigned size); void *mb_realloc(void *m, unsigned size); -void mb_move(void *, pool *); void mb_free(void *); /* Memory pools with linear allocation */ @@ -69,9 +74,10 @@ typedef struct linpool linpool; typedef struct lp_state { void *current, *large; byte *ptr; + uint total_large; } lp_state; -linpool *lp_new(pool *, unsigned blk); +linpool *lp_new(pool *); void *lp_alloc(linpool *, unsigned size); /* Aligned */ void *lp_allocu(linpool *, unsigned size); /* Unaligned */ void *lp_allocz(linpool *, unsigned size); /* With clear */ @@ -79,10 +85,23 @@ void lp_flush(linpool *); /* Free everything, but leave linpool */ void lp_save(linpool *m, lp_state *p); /* Save state */ void lp_restore(linpool *m, lp_state *p); /* Restore state */ -extern const int lp_chunk_size; -#define LP_GAS 1024 -#define LP_GOOD_SIZE(x) (((x + LP_GAS - 1) & (~(LP_GAS - 1))) - lp_chunk_size) -#define lp_new_default(p) lp_new(p, LP_GOOD_SIZE(LP_GAS*4)) +struct tmp_resources { + pool *pool, *parent; + linpool *lp; +}; + +extern _Thread_local struct tmp_resources tmp_res; + +#define tmp_linpool tmp_res.lp +#define tmp_alloc(sz) lp_alloc(tmp_linpool, sz) +#define tmp_allocu(sz) lp_allocu(tmp_linpool, sz) +#define tmp_allocz(sz) lp_allocz(tmp_linpool, sz) + +void tmp_init(pool *p); +void tmp_flush(void); + + +#define lp_new_default lp_new /* Slabs */ @@ -91,7 +110,7 @@ typedef struct slab slab; slab *sl_new(pool *, unsigned size); void *sl_alloc(slab *); void *sl_allocz(slab *); -void sl_free(slab *, void *); +void sl_free(void *); /* * Low-level memory allocation functions, please don't use @@ -101,10 +120,12 @@ void sl_free(slab *, void *); void buffer_realloc(void **buf, unsigned *size, unsigned need, unsigned item_size); /* Allocator of whole pages; for use in slabs and other high-level allocators. */ -u64 get_page_size(void); +#define PAGE_HEAD(x) ((void *) (((uintptr_t) (x)) & ~(page_size-1))) +extern long page_size; void *alloc_page(void); void free_page(void *); -extern uint pages_kept; + +void resource_sys_init(void); #ifdef HAVE_LIBDMALLOC /* diff --git a/lib/route.h b/lib/route.h new file mode 100644 index 00000000..f5456396 --- /dev/null +++ b/lib/route.h @@ -0,0 +1,549 @@ +/* + * BIRD Internet Routing Daemon -- Routing data structures + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIB_ROUTE_H_ +#define _BIRD_LIB_ROUTE_H_ + +#undef RT_SOURCE_DEBUG + +#include "lib/type.h" +#include "lib/rcu.h" +#include "lib/hash.h" +#include "lib/event.h" + +struct network; +struct proto; +struct cli; +struct rtable; + +typedef struct rte { + struct ea_list *attrs; /* Attributes of this route */ + const net_addr *net; /* Network this RTE belongs to */ + struct rte_src *src; /* Route source that created the route */ + struct rt_import_hook *sender; /* Import hook used to send the route to the routing table */ + btime lastmod; /* Last modified (set by table) */ + u32 id; /* Table specific route id */ + byte flags; /* Table-specific flags */ + byte pflags; /* Protocol-specific flags */ + u8 generation; /* If this route import is based on other previously exported route, + this value should be 1 + MAX(generation of the parent routes). + Otherwise the route is independent and this value is zero. */ + u8 stale_cycle; /* Auxiliary value for route refresh */ +} rte; + +#define REF_FILTERED 2 /* Route is rejected by import filter */ +#define REF_PENDING 32 /* Route has not propagated completely yet */ + +/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ +static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } + +/* Route just has REF_FILTERED flag */ +static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } + +struct rte_src { + struct rte_src *next; /* Hash chain */ + struct rte_owner *owner; /* Route source owner */ + u32 private_id; /* Private ID, assigned by the protocol */ + u32 global_id; /* Globally unique ID of the source */ + _Atomic u64 uc; /* Use count */ +}; + +struct rte_owner_class { + void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ + int (*rte_better)(struct rte *, struct rte *); + int (*rte_mergable)(struct rte *, struct rte *); + u32 (*rte_igp_metric)(const rte *); +}; + +struct rte_owner { + struct rte_owner_class *class; + int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); + HASH(struct rte_src) hash; + const char *name; + u32 hash_key; + u32 uc; + event_list *list; + event *prune; + event *stop; +}; + +DEFINE_DOMAIN(attrs); +extern DOMAIN(attrs) attrs_domain; + +#define RTA_LOCK LOCK_DOMAIN(attrs, attrs_domain) +#define RTA_UNLOCK UNLOCK_DOMAIN(attrs, attrs_domain) + +#define RTE_SRC_PU_SHIFT 44 +#define RTE_SRC_IN_PROGRESS (1ULL << RTE_SRC_PU_SHIFT) + +/* Get a route source. This also locks the source, therefore the caller has to + * unlock the source after the route has been propagated. */ +struct rte_src *rt_get_source_o(struct rte_owner *o, u32 id); +#define rt_get_source(p, id) rt_get_source_o(&(p)->sources, (id)) + +struct rte_src *rt_find_source_global(u32 id); + +#ifdef RT_SOURCE_DEBUG +#define rt_lock_source _rt_lock_source_internal +#define rt_unlock_source _rt_unlock_source_internal +#endif + +static inline void rt_lock_source(struct rte_src *src) +{ + /* Locking a source is trivial; somebody already holds it so we just increase + * the use count. Nothing can be freed underneath our hands. */ + u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); + ASSERT_DIE(uc > 0); +} + +static inline void rt_unlock_source(struct rte_src *src) +{ + /* Unlocking is tricky. We do it lockless so at the same time, the prune + * event may be running, therefore if the unlock gets us to zero, it must be + * the last thing in this routine, otherwise the prune routine may find the + * source's usecount zeroed, freeing it prematurely. + * + * The usecount is split into two parts: + * the top 20 bits are an in-progress indicator + * the bottom 44 bits keep the actual usecount. + * + * Therefore at most 1 million of writers can simultaneously unlock the same + * source, while at most ~17T different routes can reference it. Both limits + * are insanely high from the 2022 point of view. Let's suppose that when 17T + * routes or 1M writers get real, we get also 128bit atomic variables in the + * C norm. */ + + /* First, we push the in-progress indicator */ + u64 uc = atomic_fetch_add_explicit(&src->uc, RTE_SRC_IN_PROGRESS, memory_order_acq_rel); + + /* Then we split the indicator to its parts. Remember, we got the value before the operation happened. */ + u64 pending = (uc >> RTE_SRC_PU_SHIFT) + 1; + uc &= RTE_SRC_IN_PROGRESS - 1; + + /* We per-use the RCU critical section indicator to make the prune event wait + * until we finish here in the rare case we get preempted. */ + rcu_read_lock(); + + /* Obviously, there can't be more pending unlocks than the usecount itself */ + if (uc == pending) + /* If we're the last unlocker, schedule the owner's prune event */ + ev_send(src->owner->list, src->owner->prune); + else + ASSERT_DIE(uc > pending); + + /* And now, finally, simultaneously pop the in-progress indicator and the + * usecount, possibly allowing the source pruning routine to free this structure */ + atomic_fetch_sub_explicit(&src->uc, RTE_SRC_IN_PROGRESS + 1, memory_order_acq_rel); + + /* ... and to reduce the load a bit, the source pruning routine will better wait for + * RCU synchronization instead of a busy loop. */ + rcu_read_unlock(); +} + +#ifdef RT_SOURCE_DEBUG +#undef rt_lock_source +#undef rt_unlock_source + +#define rt_lock_source(x) ( log(L_INFO "Lock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_lock_source_internal(x) ) +#define rt_unlock_source(x) ( log(L_INFO "Unlock source %uG at %s:%d", (x)->global_id, __FILE__, __LINE__), _rt_unlock_source_internal(x) ) +#endif + +void rt_init_sources(struct rte_owner *, const char *name, event_list *list); +void rt_destroy_sources(struct rte_owner *, event *); + +/* + * Route Attributes + * + * Beware: All standard BGP attributes must be represented here instead + * of making them local to the route. This is needed to ensure proper + * construction of BGP route attribute lists. + */ + +/* Nexthop structure */ +struct nexthop { + ip_addr gw; /* Next hop */ + struct iface *iface; /* Outgoing interface */ + byte flags; + byte weight; + byte labels; /* Number of all labels */ + u32 label[0]; +}; + +/* For packing one into eattrs */ +struct nexthop_adata { + struct adata ad; + /* There is either a set of nexthops or a special destination (RTD_*) */ + union { + struct nexthop nh; + uint dest; + }; +}; + +#define NEXTHOP_DEST_SIZE (OFFSETOF(struct nexthop_adata, dest) + sizeof(uint) - OFFSETOF(struct adata, data)) +#define NEXTHOP_DEST_LITERAL(x) ((struct nexthop_adata) { \ + .ad.length = NEXTHOP_DEST_SIZE, .dest = (x), }) + +#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ + + +#define RTS_STATIC 1 /* Normal static route */ +#define RTS_INHERIT 2 /* Route inherited from kernel */ +#define RTS_DEVICE 3 /* Device route */ +#define RTS_STATIC_DEVICE 4 /* Static device route */ +#define RTS_REDIRECT 5 /* Learned via redirect */ +#define RTS_RIP 6 /* RIP route */ +#define RTS_OSPF 7 /* OSPF route */ +#define RTS_OSPF_IA 8 /* OSPF inter-area route */ +#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ +#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ +#define RTS_BGP 11 /* BGP route */ +#define RTS_PIPE 12 /* Inter-table wormhole */ +#define RTS_BABEL 13 /* Babel route */ +#define RTS_RPKI 14 /* Route Origin Authorization */ +#define RTS_PERF 15 /* Perf checker */ +#define RTS_MAX 16 + +#define RTD_NONE 0 /* Undefined next hop */ +#define RTD_UNICAST 1 /* A standard next hop */ +#define RTD_BLACKHOLE 2 /* Silently drop packets */ +#define RTD_UNREACHABLE 3 /* Reject as unreachable */ +#define RTD_PROHIBIT 4 /* Administratively prohibited */ +#define RTD_MAX 5 + +extern const char * rta_dest_names[RTD_MAX]; + +static inline const char *rta_dest_name(uint n) +{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } + + +/* + * Extended Route Attributes + */ + +typedef struct eattr { + word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ + byte flags; /* Protocol-dependent flags */ + byte type; /* Attribute type */ + byte rfu:5; + byte originated:1; /* The attribute has originated locally */ + byte fresh:1; /* An uncached attribute (e.g. modified in export filter) */ + byte undef:1; /* Explicitly undefined */ + + PADDING(unused, 3, 3); + + union bval u; +} eattr; + + +#define EA_CODE_MASK 0xffff +#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ +#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ +#define EA_BIT_GET(ea) ((ea) >> 24) + +typedef struct ea_list { + struct ea_list *next; /* In case we have an override list */ + byte flags; /* Flags: EALF_... */ + byte rfu; + word count; /* Number of attributes */ + eattr attrs[0]; /* Attribute definitions themselves */ +} ea_list; + +struct ea_storage { + struct ea_storage *next_hash; /* Next in hash chain */ + struct ea_storage **pprev_hash; /* Previous in hash chain */ + _Atomic u32 uc; /* Use count */ + u32 hash_key; /* List hash */ + ea_list l[0]; /* The list itself */ +}; + +#define EALF_SORTED 1 /* Attributes are sorted by code */ +#define EALF_BISECT 2 /* Use interval bisection for searching */ +#define EALF_CACHED 4 /* List is cached */ + +struct ea_class { +#define EA_CLASS_INSIDE \ + const char *name; /* Name (both print and filter) */ \ + struct symbol *sym; /* Symbol to export to configs */ \ + uint id; /* Autoassigned attribute ID */ \ + uint uc; /* Reference count */ \ + btype type; /* Data type ID */ \ + uint readonly:1; /* This attribute can't be changed by filters */ \ + uint conf:1; /* Requested by config */ \ + uint hidden:1; /* Technical attribute, do not show, do not expose to filters */ \ + void (*format)(const eattr *ea, byte *buf, uint size); \ + void (*stored)(const eattr *ea); /* When stored into global hash */ \ + void (*freed)(const eattr *ea); /* When released from global hash */ \ + + EA_CLASS_INSIDE; +}; + +struct ea_class_ref { + resource r; + struct ea_class *class; +}; + +void ea_register_init(struct ea_class *); +struct ea_class_ref *ea_register_alloc(pool *, struct ea_class); + +#define EA_REGISTER_ALL_HELPER(x) ea_register_init(x); +#define EA_REGISTER_ALL(...) MACRO_FOREACH(EA_REGISTER_ALL_HELPER, __VA_ARGS__) + +struct ea_class *ea_class_find_by_id(uint id); +struct ea_class *ea_class_find_by_name(const char *name); +static inline struct ea_class *ea_class_self(struct ea_class *self) { return self; } +#define ea_class_find(_arg) _Generic((_arg), \ + uint: ea_class_find_by_id, \ + word: ea_class_find_by_id, \ + char *: ea_class_find_by_name, \ + const char *: ea_class_find_by_name, \ + struct ea_class *: ea_class_self)(_arg) + +struct ea_walk_state { + ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ + eattr *ea; /* Current eattr, initially NULL */ + u32 visited[4]; /* Bitfield, limiting max to 128 */ +}; + +#define ea_find(_l, _arg) _Generic((_arg), uint: ea_find_by_id, struct ea_class *: ea_find_by_class, char *: ea_find_by_name)(_l, _arg) +eattr *ea_find_by_id(ea_list *, unsigned ea); +static inline eattr *ea_find_by_class(ea_list *l, const struct ea_class *def) +{ return ea_find_by_id(l, def->id); } +static inline eattr *ea_find_by_name(ea_list *l, const char *name) +{ + const struct ea_class *def = ea_class_find_by_name(name); + return def ? ea_find_by_class(l, def) : NULL; +} + +#define ea_get_int(_l, _ident, _def) ({ \ + struct ea_class *cls = ea_class_find((_ident)); \ + ASSERT_DIE(cls->type & EAF_EMBEDDED); \ + const eattr *ea = ea_find((_l), cls->id); \ + (ea ? ea->u.data : (_def)); \ + }) + +#define ea_get_ip(_l, _ident, _def) ({ \ + struct ea_class *cls = ea_class_find((_ident)); \ + ASSERT_DIE(cls->type == T_IP); \ + const eattr *ea = ea_find((_l), cls->id); \ + (ea ? *((const ip_addr *) ea->u.ptr->data) : (_def)); \ + }) + +eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); +void ea_dump(ea_list *); +int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */ +uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ +ea_list *ea_append(ea_list *to, ea_list *what); +void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); + +/* Normalize ea_list; allocates the result from tmp_linpool */ +ea_list *ea_normalize(ea_list *e, int overlay); + +uint ea_list_size(ea_list *); +void ea_list_copy(ea_list *dest, ea_list *src, uint size); + +#define EA_LOCAL_LIST(N) struct { ea_list l; eattr a[N]; } + +#define EA_LITERAL_EMBEDDED(_class, _flags, _val) ({ \ + btype _type = (_class)->type; \ + ASSERT_DIE(_type & EAF_EMBEDDED); \ + EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.i = _val); \ + }) + +#define EA_LITERAL_STORE_ADATA(_class, _flags, _buf, _len) ({ \ + btype _type = (_class)->type; \ + ASSERT_DIE(!(_type & EAF_EMBEDDED)); \ + EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = tmp_store_adata((_buf), (_len))); \ + }) + +#define EA_LITERAL_DIRECT_ADATA(_class, _flags, _adata) ({ \ + btype _type = (_class)->type; \ + ASSERT_DIE(!(_type & EAF_EMBEDDED)); \ + EA_LITERAL_GENERIC((_class)->id, _type, _flags, .u.ad = _adata); \ + }) + +#define EA_LITERAL_GENERIC(_id, _type, _flags, ...) \ + ((eattr) { .id = _id, .type = _type, .flags = _flags, __VA_ARGS__ }) + +static inline eattr * +ea_set_attr(ea_list **to, eattr a) +{ + EA_LOCAL_LIST(1) *ea = tmp_alloc(sizeof(*ea)); + *ea = (typeof(*ea)) { + .l.flags = EALF_SORTED, + .l.count = 1, + .l.next = *to, + .a[0] = a, + }; + + *to = &ea->l; + return &ea->a[0]; +} + +static inline void +ea_unset_attr(ea_list **to, _Bool local, const struct ea_class *def) +{ + ea_set_attr(to, EA_LITERAL_GENERIC(def->id, 0, 0, + .fresh = local, .originated = local, .undef = 1)); +} + +static inline void +ea_set_attr_u32(ea_list **to, const struct ea_class *def, uint flags, u64 data) +{ ea_set_attr(to, EA_LITERAL_EMBEDDED(def, flags, data)); } + +static inline void +ea_set_attr_data(ea_list **to, const struct ea_class *def, uint flags, const void *data, uint len) +{ ea_set_attr(to, EA_LITERAL_STORE_ADATA(def, flags, data, len)); } + +static inline void +ea_copy_attr(ea_list **to, ea_list *from, const struct ea_class *def) +{ + eattr *e = ea_find_by_class(from, def); + if (e) + if (e->type & EAF_EMBEDDED) + ea_set_attr_u32(to, def, e->flags, e->u.data); + else + ea_set_attr_data(to, def, e->flags, e->u.ptr->data, e->u.ptr->length); + else + ea_unset_attr(to, 0, def); +} + +/* + * Common route attributes + */ + +/* Preference: first-order comparison */ +extern struct ea_class ea_gen_preference; +static inline u32 rt_get_preference(rte *rt) +{ return ea_get_int(rt->attrs, &ea_gen_preference, 0); } + +/* IGP metric: second-order comparison */ +extern struct ea_class ea_gen_igp_metric; +u32 rt_get_igp_metric(const rte *rt); +#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other + protocol-specific metric is availabe */ + +/* From: Advertising router */ +extern struct ea_class ea_gen_from; + +/* Source: An old method to devise the route source protocol and kind. + * To be superseded in a near future by something more informative. */ +extern struct ea_class ea_gen_source; +static inline u32 rt_get_source_attr(const rte *rt) +{ return ea_get_int(rt->attrs, &ea_gen_source, 0); } + +/* Flowspec validation result */ +enum flowspec_valid { + FLOWSPEC_UNKNOWN = 0, + FLOWSPEC_VALID = 1, + FLOWSPEC_INVALID = 2, + FLOWSPEC__MAX, +}; + +extern const char * flowspec_valid_names[FLOWSPEC__MAX]; +static inline const char *flowspec_valid_name(enum flowspec_valid v) +{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; } + +extern struct ea_class ea_gen_flowspec_valid; +static inline enum flowspec_valid rt_get_flowspec_valid(rte *rt) +{ return ea_get_int(rt->attrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); } + +/* Next hop: For now, stored as adata */ +extern struct ea_class ea_gen_nexthop; + +static inline void ea_set_dest(struct ea_list **to, uint flags, uint dest) +{ + struct nexthop_adata nhad = NEXTHOP_DEST_LITERAL(dest); + ea_set_attr_data(to, &ea_gen_nexthop, flags, &nhad.ad.data, nhad.ad.length); +} + +/* Next hop structures */ + +#define NEXTHOP_ALIGNMENT (_Alignof(struct nexthop)) +#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) +#define NEXTHOP_SIZE(_nh) NEXTHOP_SIZE_CNT(((_nh)->labels)) +#define NEXTHOP_SIZE_CNT(cnt) BIRD_ALIGN((sizeof(struct nexthop) + sizeof(u32) * (cnt)), NEXTHOP_ALIGNMENT) +#define nexthop_size(nh) NEXTHOP_SIZE((nh)) + +#define NEXTHOP_NEXT(_nh) ((void *) (_nh) + NEXTHOP_SIZE(_nh)) +#define NEXTHOP_END(_nhad) ((_nhad)->ad.data + (_nhad)->ad.length) +#define NEXTHOP_VALID(_nh, _nhad) ((void *) (_nh) < (void *) NEXTHOP_END(_nhad)) +#define NEXTHOP_ONE(_nhad) (NEXTHOP_NEXT(&(_nhad)->nh) == NEXTHOP_END(_nhad)) + +#define NEXTHOP_WALK(_iter, _nhad) for ( \ + struct nexthop *_iter = &(_nhad)->nh; \ + (void *) _iter < (void *) NEXTHOP_END(_nhad); \ + _iter = NEXTHOP_NEXT(_iter)) + + +static inline int nexthop_same(struct nexthop_adata *x, struct nexthop_adata *y) +{ return adata_same(&x->ad, &y->ad); } +struct nexthop_adata *nexthop_merge(struct nexthop_adata *x, struct nexthop_adata *y, int max, linpool *lp); +struct nexthop_adata *nexthop_sort(struct nexthop_adata *x, linpool *lp); +int nexthop_is_sorted(struct nexthop_adata *x); + +#define NEXTHOP_IS_REACHABLE(nhad) ((nhad)->ad.length > NEXTHOP_DEST_SIZE) + +/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ +static inline int rte_is_reachable(rte *r) +{ + eattr *nhea = ea_find(r->attrs, &ea_gen_nexthop); + if (!nhea) + return 0; + + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + return NEXTHOP_IS_REACHABLE(nhad); +} + +static inline int nhea_dest(eattr *nhea) +{ + if (!nhea) + return RTD_NONE; + + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + if (NEXTHOP_IS_REACHABLE(nhad)) + return RTD_UNICAST; + else + return nhad->dest; +} + +static inline int rte_dest(const rte *r) +{ + return nhea_dest(ea_find(r->attrs, &ea_gen_nexthop)); +} + +void rta_init(void); +ea_list *ea_lookup(ea_list *, int overlay); /* Get a cached (and normalized) variant of this attribute list */ +static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; } +static inline struct ea_storage *ea_get_storage(ea_list *r) +{ + ASSERT_DIE(ea_is_cached(r)); + return SKIP_BACK(struct ea_storage, l[0], r); +} + +static inline ea_list *ea_clone(ea_list *r) { + ASSERT_DIE(0 < atomic_fetch_add_explicit(&ea_get_storage(r)->uc, 1, memory_order_acq_rel)); + return r; +} +void ea__free(struct ea_storage *r); +static inline void ea_free(ea_list *l) { + if (!l) return; + struct ea_storage *r = ea_get_storage(l); + if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) ea__free(r); +} + +void ea_dump(ea_list *); +void ea_dump_all(void); +void ea_show_list(struct cli *, ea_list *); + +#define rta_lookup ea_lookup +#define rta_is_cached ea_is_cached +#define rta_clone ea_clone +#define rta_free ea_free + +#endif @@ -32,6 +32,7 @@ #include "nest/bird.h" #include "lib/resource.h" #include "lib/string.h" +#include "lib/tlists.h" #undef FAKE_SLAB /* Turn on if you want to debug memory allocations */ @@ -98,7 +99,7 @@ sl_allocz(slab *s) } void -sl_free(slab *s, void *oo) +sl_free(void *oo) { struct sl_obj *o = SKIP_BACK(struct sl_obj, data, oo); @@ -153,11 +154,38 @@ slab_memsize(resource *r) #define MAX_EMPTY_HEADS 1 +enum sl_head_state { + slh_empty = 2, + slh_partial = 0, + slh_full = 1, +} PACKED; + +struct sl_head { + struct slab *slab; + TLIST_NODE(sl_head, struct sl_head) n; + u16 num_full; + enum sl_head_state state; + u32 used_bits[0]; +}; + +struct sl_alignment { /* Magic structure for testing of alignment */ + byte data; + int x[0]; +}; + +#define TLIST_PREFIX sl_head +#define TLIST_TYPE struct sl_head +#define TLIST_ITEM n +#define TLIST_WANT_WALK +#define TLIST_WANT_ADD_HEAD + +#include "lib/tlists.h" + struct slab { resource r; uint obj_size, head_size, head_bitfield_len; uint objs_per_slab, num_empty_heads, data_size; - list empty_heads, partial_heads, full_heads; + struct sl_head_list empty_heads, partial_heads, full_heads; }; static struct resclass sl_class = { @@ -169,18 +197,15 @@ static struct resclass sl_class = { slab_memsize }; -struct sl_head { - node n; - u32 num_full; - u32 used_bits[0]; -}; +#define SL_GET_HEAD(x) PAGE_HEAD(x) -struct sl_alignment { /* Magic structure for testing of alignment */ - byte data; - int x[0]; -}; +#define SL_HEAD_CHANGE_STATE(_s, _h, _from, _to) ({ \ + ASSERT_DIE(_h->state == slh_##_from); \ + sl_head_rem_node(&_s->_from##_heads, _h); \ + sl_head_add_head(&_s->_to##_heads, _h); \ + _h->state = slh_##_to; \ + }) -#define SL_GET_HEAD(x) ((struct sl_head *) (((uintptr_t) (x)) & ~(get_page_size()-1))) /** * sl_new - create a new Slab @@ -202,7 +227,6 @@ sl_new(pool *p, uint size) s->obj_size = size; s->head_size = sizeof(struct sl_head); - u64 page_size = get_page_size(); do { s->objs_per_slab = (page_size - s->head_size) / size; @@ -218,9 +242,6 @@ sl_new(pool *p, uint size) bug("Slab: object too large"); s->num_empty_heads = 0; - init_list(&s->empty_heads); - init_list(&s->partial_heads); - init_list(&s->full_heads); return s; } @@ -237,8 +258,7 @@ sl_alloc(slab *s) struct sl_head *h; redo: - h = HEAD(s->partial_heads); - if (!h->n.next) + if (!(h = s->partial_heads.first)) goto no_partial; okay: for (uint i=0; i<s->head_bitfield_len; i++) @@ -258,23 +278,27 @@ okay: return out; } - rem_node(&h->n); - add_tail(&s->full_heads, &h->n); + SL_HEAD_CHANGE_STATE(s, h, partial, full); goto redo; no_partial: - h = HEAD(s->empty_heads); - if (h->n.next) + if (h = s->empty_heads.first) { - rem_node(&h->n); - add_head(&s->partial_heads, &h->n); + SL_HEAD_CHANGE_STATE(s, h, empty, partial); s->num_empty_heads--; goto okay; } + h = alloc_page(); ASSERT_DIE(SL_GET_HEAD(h) == h); + +#ifdef POISON + memset(h, 0xba, page_size); +#endif + memset(h, 0, s->head_size); - add_head(&s->partial_heads, &h->n); + h->slab = s; + sl_head_add_head(&s->partial_heads, h); goto okay; } @@ -303,9 +327,10 @@ sl_allocz(slab *s) * and returns it back to the Slab @s. */ void -sl_free(slab *s, void *oo) +sl_free(void *oo) { struct sl_head *h = SL_GET_HEAD(oo); + struct slab *s = h->slab; #ifdef POISON memset(oo, 0xdb, s->data_size); @@ -318,19 +343,22 @@ sl_free(slab *s, void *oo) h->used_bits[pos / 32] &= ~(1 << (pos % 32)); - if (h->num_full-- == s->objs_per_slab) - { - rem_node(&h->n); - add_head(&s->partial_heads, &h->n); - } + if ((h->num_full-- == s->objs_per_slab) && (h->state == slh_full)) + SL_HEAD_CHANGE_STATE(s, h, full, partial); else if (!h->num_full) { - rem_node(&h->n); + sl_head_rem_node(&s->partial_heads, h); if (s->num_empty_heads >= MAX_EMPTY_HEADS) + { +#ifdef POISON + memset(h, 0xde, page_size); +#endif free_page(h); + } else { - add_head(&s->empty_heads, &h->n); + sl_head_add_head(&s->empty_heads, h); + h->state = slh_empty; s->num_empty_heads++; } } @@ -340,13 +368,12 @@ static void slab_free(resource *r) { slab *s = (slab *) r; - struct sl_head *h, *g; - WALK_LIST_DELSAFE(h, g, s->empty_heads) + WALK_TLIST_DELSAFE(sl_head, h, &s->empty_heads) free_page(h); - WALK_LIST_DELSAFE(h, g, s->partial_heads) + WALK_TLIST_DELSAFE(sl_head, h, &s->partial_heads) free_page(h); - WALK_LIST_DELSAFE(h, g, s->full_heads) + WALK_TLIST_DELSAFE(sl_head, h, &s->full_heads) free_page(h); } @@ -355,13 +382,12 @@ slab_dump(resource *r) { slab *s = (slab *) r; int ec=0, pc=0, fc=0; - struct sl_head *h; - WALK_LIST(h, s->empty_heads) + WALK_TLIST(sl_head, h, &s->empty_heads) ec++; - WALK_LIST(h, s->partial_heads) + WALK_TLIST(sl_head, h, &s->partial_heads) pc++; - WALK_LIST(h, s->full_heads) + WALK_TLIST(sl_head, h, &s->full_heads) fc++; debug("(%de+%dp+%df blocks per %d objs per %d bytes)\n", ec, pc, fc, s->objs_per_slab, s->obj_size); } @@ -371,27 +397,26 @@ slab_memsize(resource *r) { slab *s = (slab *) r; size_t heads = 0; - struct sl_head *h; - WALK_LIST(h, s->full_heads) + WALK_TLIST(sl_head, h, &s->full_heads) heads++; size_t items = heads * s->objs_per_slab; - WALK_LIST(h, s->partial_heads) + WALK_TLIST(sl_head, h, &s->partial_heads) { heads++; items += h->num_full; } - WALK_LIST(h, s->empty_heads) + WALK_TLIST(sl_head, h, &s->empty_heads) heads++; - size_t eff = items * s->obj_size; + size_t eff = items * s->data_size; return (struct resmem) { .effective = eff, - .overhead = ALLOC_OVERHEAD + sizeof(struct slab) + heads * get_page_size() - eff, + .overhead = ALLOC_OVERHEAD + sizeof(struct slab) + heads * page_size - eff, }; } @@ -399,13 +424,12 @@ static resource * slab_lookup(resource *r, unsigned long a) { slab *s = (slab *) r; - struct sl_head *h; - WALK_LIST(h, s->partial_heads) - if ((unsigned long) h < a && (unsigned long) h + get_page_size() < a) + WALK_TLIST(sl_head, h, &s->partial_heads) + if ((unsigned long) h < a && (unsigned long) h + page_size < a) return r; - WALK_LIST(h, s->full_heads) - if ((unsigned long) h < a && (unsigned long) h + get_page_size() < a) + WALK_TLIST(sl_head, h, &s->full_heads) + if ((unsigned long) h < a && (unsigned long) h + page_size < a) return r; return NULL; } diff --git a/lib/slab_test.c b/lib/slab_test.c new file mode 100644 index 00000000..803d0215 --- /dev/null +++ b/lib/slab_test.c @@ -0,0 +1,171 @@ +/* + * BIRD Library -- Slab Alloc / Dealloc Tests + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "lib/resource.h" +#include "lib/bitops.h" + +static const int sizes[] = { + 8, 12, 18, 27, 41, 75, 131, 269, +}; + +#define TEST_SIZE 1024 * 128 +#define ITEMS(sz) TEST_SIZE / ( (sz) >> u32_log2((sz))/2 ) + +struct test_request { + int size; + enum strategy { + TEST_NONE, + TEST_FORWARDS, + TEST_BACKWARDS, + TEST_RANDOM, + TEST_MIXED, + TEST__MAX, + } strategy; +}; + +const char * const strategy_name[TEST__MAX] = { + [TEST_FORWARDS] = "forwards", + [TEST_BACKWARDS] = "backwards", + [TEST_RANDOM] = "random", + [TEST_MIXED] = "mixed", +}; + +static inline byte *test_alloc(slab *s, int sz, struct resmem *sliz) +{ + byte *out = sl_alloc(s); + + for (int p=0; p < sz; p++) + out[p] = p & 0xff; + + struct resmem ns = rmemsize((resource *) s); + + bt_assert(sliz->effective + sz == ns.effective); + bt_assert((sliz->overhead - sz - ns.overhead) % page_size == 0); + + *sliz = ns; + + return out; +} + +static inline void test_free(slab *s, byte *block, int sz, struct resmem *sliz) +{ + for (int p=0; p < sz; p++) + { + bt_assert(block[p] == (p & 0xff)); + block[p]++; + } + + sl_free(block); + + struct resmem ns = rmemsize((resource *) s); + + bt_assert(sliz->effective - sz == ns.effective); + bt_assert((sliz->overhead + sz - ns.overhead) % page_size == 0); + + *sliz = ns; +} + +static inline struct resmem get_memsize(slab *s) +{ + struct resmem sz = rmemsize((resource *) s); + bt_assert(sz.effective == 0); + return sz; +} + +static int +t_slab(const void *data) +{ + const struct test_request *tr = data; + int sz = tr->size; + + slab *s = sl_new(&root_pool, sz); + struct resmem sliz = get_memsize(s); + + int n = ITEMS(sz); + byte **block = mb_alloc(&root_pool, n * sizeof(*block)); + + switch (tr->strategy) { + case TEST_FORWARDS: + for (int i = 0; i < n; i++) + block[i] = test_alloc(s, sz, &sliz); + + for (int i = 0; i < n; i++) + test_free(s, block[i], sz, &sliz); + + break; + + case TEST_BACKWARDS: + for (int i = 0; i < n; i++) + block[i] = test_alloc(s, sz, &sliz); + + for (int i = n - 1; i >= 0; i--) + test_free(s, block[i], sz, &sliz); + + break; + + case TEST_RANDOM: + for (int i = 0; i < n; i++) + block[i] = test_alloc(s, sz, &sliz); + + for (int i = 0; i < n; i++) + { + int pos = bt_random() % (n - i); + test_free(s, block[pos], sz, &sliz); + if (pos != n - i - 1) + block[pos] = block[n - i - 1]; + } + + break; + + case TEST_MIXED: + { + int cur = 0; + int pending = n; + + while (cur + pending > 0) { + int action = bt_random() % (cur + pending); + + if (action < cur) { + test_free(s, block[action], sz, &sliz); + if (action != --cur) + block[action] = block[cur]; + } else { + block[cur++] = test_alloc(s, sz, &sliz); + pending--; + } + } + + break; + } + + default: bug("This shouldn't happen"); + } + + mb_free(block); + return 1; +} +int main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + struct test_request tr; + + for (uint i = 0; i < sizeof(sizes) / sizeof(*sizes); i++) + for (uint strategy = TEST_FORWARDS; strategy < TEST__MAX; strategy++) + { + tr = (struct test_request) { + .size = sizes[i], + .strategy = strategy, + }; + bt_test_suite_arg(t_slab, &tr, "Slab allocator test, size=%d, strategy=%s", + tr.size, strategy_name[strategy]); + } + + return bt_exit_value(); +} diff --git a/lib/socket.h b/lib/socket.h index 0b6ac589..5c69482e 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -12,6 +12,7 @@ #include <errno.h> #include "lib/resource.h" +#include "lib/event.h" #ifdef HAVE_LIBSSH #define LIBSSH_LEGACY_0_4 #include <libssh/libssh.h> @@ -79,6 +80,7 @@ typedef struct birdsock { const char *password; /* Password for MD5 authentication */ const char *err; /* Error message */ struct ssh_sock *ssh; /* Used in SK_SSH */ + struct event reloop; /* Reloop event */ } sock; sock *sock_new(pool *); /* Allocate new socket */ @@ -129,6 +131,7 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shou #define SKF_TRUNCATED 0x200 /* Received packet was truncated, set by IO layer */ #define SKF_HDRINCL 0x400 /* Used internally */ #define SKF_PKTINFO 0x800 /* Used internally */ +#define SKF_PASSIVE_THREAD 0x1000 /* Child sockets used in thread, do not add to main loop */ /* * Socket types SA SP DA DP IF TTL SendTo (?=may, -=must not, *=must) diff --git a/lib/string.h b/lib/string.h index 976b1c24..2829943d 100644 --- a/lib/string.h +++ b/lib/string.h @@ -20,6 +20,11 @@ int bvsprintf(char *str, const char *fmt, va_list args); int bsnprintf(char *str, int size, const char *fmt, ...); int bvsnprintf(char *str, int size, const char *fmt, va_list args); +char *mb_sprintf(pool *p, const char *fmt, ...); +char *mb_vsprintf(pool *p, const char *fmt, va_list args); +char *lp_sprintf(linpool *p, const char *fmt, ...); +char *lp_vsprintf(linpool *p, const char *fmt, va_list args); + int buffer_vprint(buffer *buf, const char *fmt, va_list args); int buffer_print(buffer *buf, const char *fmt, ...); void buffer_puts(buffer *buf, const char *str); diff --git a/lib/timer.c b/lib/timer.c index 381163d0..ff6975a4 100644 --- a/lib/timer.c +++ b/lib/timer.c @@ -36,57 +36,13 @@ #include "lib/resource.h" #include "lib/timer.h" - -struct timeloop main_timeloop; - - -#ifdef USE_PTHREADS - #include <pthread.h> -/* Data accessed and modified from proto/bfd/io.c */ -pthread_key_t current_time_key; - -static inline struct timeloop * -timeloop_current(void) -{ - return pthread_getspecific(current_time_key); -} - -static inline void -timeloop_init_current(void) -{ - pthread_key_create(¤t_time_key, NULL); - pthread_setspecific(current_time_key, &main_timeloop); -} +_Atomic btime last_time; +_Atomic btime real_time; void wakeup_kick_current(void); -#else - -/* Just use main timelooop */ -static inline struct timeloop * timeloop_current(void) { return &main_timeloop; } -static inline void timeloop_init_current(void) { } - -#endif - -btime -current_time(void) -{ - return timeloop_current()->last_time; -} - -btime -current_real_time(void) -{ - struct timeloop *loop = timeloop_current(); - - if (!loop->real_time) - times_update_real_time(loop); - - return loop->real_time; -} - #define TIMER_LESS(a,b) ((a)->expires < (b)->expires) #define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \ @@ -112,7 +68,7 @@ tm_dump(resource *r) if (t->recurrent) debug("recur %d, ", t->recurrent); if (t->expires) - debug("expires in %d ms)\n", (t->expires - current_time()) TO_MS); + debug("in loop %p expires in %d ms)\n", t->loop, (t->expires - current_time()) TO_MS); else debug("inactive)\n"); } @@ -135,41 +91,40 @@ tm_new(pool *p) return t; } -void -tm_set(timer *t, btime when) +static void +tm_set_in_tl(timer *t, btime when, struct timeloop *local_timeloop) { - struct timeloop *loop = timeloop_current(); - uint tc = timers_count(loop); + uint tc = timers_count(local_timeloop); if (!t->expires) { t->index = ++tc; t->expires = when; - BUFFER_PUSH(loop->timers) = t; - HEAP_INSERT(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP); + BUFFER_PUSH(local_timeloop->timers) = t; + HEAP_INSERT(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP); } else if (t->expires < when) { t->expires = when; - HEAP_INCREASE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); + HEAP_INCREASE(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); } else if (t->expires > when) { t->expires = when; - HEAP_DECREASE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); + HEAP_DECREASE(local_timeloop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); } -#ifdef CONFIG_BFD - /* Hack to notify BFD loops */ - if ((loop != &main_timeloop) && (t->index == 1)) - wakeup_kick_current(); -#endif + t->loop = local_timeloop; + + if (t->index == 1) + birdloop_ping(local_timeloop->loop); } void -tm_start(timer *t, btime after) +tm_set_in(timer *t, btime when, struct birdloop *loop) { - tm_set(t, current_time() + MAX(after, 0)); + ASSERT_DIE(birdloop_inside(loop)); + tm_set_in_tl(t, when, birdloop_time_loop(loop)); } void @@ -178,20 +133,22 @@ tm_stop(timer *t) if (!t->expires) return; - struct timeloop *loop = timeloop_current(); - uint tc = timers_count(loop); + TLOCK_TIMER_ASSERT(t->loop); - HEAP_DELETE(loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); - BUFFER_POP(loop->timers); + uint tc = timers_count(t->loop); + + HEAP_DELETE(t->loop->timers.data, tc, timer *, TIMER_LESS, TIMER_SWAP, t->index); + BUFFER_POP(t->loop->timers); t->index = -1; t->expires = 0; + t->loop = NULL; } void timers_init(struct timeloop *loop, pool *p) { - times_init(loop); + TLOCK_TIMER_ASSERT(loop); BUFFER_INIT(loop->timers, p, 4); BUFFER_PUSH(loop->timers) = NULL; @@ -200,13 +157,15 @@ timers_init(struct timeloop *loop, pool *p) void io_log_event(void *hook, void *data); void -timers_fire(struct timeloop *loop) +timers_fire(struct timeloop *loop, int io_log) { + TLOCK_TIMER_ASSERT(loop); + btime base_time; timer *t; - times_update(loop); - base_time = loop->last_time; + times_update(); + base_time = current_time(); while (t = timers_first(loop)) { @@ -217,32 +176,26 @@ timers_fire(struct timeloop *loop) { btime when = t->expires + t->recurrent; - if (when <= loop->last_time) - when = loop->last_time + t->recurrent; + if (when <= base_time) + when = base_time + t->recurrent; if (t->randomize) when += random() % (t->randomize + 1); - tm_set(t, when); + tm_set_in_tl(t, when, loop); } else tm_stop(t); /* This is ugly hack, we want to log just timers executed from the main I/O loop */ - if (loop == &main_timeloop) + if (io_log) io_log_event(t->hook, t->data); t->hook(t); + tmp_flush(); } } -void -timer_init(void) -{ - timers_init(&main_timeloop, &root_pool); - timeloop_init_current(); -} - /** * tm_parse_time - parse a date and time diff --git a/lib/timer.h b/lib/timer.h index c5ea430c..555fc96f 100644 --- a/lib/timer.h +++ b/lib/timer.h @@ -12,8 +12,14 @@ #include "nest/bird.h" #include "lib/buffer.h" +#include "lib/io-loop.h" +#include "lib/locking.h" #include "lib/resource.h" +#include <stdatomic.h> + +extern _Atomic btime last_time; +extern _Atomic btime real_time; typedef struct timer { @@ -25,36 +31,42 @@ typedef struct timer uint randomize; /* Amount of randomization */ uint recurrent; /* Timer recurrence */ + struct timeloop *loop; /* Loop where the timer is active */ + int index; } timer; struct timeloop { BUFFER_(timer *) timers; - btime last_time; - btime real_time; + struct domain_generic *domain; + struct birdloop *loop; }; +#define TLOCK_TIMER_ASSERT(loop) ASSERT_DIE((loop)->domain && DG_IS_LOCKED((loop)->domain)) +#define TLOCK_LOCAL_ASSERT(loop) ASSERT_DIE(!(loop)->domain || DG_IS_LOCKED((loop)->domain)) + static inline uint timers_count(struct timeloop *loop) -{ return loop->timers.used - 1; } +{ TLOCK_TIMER_ASSERT(loop); return loop->timers.used - 1; } static inline timer *timers_first(struct timeloop *loop) -{ return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; } +{ TLOCK_TIMER_ASSERT(loop); return (loop->timers.used > 1) ? loop->timers.data[1] : NULL; } -extern struct timeloop main_timeloop; - -btime current_time(void); -btime current_real_time(void); +#define current_time() atomic_load_explicit(&last_time, memory_order_acquire) +#define current_real_time() atomic_load_explicit(&real_time, memory_order_acquire) //#define now (current_time() TO_S) //#define now_real (current_real_time() TO_S) extern btime boot_time; timer *tm_new(pool *p); -void tm_set(timer *t, btime when); -void tm_start(timer *t, btime after); +#define tm_set(t, when) tm_set_in((t), (when), &main_birdloop) +#define tm_start(t, after) tm_start_in((t), (after), &main_birdloop) void tm_stop(timer *t); +void tm_set_in(timer *t, btime when, struct birdloop *loop); +#define tm_start_in(t, after, loop) tm_set_in((t), (current_time() + MAX_((after), 0)), loop) + static inline int tm_active(timer *t) { @@ -94,15 +106,11 @@ tm_start_max(timer *t, btime after) } /* In sysdep code */ -void times_init(struct timeloop *loop); -void times_update(struct timeloop *loop); -void times_update_real_time(struct timeloop *loop); +void times_update(void); /* For I/O loop */ void timers_init(struct timeloop *loop, pool *p); -void timers_fire(struct timeloop *loop); - -void timer_init(void); +void timers_fire(struct timeloop *loop, int io_log); struct timeformat { diff --git a/lib/tlists.h b/lib/tlists.h new file mode 100644 index 00000000..e1ed79ea --- /dev/null +++ b/lib/tlists.h @@ -0,0 +1,172 @@ +/* + * BIRD Library -- Typed Linked Lists + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * + * This implementation of linked lists forces its members to be + * typed. On the other hand, it needs to be implemented as ugly macros to + * keep the needed genericity. + * + * Usage: + * 1. Include this file + * 2. Define the node structure + * 3. For every list type you need to define: + * A. #define TLIST_PREFIX and other macros + * B. Include this file once again + * + * Macros to define: + * TLIST_PREFIX: prefix to prepend to everything generated + * TLIST_TYPE: the actual node type + * TLIST_ITEM: where the tlist structure is + * TLIST_WANT_WALK: if defined, generates a helper functions for list walking macros + * TLIST_WANT_ADD_HEAD: if defined, TLIST_PREFIX_add_head() is generated to + * add an item to the beginning of the list + * TLIST_WANT_ADD_TAIL: if defined, TLIST_PREFIX_add_tail() is generated to + * add an item to the end of the list + * + * TLIST_PREFIX_rem_node() is generated always. + * + * All these macros are #undef-ed by including this file. + * + * Example: + * + * #include "lib/tlists.h" + * + * struct foo { + * ... + * TLIST_NODE(bar, struct foo) baz; + * ... + * }; + * + * #define TLIST_PREFIX bar + * #define TLIST_TYPE struct foo + * #define TLIST_ITEM baz + * + * #define TLIST_WANT_WALK + * #define TLIST_WANT_ADD_HEAD + * + * #include "lib/tlists.h" + * + * ... + * (end of example) + * + */ + +#ifdef _BIRD_LIB_TLISTS_H_ +# ifdef TLIST_PREFIX + +/* Check for mandatory arguments */ +#ifndef TLIST_TYPE +#error "TLIST_TYPE must be defined" +#endif +#ifndef TLIST_ITEM +#error "TLIST_ITEM must be defined" +#endif +#ifndef TLIST_PREFIX +#error "TLIST_PREFIX must be defined" +#endif + +#define TLIST_NAME(x) MACRO_CONCAT_AFTER(TLIST_PREFIX,_##x) +#ifndef TLIST_LIST_STRUCT +#define TLIST_LIST_STRUCT TLIST_NAME(list) +#endif + +typedef struct TLIST_LIST_STRUCT { + TLIST_TYPE *first; + TLIST_TYPE *last; +} TLIST_LIST_STRUCT; + +#ifdef TLIST_WANT_WALK +static inline struct TLIST_NAME(node) * TLIST_NAME(node_get)(TLIST_TYPE *node) +{ return &(node->TLIST_ITEM); } +#endif + +#ifdef TLIST_WANT_ADD_HEAD +static inline void TLIST_NAME(add_head)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node) +{ + ASSERT_DIE(!node->TLIST_ITEM.prev && !node->TLIST_ITEM.next); + if (node->TLIST_ITEM.next = list->first) + list->first->TLIST_ITEM.prev = node; + else + list->last = node; + list->first = node; +} +#endif + +#ifdef TLIST_WANT_ADD_TAIL +static inline void TLIST_NAME(add_tail)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node) +{ + ASSERT_DIE(!node->TLIST_ITEM.prev && !node->TLIST_ITEM.next); + if (node->TLIST_ITEM.prev = list->last) + list->last->TLIST_ITEM.next = node; + else + list->first = node; + list->last = node; +} +#endif + +static inline void TLIST_NAME(rem_node)(TLIST_LIST_STRUCT *list, TLIST_TYPE *node) +{ + if (node->TLIST_ITEM.prev) + node->TLIST_ITEM.prev->TLIST_ITEM.next = node->TLIST_ITEM.next; + else + { + ASSERT_DIE(list->first == node); + list->first = node->TLIST_ITEM.next; + } + + if (node->TLIST_ITEM.next) + node->TLIST_ITEM.next->TLIST_ITEM.prev = node->TLIST_ITEM.prev; + else + { + ASSERT_DIE(list->last == node); + list->last = node->TLIST_ITEM.prev; + } + + node->TLIST_ITEM.next = node->TLIST_ITEM.prev = NULL; +} + +#undef TLIST_PREFIX +#undef TLIST_NAME +#undef TLIST_LIST_STRUCT +#undef TLIST_TYPE +#undef TLIST_ITEM +#undef TLIST_WANT_ADD_HEAD +#undef TLIST_WANT_ADD_TAIL + +# endif +#else +#define _BIRD_LIB_TLISTS_H_ + +#include "lib/macro.h" + +#if defined(TLIST_NAME) || defined(TLIST_PREFIX) +#error "You should first include lib/tlists.h without requesting a TLIST" +#endif + +#define TLIST_NODE(_name, _type) struct _name##_node { _type *next; _type *prev; } +#define TLIST_LIST(_name) struct _name##_list + +/* Use ->first and ->last to access HEAD and TAIL */ +#define THEAD(_name, _list) (_list)->first +#define TTAIL(_name, _list) (_list)->last + +/* Walkaround macros: simple and resilient to node removal */ +#define WALK_TLIST(_name, _node, _list) \ + for (typeof((_list)->first) _node = (_list)->first; \ + _node; _node = _name##_node_get((_node))->next) + +#define WALK_TLIST_DELSAFE(_name, _node, _list) \ + for (typeof((_list)->first) _node = (_list)->first, \ + _helper = _node ? _name##_node_get((_list)->first)->next : NULL; \ + _node; \ + (_node = _helper) ? (_helper = _name##_node_get(_helper)->next) : 0) + +/* Empty check */ +#define EMPTY_TLIST(_name, _list) (!(_list)->first) + +#endif + diff --git a/lib/type.h b/lib/type.h new file mode 100644 index 00000000..b54744c1 --- /dev/null +++ b/lib/type.h @@ -0,0 +1,112 @@ +/* + * BIRD Internet Routing Daemon -- Internal Data Types + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_TYPE_H_ +#define _BIRD_TYPE_H_ + +#include "lib/birdlib.h" +#include "lib/attrs.h" + +union bval { +#define BVAL_ITEMS \ + struct { \ + u32 data; /* Integer type inherited from eattrs */ \ + PADDING(data, 0, 4); /* Must be padded on 64-bits */ \ + }; \ + struct { \ + u32 i; /* Integer type inherited from filters */ \ + PADDING(i, 0, 4); /* Must be padded on 64-bits */ \ + }; \ + const struct adata *ptr; /* Generic attribute data inherited from eattrs */ \ + const struct adata *ad; /* Generic attribute data inherited from filters */ \ + + BVAL_ITEMS; +}; + +union bval_long { + union bval bval; /* For direct assignments */ + BVAL_ITEMS; /* For item-wise access */ + + u64 ec; + lcomm lc; + ip_addr ip; + const net_addr *net; + const char *s; + const struct f_tree *t; + const struct f_trie *ti; + const struct f_path_mask *path_mask; + struct f_path_mask_item pmi; +}; + + +/* Internal types */ +enum btype { +/* Nothing. Simply nothing. */ + T_VOID = 0, + +/* Something but inaccessible. */ + T_OPAQUE = 0x02, /* Opaque byte string (not filterable) */ + T_IFACE = 0x0c, /* Pointer to an interface (inside adata) */ + T_NEXTHOP_LIST = 0x2c, /* The whole nexthop block */ + T_HOSTENTRY = 0x2e, /* Hostentry with possible MPLS labels */ + +/* Types shared with eattrs */ + T_INT = 0x01, /* 32-bit unsigned integer number */ + T_IP = 0x04, /* IP address */ + T_QUAD = 0x05, /* Router ID (IPv4 address) */ + T_PATH = 0x06, /* BGP AS path (encoding per RFC 1771:4.3) */ + T_CLIST = 0x0a, /* Set of u32's (e.g., a community list) */ + T_ECLIST = 0x0e, /* Set of pairs of u32's - ext. community list */ + T_LCLIST = 0x08, /* Set of triplets of u32's - large community list */ + + T_ENUM_BGP_ORIGIN = 0x11, /* BGP Origin enum */ + T_ENUM_RA_PREFERENCE = 0x13, /* RA Preference enum */ + T_ENUM_FLOWSPEC_VALID = 0x15, /* Flowspec validation result */ + +#define EAF_TYPE__MAX 0x1f +#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ + /* Otherwise, attribute data is adata */ + +/* Other user visible types which fit in int */ + T_BOOL = 0xa0, + T_PAIR = 0xa4, /* Notice that pair is stored as integer: first << 16 | second */ + +/* Put enumerational types in 0x20..0x3f range */ + T_ENUM_LO = 0x10, + T_ENUM_HI = 0x3f, + + T_ENUM_RTS = 0x31, + T_ENUM_SCOPE = 0x33, + T_ENUM_RTD = 0x37, + T_ENUM_ROA = 0x39, + T_ENUM_NETTYPE = 0x3b, + T_ENUM_AF = 0x3d, + +/* new enums go here */ + +#define T_ENUM T_ENUM_LO ... T_ENUM_HI + +/* Bigger ones */ + T_NET = 0xb0, + T_STRING = 0xb4, + T_PATH_MASK = 0xb8, /* mask for BGP path */ + T_EC = 0xbc, /* Extended community value, u64 */ + T_LC = 0xc0, /* Large community value, lcomm */ + T_RD = 0xc4, /* Route distinguisher for VPN addresses */ + T_PATH_MASK_ITEM = 0xc8, /* Path mask item for path mask constructors */ + + T_SET = 0x80, + T_PREFIX_SET = 0x84, +} PACKED; + +typedef enum btype btype; + +STATIC_ASSERT(sizeof(btype) == sizeof(byte)); + + +#endif diff --git a/lib/type_test.c b/lib/type_test.c new file mode 100644 index 00000000..b526db69 --- /dev/null +++ b/lib/type_test.c @@ -0,0 +1,79 @@ +/* + * BIRD Library -- Data Type Alignment Tests + * + * (c) 2022 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include "test/birdtest.h" +#include "lib/type.h" +#include "lib/route.h" + +#define CHECK_ONE(val) \ + for (uint i=0; i<sizeof(val); i++) \ + bt_assert(((const u8 *) &val)[i] == (u8) ~0); + +#define SET_PADDING(val, name) \ + for (uint i=0; i<sizeof(val.PADDING_NAME(name)); i++) \ + val.PADDING_NAME(name)[i] = ~0; + + +static int +t_bval(void) +{ + union bval v; + + memset(&v, 0, sizeof(v)); + v.data = ~0; + SET_PADDING(v, data); + CHECK_ONE(v); + + memset(&v, 0, sizeof(v)); + v.i = ~0; + SET_PADDING(v, i); + CHECK_ONE(v); + + memset(&v, 0, sizeof(v)); + v.ptr = (void *) ~0; + CHECK_ONE(v); + + memset(&v, 0, sizeof(v)); + v.ad = (void *) ~0; + CHECK_ONE(v); + + return 1; +} + +static int +t_eattr(void) +{ + struct eattr e; + memset(&e, 0, sizeof(e)); + + e.id = ~0; + e.flags = ~0; + e.type = ~0; + e.rfu = ~0; + e.originated = ~0; + e.fresh = ~0; + e.undef = ~0; + memset(&e.u, ~0, sizeof(e.u)); /* Assumes t_bval passed */ + + SET_PADDING(e, unused); + + CHECK_ONE(e); + + return 1; +} + + +int main(int argc, char *argv[]) +{ + bt_init(argc, argv); + + bt_test_suite(t_bval, "Structure alignment test: bval"); + bt_test_suite(t_eattr, "Structure alignment test: eattr"); + + return bt_exit_value(); +} diff --git a/nest/Makefile b/nest/Makefile index 884d3950..39617350 100644 --- a/nest/Makefile +++ b/nest/Makefile @@ -1,8 +1,17 @@ -src := a-path.c a-set.c cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c +src := cli.c cmds.c iface.c locks.c neighbor.c password.c proto.c proto-build.c rt-attr.c rt-dev.c rt-fib.c rt-show.c rt-table.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,dev_build) -tests_src := a-set_test.c a-path_test.c +$(proto-build-c): $(lastword $(MAKEFILE_LIST)) + $(E)echo GEN $@ + $(Q)echo "#include \"lib/birdlib.h\"" > $@ + $(Q)$(patsubst %,echo 'void %(void);' >> $@;,$(PROTO_BUILD)) + $(Q)echo "void protos_build_gen(void) {" >> $@ + $(Q)$(patsubst %,echo ' %();'>>$@;,$(PROTO_BUILD)) + $(Q)echo "}" >> $@ + +tests_src := tests_targets := $(tests_targets) $(tests-target-files) tests_objs := $(tests_objs) $(src-o-files) diff --git a/nest/bird.h b/nest/bird.h index 55712abe..931974a0 100644 --- a/nest/bird.h +++ b/nest/bird.h @@ -9,7 +9,6 @@ #ifndef _BIRD_BIRD_H_ #define _BIRD_BIRD_H_ -#include "sysdep/config.h" #include "lib/birdlib.h" #include "lib/ip.h" #include "lib/net.h" @@ -319,7 +319,6 @@ cli_new(void *priv) c->event->data = c; c->cont = cli_hello; c->parser_pool = lp_new_default(c->pool); - c->show_pool = lp_new_default(c->pool); c->rx_buf = mb_alloc(c->pool, CLI_RX_BUF_SIZE); ev_schedule(c->event); return c; @@ -409,11 +408,14 @@ void cli_free(cli *c) { cli_set_log_echo(c, 0, 0); + int defer = 0; if (c->cleanup) - c->cleanup(c); + defer = c->cleanup(c); if (c == cmd_reconfig_stored_cli) cmd_reconfig_stored_cli = NULL; - rfree(c->pool); + + if (!defer) + rfree(c->pool); } /** @@ -33,12 +33,12 @@ typedef struct cli { struct cli_out *tx_buf, *tx_pos, *tx_write; event *event; void (*cont)(struct cli *c); - void (*cleanup)(struct cli *c); + int (*cleanup)(struct cli *c); /* Return 0 if finished and cli may be freed immediately. + Otherwise return 1 and call rfree(c->pool) when appropriate. */ void *rover; /* Private to continuation routine */ int last_reply; int restricted; /* CLI is restricted to read-only commands */ struct linpool *parser_pool; /* Pool used during parsing */ - struct linpool *show_pool; /* Pool used during route show */ byte *ring_buf; /* Ring buffer for asynchronous messages */ byte *ring_end, *ring_read, *ring_write; /* Pointers to the ring buffer */ uint ring_overflow; /* Counter of ring overflows */ diff --git a/nest/cmds.c b/nest/cmds.c index 1a16f9c7..092be48a 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -8,7 +8,7 @@ #include "nest/bird.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "nest/cmds.h" @@ -51,17 +51,18 @@ cmd_show_symbols(struct sym_show_data *sd) cli_msg(1010, "%-8s\t%s", sd->sym->name, cf_symbol_class_name(sd->sym)); else { - HASH_WALK(config->sym_hash, next, sym) - { - if (!sym->scope->active) - continue; + for (const struct sym_scope *scope = config->root_scope; scope; scope = scope->next) + HASH_WALK(scope->hash, next, sym) + { + if (!sym->scope->active) + continue; - if (sd->type && (sym->class != sd->type)) - continue; + if (sd->type && (sym->class != sd->type)) + continue; - cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); - } - HASH_WALK_END; + cli_msg(-1010, "%-8s\t%s", sym->name, cf_symbol_class_name(sym)); + } + HASH_WALK_END; cli_msg(0, ""); } @@ -108,6 +109,7 @@ print_size(char *dsc, struct resmem vals) extern pool *rt_table_pool; extern pool *rta_pool; +extern uint *pages_kept; void cmd_show_memory(void) @@ -119,8 +121,8 @@ cmd_show_memory(void) print_size("Protocols:", rmemsize(proto_pool)); struct resmem total = rmemsize(&root_pool); #ifdef HAVE_MMAP - print_size("Standby memory:", (struct resmem) { .overhead = get_page_size() * pages_kept }); - total.overhead += get_page_size() * pages_kept; + print_size("Standby memory:", (struct resmem) { .overhead = page_size * *pages_kept }); + total.overhead += page_size * *pages_kept; #endif print_size("Total:", total); cli_msg(0, ""); @@ -132,7 +134,7 @@ cmd_eval(const struct f_line *expr) buffer buf; LOG_BUFFER_INIT(buf); - if (f_eval_buf(expr, this_cli->parser_pool, &buf) > F_RETURN) + if (f_eval_buf(expr, &buf) > F_RETURN) { cli_msg(8008, "runtime error"); return; diff --git a/nest/config.Y b/nest/config.Y index ac599c09..91147a29 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -112,28 +112,28 @@ proto_postconfig(void) CF_DECLS -CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT) +CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT, PIPE) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, DEFAULT, TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) CF_KEYWORDS(PASSWORD, KEY, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, CHANNELS, INTERFACES) CF_KEYWORDS(ALGORITHM, KEYED, HMAC, MD5, SHA1, SHA256, SHA384, SHA512, BLAKE2S128, BLAKE2S256, BLAKE2B256, BLAKE2B512) -CF_KEYWORDS(PRIMARY, STATS, COUNT, BY, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) +CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, IN, COMMANDS, PREEXPORT, NOEXPORT, EXPORTED, GENERATE) CF_KEYWORDS(BGP, PASSWORDS, DESCRIPTION) -CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) +CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, CLASS, DSCP) CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, ROUTE, PROTOCOL, BASE, LOG, S, MS, US) -CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, FLUSH, AS) +CF_KEYWORDS(GRACEFUL, RESTART, WAIT, MAX, AS) CF_KEYWORDS(MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE) CF_KEYWORDS(CHECK, LINK) -CF_KEYWORDS(SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) +CF_KEYWORDS(CORK, SORTED, TRIE, MIN, MAX, SETTLE, TIME, GC, THRESHOLD, PERIOD) /* For r_args_channel */ CF_KEYWORDS(IPV4, IPV4_MC, IPV4_MPLS, IPV6, IPV6_MC, IPV6_MPLS, IPV6_SADR, VPN4, VPN4_MC, VPN4_MPLS, VPN6, VPN6_MC, VPN6_MPLS, ROA4, ROA6, FLOW4, FLOW6, MPLS, PRI, SEC) -CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, +CF_ENUM(T_ENUM_RTS, RTS_, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE, BABEL) CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE, UNDEFINED) -CF_ENUM(T_ENUM_RTD, RTD_, UNICAST, BLACKHOLE, UNREACHABLE, PROHIBIT) +CF_ENUM(T_ENUM_RTD, RTD_, BLACKHOLE, UNREACHABLE, PROHIBIT) CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) CF_ENUM_PX(T_ENUM_AF, AF_, AFI_, IPV4, IPV6) @@ -231,6 +231,10 @@ table_opt: | MAX SETTLE TIME expr_us { this_table->max_settle_time = $4; } | GC THRESHOLD expr { this_table->gc_threshold = $3; } | GC PERIOD expr_us { this_table->gc_period = (uint) $3; if ($3 > 3600 S_) cf_error("GC period must be at most 3600 s"); } + | CORK THRESHOLD expr expr { + if ($3 > $4) cf_error("Cork low threshold must be lower than the high threshold."); + this_table->cork_threshold.low = $3; + this_table->cork_threshold.high = $4; } ; table_opts: @@ -308,12 +312,25 @@ channel_item_: this_channel->table = $2; } | IMPORT imexport { this_channel->in_filter = $2; } + | EXPORT IN net_any imexport { + if (this_channel->net_type && ($3->type != this_channel->net_type)) + cf_error("Incompatible export prefilter type"); + this_channel->out_subprefix = $3; + this_channel->out_filter = $4; + } | EXPORT imexport { this_channel->out_filter = $2; } | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } - | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; } + | IMPORT KEEP FILTERED bool { + if ($4) + this_channel->in_keep |= RIK_REJECTED; + else if ((this_channel->in_keep & RIK_PREFILTER) == RIK_PREFILTER) + cf_error("Import keep filtered is implied by the import table."); + else + this_channel->in_keep &= ~RIK_REJECTED; + } | RPKI RELOAD bool { this_channel->rpki_reload = $3; } ; @@ -373,6 +390,7 @@ debug_default: DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } + | DEBUG TABLES bool { new_config->table_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -458,6 +476,7 @@ proto: dev_proto '}' ; dev_proto_start: proto_start DIRECT { this_proto = proto_config_new(&proto_device, $1); init_list(&DIRECT_CFG->iface_list); + this_proto->late_if_feed = 1; } ; @@ -640,28 +659,29 @@ r_args: $$ = cfg_allocz(sizeof(struct rt_show_data)); init_list(&($$->tables)); $$->filter = FILTER_ACCEPT; - $$->running_on_config = new_config->fallback; + $$->running_on_config = config; + $$->cli = this_cli; } | r_args net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $2; - $$->addr_mode = RSD_ADDR_EQUAL; + $$->addr_mode = TE_ADDR_EQUAL; } | r_args FOR r_args_for { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $3; - $$->addr_mode = RSD_ADDR_FOR; + $$->addr_mode = TE_ADDR_FOR; } | r_args IN net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument"); $$->addr = $3; - $$->addr_mode = RSD_ADDR_IN; + $$->addr_mode = TE_ADDR_IN; } -| r_args TABLE CF_SYM_KNOWN { +| r_args TABLE symbol_known { cf_assert_symbol($3, SYM_TABLE); $$ = $1; rt_show_add_table($$, $3->table->table); @@ -675,13 +695,13 @@ r_args: $$->tables_defined_by = RSD_TDB_ALL; } | r_args IMPORT TABLE channel_arg { - if (!$4->in_table) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->in_table); + if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); + rt_show_add_exporter($$, &$4->table->exporter, "import")->prefilter = $4; $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { if (!$4->out_table) cf_error("No export table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->out_table); + rt_show_add_exporter($$, $4->out_table, "export"); $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args FILTER filter { @@ -706,7 +726,7 @@ r_args: $$ = $1; $$->filtered = 1; } - | r_args export_mode CF_SYM_KNOWN { + | r_args export_mode symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -723,7 +743,7 @@ r_args: $$->export_channel = $3; $$->tables_defined_by = RSD_TDB_INDIRECT; } - | r_args PROTOCOL CF_SYM_KNOWN { + | r_args PROTOCOL symbol_known { cf_assert_symbol($3, SYM_PROTO); struct proto_config *c = (struct proto_config *) $3->proto; $$ = $1; @@ -851,9 +871,11 @@ CF_CLI(DUMP INTERFACES,,, [[Dump interface information]]) CF_CLI(DUMP NEIGHBORS,,, [[Dump neighbor cache]]) { neigh_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP ATTRIBUTES,,, [[Dump attribute cache]]) -{ rta_dump_all(); cli_msg(0, ""); } ; -CF_CLI(DUMP ROUTES,,, [[Dump routing table]]) +{ ea_dump_all(); cli_msg(0, ""); } ; +CF_CLI(DUMP ROUTES,,, [[Dump routes]]) { rt_dump_all(); cli_msg(0, ""); } ; +CF_CLI(DUMP TABLES,,, [[Dump table connections]]) +{ rt_dump_hooks_all(); cli_msg(0, ""); } ; CF_CLI(DUMP PROTOCOLS,,, [[Dump protocol information]]) { protos_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]]) @@ -923,9 +945,6 @@ proto_patt2: | TEXT { $$.ptr = $1; $$.patt = 1; } ; -dynamic_attr: IGP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_GEN_IGP_METRIC); } ; - - CF_CODE CF_END diff --git a/nest/limit.h b/nest/limit.h new file mode 100644 index 00000000..5838ad3b --- /dev/null +++ b/nest/limit.h @@ -0,0 +1,49 @@ +/* + * BIRD Internet Routing Daemon -- Limits + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIMIT_H_ +#define _BIRD_LIMIT_H_ + +struct limit { + u32 max; + u32 count; + int (*action)(struct limit *, void *data); +}; + +static inline int limit_do_action(struct limit *l, void *data) +{ + return l->action ? l->action(l, data) : 1; +} + +static inline int limit_push(struct limit *l, void *data) +{ + if ((l->count >= l->max) && limit_do_action(l, data)) + return 1; + + l->count++; + return 0; +} + +static inline void limit_pop(struct limit *l) +{ + --l->count; +} + +static inline void limit_reset(struct limit *l) +{ + l->count = 0; +} + +static inline void limit_update(struct limit *l, void *data, u32 max) +{ + if (l->count > (l->max = max)) + limit_do_action(l, data); +} + +#endif diff --git a/nest/neighbor.c b/nest/neighbor.c index 1a31fb79..7cf9c85d 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -345,7 +345,7 @@ neigh_free(neighbor *n) { rem_node(&n->n); rem_node(&n->if_n); - sl_free(neigh_slab, n); + sl_free(n); } /** diff --git a/nest/proto-hooks.c b/nest/proto-hooks.c index bc88b4b4..716ce86c 100644 --- a/nest/proto-hooks.c +++ b/nest/proto-hooks.c @@ -76,16 +76,6 @@ void dump(struct proto *p) { DUMMY; } /** - * dump_attrs - dump protocol-dependent attributes - * @e: a route entry - * - * This hook dumps all attributes in the &rte which belong to this - * protocol to the debug output. - */ -void dump_attrs(rte *e) -{ DUMMY; } - -/** * start - request instance startup * @p: protocol instance * @@ -228,36 +218,6 @@ void neigh_notify(neighbor *neigh) { DUMMY; } /** - * make_tmp_attrs - convert embedded attributes to temporary ones - * @e: route entry - * @pool: linear pool to allocate attribute memory in - * - * This hook is called by the routing table functions if they need - * to convert the protocol attributes embedded directly in the &rte - * to temporary extended attributes in order to distribute them - * to other protocols or to filters. make_tmp_attrs() creates - * an &ea_list in the linear pool @pool, fills it with values of the - * temporary attributes and returns a pointer to it. - */ -ea_list *make_tmp_attrs(rte *e, struct linpool *pool) -{ DUMMY; } - -/** - * store_tmp_attrs - convert temporary attributes to embedded ones - * @e: route entry - * @attrs: temporary attributes to be converted - * - * This hook is an exact opposite of make_tmp_attrs() -- it takes - * a list of extended attributes and converts them to attributes - * embedded in the &rte corresponding to this protocol. - * - * You must be prepared for any of the attributes being missing - * from the list and use default values instead. - */ -void store_tmp_attrs(rte *e, ea_list *attrs) -{ DUMMY; } - -/** * preexport - pre-filtering decisions before route export * @p: protocol instance the route is going to be exported to * @e: the route in question diff --git a/nest/proto.c b/nest/proto.c index 31ee1fa1..853b1cf9 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -16,17 +16,16 @@ #include "lib/timer.h" #include "lib/string.h" #include "conf/conf.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/iface.h" #include "nest/cli.h" #include "filter/filter.h" #include "filter/f-inst.h" pool *proto_pool; -list proto_list; +list STATIC_LIST_INIT(proto_list); -static list protocol_list; -struct protocol *class_to_protocol[PROTOCOL__MAX]; +static list STATIC_LIST_INIT(protocol_list); #define CD(c, msg, args...) ({ if (c->debug & D_STATES) log(L_TRACE "%s.%s: " msg, c->proto->name, c->name ?: "?", ## args); }) #define PD(p, msg, args...) ({ if (p->debug & D_STATES) log(L_TRACE "%s: " msg, p->name, ## args); }) @@ -43,8 +42,7 @@ static int graceful_restart_state; static u32 graceful_restart_locks; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; -static char *c_states[] = { "DOWN", "START", "UP", "FLUSHING" }; -static char *e_states[] = { "DOWN", "FEEDING", "READY" }; +static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; extern struct protocol proto_unix_iface; @@ -52,15 +50,38 @@ static void channel_request_reload(struct channel *c); static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); -static void channel_verify_limits(struct channel *c); -static inline void channel_reset_limit(struct channel_limit *l); - +static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_reset_limit(struct channel *c, struct limit *l, int dir); +static void channel_feed_end(struct channel *c); +static void channel_export_stopped(struct rt_export_request *req); static inline int proto_is_done(struct proto *p) -{ return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } +{ return (p->proto_state == PS_DOWN) && proto_is_inactive(p); } + +static inline event_list *proto_event_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_event_list : birdloop_event_list(p->loop); } + +static inline event_list *proto_work_list(struct proto *p) +{ return p->loop == &main_birdloop ? &global_work_list : birdloop_event_list(p->loop); } + +static inline void proto_send_event(struct proto *p) +{ ev_send(proto_event_list(p), p->event); } + +#define PROTO_ENTER_FROM_MAIN(p) ({ \ + ASSERT_DIE(birdloop_inside(&main_birdloop)); \ + struct birdloop *_loop = (p)->loop; \ + if (_loop != &main_birdloop) birdloop_enter(_loop); \ + _loop; \ + }) + +#define PROTO_LEAVE_FROM_MAIN(loop) ({ if (loop != &main_birdloop) birdloop_leave(loop); }) + +#define PROTO_LOCKED_FROM_MAIN(p) for (struct birdloop *_proto_loop = PROTO_ENTER_FROM_MAIN(p); _proto_loop; PROTO_LEAVE_FROM_MAIN(_proto_loop), (_proto_loop = NULL)) + static inline int channel_is_active(struct channel *c) -{ return (c->channel_state == CS_START) || (c->channel_state == CS_UP); } +{ return (c->channel_state != CS_DOWN); } static inline int channel_reloadable(struct channel *c) { return c->proto->reload_routes && c->reloadable; } @@ -68,10 +89,46 @@ static inline int channel_reloadable(struct channel *c) static inline void channel_log_state_change(struct channel *c) { - if (c->export_state) - CD(c, "State changed to %s/%s", c_states[c->channel_state], e_states[c->export_state]); - else - CD(c, "State changed to %s", c_states[c->channel_state]); + CD(c, "State changed to %s", c_states[c->channel_state]); +} + +void +channel_import_log_state_change(struct rt_import_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + CD(c, "Channel import state changed to %s", rt_import_state_name(state)); +} + +void +channel_export_log_state_change(struct rt_export_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + CD(c, "Channel export state changed to %s", rt_export_state_name(state)); + + switch (state) + { + case TES_FEEDING: + if (c->proto->feed_begin) + c->proto->feed_begin(c, !c->refeeding); + break; + case TES_READY: + channel_feed_end(c); + break; + } +} + +static void +channel_dump_import_req(struct rt_import_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + debug(" Channel %s.%s import request %p\n", c->proto->name, c->name, req); +} + +static void +channel_dump_export_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + debug(" Channel %s.%s export request %p\n", c->proto->name, c->name, req); } static void @@ -141,6 +198,15 @@ proto_find_channel_by_name(struct proto *p, const char *n) return NULL; } +int channel_preimport(struct rt_import_request *req, rte *new, rte *old); + +void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_accepted(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_merged(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + /** * proto_add_channel - connect protocol to a routing table * @p: protocol instance @@ -165,23 +231,25 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->channel = cf->channel; c->proto = p; c->table = cf->table->table; + rt_lock_table(c->table); c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + c->out_subprefix = cf->out_subprefix; + + channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_init_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); c->net_type = cf->net_type; c->ra_mode = cf->ra_mode; c->preference = cf->preference; c->debug = cf->debug; c->merge_limit = cf->merge_limit; - c->in_keep_filtered = cf->in_keep_filtered; + c->in_keep = cf->in_keep; c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; - c->export_state = ES_DOWN; c->last_state_change = current_time(); c->reloadable = 1; @@ -203,6 +271,7 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c) CD(c, "Removed", c->name); + rt_unlock_table(c->table); rem_node(&c->n); mb_free(c); } @@ -223,7 +292,7 @@ proto_pause_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_START); + channel_set_state(c, CS_PAUSE); } static void @@ -232,7 +301,7 @@ proto_stop_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_FLUSHING); + channel_set_state(c, CS_STOP); } static void @@ -244,75 +313,10 @@ proto_remove_channels(struct proto *p) } static void -channel_schedule_feed(struct channel *c, int initial) -{ - // DBG("%s: Scheduling meal\n", p->name); - ASSERT(c->channel_state == CS_UP); - - c->export_state = ES_FEEDING; - c->refeeding = !initial; - - ev_schedule_work(c->feed_event); -} - -static void -channel_feed_loop(void *ptr) -{ - struct channel *c = ptr; - - if (c->export_state != ES_FEEDING) - return; - - /* Start feeding */ - if (!c->feed_active) - { - if (c->proto->feed_begin) - c->proto->feed_begin(c, !c->refeeding); - - c->refeed_pending = 0; - } - - // DBG("Feeding protocol %s continued\n", p->name); - if (!rt_feed_channel(c)) - { - ev_schedule_work(c->feed_event); - return; - } - - /* Reset export limit if the feed ended with acceptable number of exported routes */ - struct channel_limit *l = &c->out_limit; - if (c->refeeding && - (l->state == PLS_BLOCKED) && - (c->refeed_count <= l->limit) && - (c->stats.exp_routes <= l->limit)) - { - log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->limit); - channel_reset_limit(&c->out_limit); - - /* Continue in feed - it will process routing table again from beginning */ - c->refeed_count = 0; - ev_schedule_work(c->feed_event); - return; - } - - // DBG("Feeding protocol %s finished\n", p->name); - c->export_state = ES_READY; - channel_log_state_change(c); - - if (c->proto->feed_end) - c->proto->feed_end(c); - - /* Restart feeding */ - if (c->refeed_pending) - channel_request_feeding(c); -} - - -static void -channel_roa_in_changed(struct rt_subscription *s) +channel_roa_in_changed(void *_data) { - struct channel *c = s->data; - int active = c->reload_event && ev_active(c->reload_event); + struct channel *c = _data; + int active = !!c->reload_req.hook; CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); @@ -323,17 +327,15 @@ channel_roa_in_changed(struct rt_subscription *s) } static void -channel_roa_out_changed(struct rt_subscription *s) +channel_roa_out_changed(void *_data) { - struct channel *c = s->data; - int active = (c->export_state == ES_FEEDING); + struct channel *c = _data; + CD(c, "Feeding triggered by RPKI change"); - CD(c, "Feeding triggered by RPKI change%s", active ? " - already active" : ""); + c->refeed_pending = 1; - if (!active) - channel_request_feeding(c); - else - c->refeed_pending = 1; + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); } /* Temporary code, subscriptions should be changed to resources */ @@ -345,14 +347,14 @@ struct roa_subscription { static int channel_roa_is_subscribed(struct channel *c, rtable *tab, int dir) { - void (*hook)(struct rt_subscription *) = + void (*hook)(void *) = dir ? channel_roa_in_changed : channel_roa_out_changed; struct roa_subscription *s; node *n; WALK_LIST2(s, n, c->roa_subscriptions, roa_node) - if ((s->s.tab == tab) && (s->s.hook == hook)) + if ((s->s.tab == tab) && (s->s.event->hook == hook)) return 1; return 0; @@ -366,9 +368,9 @@ channel_roa_subscribe(struct channel *c, rtable *tab, int dir) return; struct roa_subscription *s = mb_allocz(c->proto->pool, sizeof(struct roa_subscription)); + s->s.event = ev_new_init(c->proto->pool, dir ? channel_roa_in_changed : channel_roa_out_changed, c); + s->s.list = proto_work_list(c->proto); - s->s.hook = dir ? channel_roa_in_changed : channel_roa_out_changed; - s->s.data = c; rt_subscribe(tab, &s->s); add_tail(&c->roa_subscriptions, &s->roa_node); @@ -379,6 +381,7 @@ channel_roa_unsubscribe(struct roa_subscription *s) { rt_unsubscribe(&s->s); rem_node(&s->roa_node); + rfree(s->s.event); mb_free(s); } @@ -399,7 +402,7 @@ channel_roa_subscribe_filter(struct channel *c, int dir) #ifdef CONFIG_BGP /* No automatic reload for BGP channels without in_table / out_table */ if (c->channel == &channel_bgp) - valid = dir ? !!c->in_table : !!c->out_table; + valid = dir ? ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) : !!c->out_table; #endif struct filter_iterator fit; @@ -409,14 +412,8 @@ channel_roa_subscribe_filter(struct channel *c, int dir) { switch (fi->fi_code) { - case FI_ROA_CHECK_IMPLICIT: - tab = fi->i_FI_ROA_CHECK_IMPLICIT.rtc->table; - if (valid) channel_roa_subscribe(c, tab, dir); - found = 1; - break; - - case FI_ROA_CHECK_EXPLICIT: - tab = fi->i_FI_ROA_CHECK_EXPLICIT.rtc->table; + case FI_ROA_CHECK: + tab = fi->i_FI_ROA_CHECK.rtc->table; if (valid) channel_roa_subscribe(c, tab, dir); found = 1; break; @@ -445,119 +442,239 @@ channel_roa_unsubscribe_all(struct channel *c) } static void -channel_start_export(struct channel *c) +channel_start_import(struct channel *c) { + if (c->in_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started import", c->proto->name, c->name); + return; + } + + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->in_req = (struct rt_import_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_import_req, + .log_state_change = channel_import_log_state_change, + .preimport = channel_preimport, + }; + ASSERT(c->channel_state == CS_UP); - ASSERT(c->export_state == ES_DOWN); - channel_schedule_feed(c, 1); /* Sets ES_FEEDING */ + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); + + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + + DBG("%s.%s: Channel start import req=%p\n", c->proto->name, c->name, &c->in_req); + rt_request_import(c->table, &c->in_req); } static void -channel_stop_export(struct channel *c) +channel_start_export(struct channel *c) { - /* Need to abort feeding */ - if (c->export_state == ES_FEEDING) - rt_feed_channel_abort(c); + if (c->out_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started export", c->proto->name, c->name); + return; + } + + ASSERT(c->channel_state == CS_UP); + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->out_req = (struct rt_export_request) { + .name = rn, + .list = proto_work_list(c->proto), + .addr = c->out_subprefix, + .addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_export_req, + .log_state_change = channel_export_log_state_change, + }; + + bmap_init(&c->export_map, c->proto->pool, 1024); + bmap_init(&c->export_reject_map, c->proto->pool, 1024); + + channel_reset_limit(c, &c->out_limit, PLD_OUT); + + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); + + switch (c->ra_mode) { + case RA_OPTIMAL: + c->out_req.export_one = rt_notify_optimal; + break; + case RA_ANY: + c->out_req.export_one = rt_notify_any; + c->out_req.export_bulk = rt_feed_any; + break; + case RA_ACCEPTED: + c->out_req.export_bulk = rt_notify_accepted; + break; + case RA_MERGED: + c->out_req.export_bulk = rt_notify_merged; + break; + default: + bug("Unknown route announcement mode"); + } - c->export_state = ES_DOWN; - c->stats.exp_routes = 0; - bmap_reset(&c->export_map, 1024); + DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); + rt_request_export(&c->table->exporter, &c->out_req); } +static void +channel_check_stopped(struct channel *c) +{ + switch (c->channel_state) + { + case CS_STOP: + if (c->out_req.hook || c->in_req.hook) + return; + + channel_set_state(c, CS_DOWN); + proto_send_event(c->proto); + + break; + case CS_PAUSE: + if (c->out_req.hook) + return; + + channel_set_state(c, CS_START); + break; + default: + bug("Stopped channel in a bad state: %d", c->channel_state); + } + + DBG("%s.%s: Channel requests/hooks stopped (in state %s)\n", c->proto->name, c->name, c_states[c->channel_state]); +} -/* Called by protocol for reload from in_table */ void -channel_schedule_reload(struct channel *c) +channel_import_stopped(struct rt_import_request *req) { - ASSERT(c->channel_state == CS_UP); + struct channel *c = SKIP_BACK(struct channel, in_req, req); + + req->hook = NULL; - rt_reload_channel_abort(c); - ev_schedule_work(c->reload_event); + mb_free(c->in_req.name); + c->in_req.name = NULL; + + channel_check_stopped(c); } static void -channel_reload_loop(void *ptr) +channel_export_stopped(struct rt_export_request *req) { - struct channel *c = ptr; + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* Start reload */ - if (!c->reload_active) - c->reload_pending = 0; + /* The hook has already stopped */ + req->hook = NULL; - if (!rt_reload_channel(c)) + if (c->refeed_pending) { - ev_schedule_work(c->reload_event); + c->refeeding = 1; + c->refeed_pending = 0; + rt_request_export(&c->table->exporter, req); return; } - /* Restart reload */ - if (c->reload_pending) - channel_request_reload(c); + mb_free(c->out_req.name); + c->out_req.name = NULL; + + channel_check_stopped(c); } static void -channel_reset_import(struct channel *c) +channel_feed_end(struct channel *c) { - /* Need to abort feeding */ - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); + struct rt_export_request *req = &c->out_req; + + /* Reset export limit if the feed ended with acceptable number of exported routes */ + struct limit *l = &c->out_limit; + if (c->refeeding && + (c->limit_active & (1 << PLD_OUT)) && + (c->refeed_count <= l->max) && + (l->count <= l->max)) + { + log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->max); + + c->refeed_pending = 1; + rt_stop_export(req, channel_export_stopped); + return; + } - rt_prune_sync(c->in_table, 1); + if (c->proto->feed_end) + c->proto->feed_end(c); + + if (c->refeed_pending) + rt_stop_export(req, channel_export_stopped); + else + c->refeeding = 0; } -static void -channel_reset_export(struct channel *c) +/* Called by protocol for reload from in_table */ +void +channel_schedule_reload(struct channel *c) { - /* Just free the routes */ - rt_prune_sync(c->out_table, 1); + ASSERT(c->in_req.hook); + + rt_request_export(&c->table->exporter, &c->reload_req); } -/* Called by protocol to activate in_table */ -void -channel_setup_in_table(struct channel *c) +static void +channel_reload_stopped(struct rt_export_request *req) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - cf->name = "import"; - cf->addr_type = c->net_type; - cf->internal = 1; + /* Restart reload */ + if (c->reload_pending) + channel_request_reload(c); +} - c->in_table = rt_setup(c->proto->pool, cf); +static void +channel_reload_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, channel_reload_stopped); +} - c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c); +static void +channel_reload_dump_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, reload_req, req); + debug(" Channel %s.%s import reload request %p\n", c->proto->name, c->name, req); } -/* Called by protocol to activate out_table */ +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + +/* Called by protocol to activate in_table */ void -channel_setup_out_table(struct channel *c) +channel_setup_in_table(struct channel *c) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); - cf->name = "export"; - cf->addr_type = c->net_type; - cf->internal = 1; + c->reload_req = (struct rt_export_request) { + .name = mb_sprintf(c->proto->pool, "%s.%s.import", c->proto->name, c->name), + .list = proto_work_list(c->proto), + .trace_routes = c->debug | c->proto->debug, + .export_bulk = channel_reload_export_bulk, + .dump_req = channel_reload_dump_req, + .log_state_change = channel_reload_log_state_change, + }; - c->out_table = rt_setup(c->proto->pool, cf); + c->in_keep |= RIK_PREFILTER; } static void channel_do_start(struct channel *c) { - rt_lock_table(c->table); - add_tail(&c->table->channels, &c->table_node); c->proto->active_channels++; - c->feed_event = ev_new_init(c->proto->pool, channel_feed_loop, c); - - bmap_init(&c->export_map, c->proto->pool, 1024); - memset(&c->stats, 0, sizeof(struct proto_stats)); - - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); - channel_reset_limit(&c->out_limit); - CALL(c->channel->start, c); + + channel_start_import(c); } static void @@ -572,9 +689,31 @@ channel_do_up(struct channel *c) } static void -channel_do_flush(struct channel *c) +channel_do_pause(struct channel *c) +{ + /* Need to abort feeding */ + if (c->reload_req.hook) + { + c->reload_pending = 0; + rt_stop_export(&c->reload_req, channel_reload_stopped); + } + + /* Stop export */ + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); + + channel_roa_unsubscribe_all(c); + + bmap_free(&c->export_map); + bmap_free(&c->export_reject_map); +} + +static void +channel_do_stop(struct channel *c) { - rt_schedule_prune(c->table); + /* Stop import */ + if (c->in_req.hook) + rt_stop_import(&c->in_req, channel_import_stopped); c->gr_wait = 0; if (c->gr_lock) @@ -582,32 +721,19 @@ channel_do_flush(struct channel *c) CALL(c->channel->shutdown, c); - /* This have to be done in here, as channel pool is freed before channel_do_down() */ - bmap_free(&c->export_map); - c->in_table = NULL; - c->reload_event = NULL; - c->out_table = NULL; - - channel_roa_unsubscribe_all(c); } static void channel_do_down(struct channel *c) { - ASSERT(!c->feed_active && !c->reload_active); + ASSERT(!c->reload_req.hook); - rem_node(&c->table_node); - rt_unlock_table(c->table); c->proto->active_channels--; - if ((c->stats.imp_routes + c->stats.filt_routes) != 0) - log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); - // bmap_free(&c->export_map); - memset(&c->stats, 0, sizeof(struct proto_stats)); + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; /* The in_table and out_table are going to be freed by freeing their resource pools. */ @@ -616,14 +742,13 @@ channel_do_down(struct channel *c) /* Schedule protocol shutddown */ if (proto_is_done(c->proto)) - ev_schedule(c->proto->event); + proto_send_event(c->proto); } void channel_set_state(struct channel *c, uint state) { uint cs = c->channel_state; - uint es = c->export_state; DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]); if (state == cs) @@ -635,20 +760,11 @@ channel_set_state(struct channel *c, uint state) switch (state) { case CS_START: - ASSERT(cs == CS_DOWN || cs == CS_UP); + ASSERT(cs == CS_DOWN || cs == CS_PAUSE); if (cs == CS_DOWN) channel_do_start(c); - if (es != ES_DOWN) - channel_stop_export(c); - - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); - - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); - break; case CS_UP: @@ -663,23 +779,24 @@ channel_set_state(struct channel *c, uint state) channel_do_up(c); break; - case CS_FLUSHING: - ASSERT(cs == CS_START || cs == CS_UP); + case CS_PAUSE: + ASSERT(cs == CS_UP); - if (es != ES_DOWN) - channel_stop_export(c); + if (cs == CS_UP) + channel_do_pause(c); + break; - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); + case CS_STOP: + ASSERT(cs == CS_UP || cs == CS_START || cs == CS_PAUSE); - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); + if (cs == CS_UP) + channel_do_pause(c); - channel_do_flush(c); + channel_do_stop(c); break; case CS_DOWN: - ASSERT(cs == CS_FLUSHING); + ASSERT(cs == CS_STOP); channel_do_down(c); break; @@ -704,35 +821,16 @@ channel_set_state(struct channel *c, uint state) void channel_request_feeding(struct channel *c) { - ASSERT(c->channel_state == CS_UP); - - CD(c, "Feeding requested"); - - /* Do nothing if we are still waiting for feeding */ - if (c->export_state == ES_DOWN) - return; - - /* If we are already feeding, we want to restart it */ - if (c->export_state == ES_FEEDING) - { - /* Unless feeding is in initial state */ - if (!c->feed_active) - return; - - rt_feed_channel_abort(c); - } + ASSERT(c->out_req.hook); - /* Track number of exported routes during refeed */ - c->refeed_count = 0; - - channel_schedule_feed(c, 0); /* Sets ES_FEEDING */ - channel_log_state_change(c); + c->refeed_pending = 1; + rt_stop_export(&c->out_req, channel_export_stopped); } static void channel_request_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); ASSERT(channel_reloadable(c)); CD(c, "Reload requested"); @@ -743,8 +841,8 @@ channel_request_reload(struct channel *c) * Should this be done before reload_routes() hook? * Perhaps, but routes are updated asynchronously. */ - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); } const struct channel_class channel_basic = { @@ -830,7 +928,12 @@ int channel_reconfigure(struct channel *c, struct channel_config *cf) { /* FIXME: better handle these changes, also handle in_keep_filtered */ - if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode))) + if ((c->table != cf->table->table) || + (cf->ra_mode && (c->ra_mode != cf->ra_mode)) || + (cf->in_keep != c->in_keep) || + cf->out_subprefix && c->out_subprefix && + !net_equal(cf->out_subprefix, c->out_subprefix) || + (!cf->out_subprefix != !c->out_subprefix)) return 0; /* Note that filter_same() requires arguments in (new, old) order */ @@ -847,19 +950,19 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) /* Reconfigure channel fields */ c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_update_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_update_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_update_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; c->preference = cf->preference; + c->out_req.addr = c->out_subprefix = cf->out_subprefix; c->debug = cf->debug; - c->in_keep_filtered = cf->in_keep_filtered; + c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->rpki_reload = cf->rpki_reload; - channel_verify_limits(c); - /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; @@ -950,17 +1053,34 @@ proto_configure_channel(struct proto *p, struct channel **pc, struct channel_con return 1; } +static void +proto_cleanup(struct proto *p) +{ + rfree(p->pool); + p->pool = NULL; + + p->active = 0; + proto_log_state_change(p); + proto_rethink_goal(p); +} static void -proto_event(void *ptr) +proto_loop_stopped(void *ptr) { struct proto *p = ptr; - if (p->do_start) - { - if_feed_baby(p); - p->do_start = 0; - } + birdloop_enter(&main_birdloop); + + p->loop = &main_birdloop; + proto_cleanup(p); + + birdloop_leave(&main_birdloop); +} + +static void +proto_event(void *ptr) +{ + struct proto *p = ptr; if (p->do_stop) { @@ -970,14 +1090,10 @@ proto_event(void *ptr) } if (proto_is_done(p)) - { - if (p->proto->cleanup) - p->proto->cleanup(p); - - p->active = 0; - proto_log_state_change(p); - proto_rethink_goal(p); - } + if (p->loop != &main_birdloop) + birdloop_stop_self(p->loop, proto_loop_stopped, p); + else + proto_cleanup(p); } @@ -1018,6 +1134,7 @@ proto_init(struct proto_config *c, node *n) struct protocol *pr = c->protocol; struct proto *p = pr->init(c); + p->loop = &main_birdloop; p->proto_state = PS_DOWN; p->last_state_change = current_time(); p->vrf = c->vrf; @@ -1034,11 +1151,19 @@ proto_init(struct proto_config *c, node *n) static void proto_start(struct proto *p) { - /* Here we cannot use p->cf->name since it won't survive reconfiguration */ - p->pool = rp_new(proto_pool, p->proto->name); + DBG("Kicking %s up\n", p->name); + PD(p, "Starting"); + + p->pool = rp_newf(proto_pool, "Protocol %s", p->cf->name); if (graceful_restart_state == GRS_INIT) p->gr_recovery = 1; + + if (p->cf->loop_order != DOMAIN_ORDER(the_bird)) + p->loop = birdloop_new(p->pool, p->cf->loop_order, p->pool->name); + + PROTO_LOCKED_FROM_MAIN(p) + proto_notify_state(p, (p->proto->start ? p->proto->start(p) : PS_UP)); } @@ -1074,6 +1199,7 @@ proto_config_new(struct protocol *pr, int class) cf->class = class; cf->debug = new_config->proto_default_debug; cf->mrtdump = new_config->proto_default_mrtdump; + cf->loop_order = DOMAIN_ORDER(the_bird); init_list(&cf->channels); @@ -1363,11 +1489,20 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty } static void -proto_rethink_goal(struct proto *p) +proto_shutdown(struct proto *p) { - struct protocol *q; - byte goal; + if (p->proto_state == PS_START || p->proto_state == PS_UP) + { + /* Going down */ + DBG("Kicking %s down\n", p->name); + PD(p, "Shutting down"); + proto_notify_state(p, (p->proto->shutdown ? p->proto->shutdown(p) : PS_DOWN)); + } +} +static void +proto_rethink_goal(struct proto *p) +{ if (p->reconfiguring && !p->active) { struct proto_config *nc = p->cf_new; @@ -1387,32 +1522,12 @@ proto_rethink_goal(struct proto *p) /* Determine what state we want to reach */ if (p->disabled || p->reconfiguring) - goal = PS_DOWN; - else - goal = PS_UP; - - q = p->proto; - if (goal == PS_UP) - { - if (!p->active) - { - /* Going up */ - DBG("Kicking %s up\n", p->name); - PD(p, "Starting"); - proto_start(p); - proto_notify_state(p, (q->start ? q->start(p) : PS_UP)); - } - } - else { - if (p->proto_state == PS_START || p->proto_state == PS_UP) - { - /* Going down */ - DBG("Kicking %s down\n", p->name); - PD(p, "Shutting down"); - proto_notify_state(p, (q->shutdown ? q->shutdown(p) : PS_DOWN)); - } + PROTO_LOCKED_FROM_MAIN(p) + proto_shutdown(p); } + else if (!p->active) + proto_start(p); } struct proto * @@ -1524,7 +1639,7 @@ graceful_restart_done(timer *t UNUSED) WALK_LIST(c, p->channels) { /* Resume postponed export of routes */ - if ((c->channel_state == CS_UP) && c->gr_wait && c->proto->rt_notify) + if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) channel_start_export(c); /* Cleanup */ @@ -1614,7 +1729,11 @@ protos_dump_all(void) struct proto *p; WALK_LIST(p, proto_list) { - debug(" protocol %s state %s\n", p->name, p_states[p->proto_state]); +#define DPF(x) (p->x ? " " #x : "") + debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n", + p->name, p, p_states[p->proto_state], p->active_channels, + DPF(disabled), DPF(active), DPF(do_stop), DPF(reconfiguring)); +#undef DPF struct channel *c; WALK_LIST(c, p->channels) @@ -1624,6 +1743,9 @@ protos_dump_all(void) debug("\tInput filter: %s\n", filter_name(c->in_filter)); if (c->out_filter) debug("\tOutput filter: %s\n", filter_name(c->out_filter)); + debug("\tChannel state: %s/%s/%s\n", c_states[c->channel_state], + c->in_req.hook ? rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-", + c->out_req.hook ? rt_export_state_name(rt_export_get_state(c->out_req.hook)) : "-"); } if (p->proto->dump && (p->proto_state != PS_DOWN)) @@ -1643,14 +1765,13 @@ void proto_build(struct protocol *p) { add_tail(&protocol_list, &p->n); - ASSERT(p->class); - ASSERT(!class_to_protocol[p->class]); - class_to_protocol[p->class] = p; } /* FIXME: convert this call to some protocol hook */ extern void bfd_init_all(void); +void protos_build_gen(void); + /** * protos_build - build a protocol list * @@ -1663,44 +1784,7 @@ extern void bfd_init_all(void); void protos_build(void) { - init_list(&proto_list); - init_list(&protocol_list); - - proto_build(&proto_device); -#ifdef CONFIG_RADV - proto_build(&proto_radv); -#endif -#ifdef CONFIG_RIP - proto_build(&proto_rip); -#endif -#ifdef CONFIG_STATIC - proto_build(&proto_static); -#endif -#ifdef CONFIG_MRT - proto_build(&proto_mrt); -#endif -#ifdef CONFIG_OSPF - proto_build(&proto_ospf); -#endif -#ifdef CONFIG_PIPE - proto_build(&proto_pipe); -#endif -#ifdef CONFIG_BGP - proto_build(&proto_bgp); -#endif -#ifdef CONFIG_BFD - proto_build(&proto_bfd); - bfd_init_all(); -#endif -#ifdef CONFIG_BABEL - proto_build(&proto_babel); -#endif -#ifdef CONFIG_RPKI - proto_build(&proto_rpki); -#endif -#ifdef CONFIG_PERF - proto_build(&proto_perf); -#endif + protos_build_gen(); proto_pool = rp_new(&root_pool, "Protocols"); proto_shutdown_timer = tm_new(proto_pool); @@ -1780,108 +1864,132 @@ proto_set_message(struct proto *p, char *msg, int len) } -static const char * -channel_limit_name(struct channel_limit *l) -{ - const char *actions[] = { - [PLA_WARN] = "warn", - [PLA_BLOCK] = "block", - [PLA_RESTART] = "restart", - [PLA_DISABLE] = "disable", - }; +static const char * channel_limit_name[] = { + [PLA_WARN] = "warn", + [PLA_BLOCK] = "block", + [PLA_RESTART] = "restart", + [PLA_DISABLE] = "disable", +}; - return actions[l->action]; -} -/** - * channel_notify_limit: notify about limit hit and take appropriate action - * @c: channel - * @l: limit being hit - * @dir: limit direction (PLD_*) - * @rt_count: the number of routes - * - * The function is called by the route processing core when limit @l - * is breached. It activates the limit and tooks appropriate action - * according to @l->action. - */ -void -channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) +static void +channel_log_limit(struct channel *c, struct limit *l, int dir) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; - const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = c->proto; + log(L_WARN "Channel %s.%s hits route %s limit (%d), action: %s", + c->proto->name, c->name, dir_name[dir], l->max, channel_limit_name[c->limit_actions[dir]]); +} - if (l->state == PLS_BLOCKED) +static void +channel_activate_limit(struct channel *c, struct limit *l, int dir) +{ + if (c->limit_active & (1 << dir)) return; - /* For warning action, we want the log message every time we hit the limit */ - if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) - log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, channel_limit_name(l)); + c->limit_active |= (1 << dir); + channel_log_limit(c, l, dir); +} - switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; +static int +channel_limit_warn(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; + channel_log_limit(c, l, dir); - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + return 0; } -static void -channel_verify_limits(struct channel *c) +static int +channel_limit_block(struct limit *l, void *data) { - struct channel_limit *l; - u32 all_routes = c->stats.imp_routes + c->stats.filt_routes; + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; - l = &c->rx_limit; - if (l->action && (all_routes > l->limit)) - channel_notify_limit(c, l, PLD_RX, all_routes); + channel_activate_limit(c, l, dir); - l = &c->in_limit; - if (l->action && (c->stats.imp_routes > l->limit)) - channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); + return 1; +} - l = &c->out_limit; - if (l->action && (c->stats.exp_routes > l->limit)) - channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); +static const byte chl_dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; + +static int +channel_limit_down(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + struct proto *p = c->proto; + int dir = cld->dir; + + channel_activate_limit(c, l, dir); + + if (p->proto_state == PS_UP) + proto_schedule_down(p, c->limit_actions[dir] == PLA_RESTART, chl_dir_down[dir]); + + return 1; } -static inline void -channel_reset_limit(struct channel_limit *l) +static int (*channel_limit_action[])(struct limit *, void *) = { + [PLA_NONE] = NULL, + [PLA_WARN] = channel_limit_warn, + [PLA_BLOCK] = channel_limit_block, + [PLA_RESTART] = channel_limit_down, + [PLA_DISABLE] = channel_limit_down, +}; + +static void +channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + l->action = channel_limit_action[cf->action]; + c->limit_actions[dir] = cf->action; + + struct channel_limit_data cld = { .c = c, .dir = dir }; + limit_update(l, &cld, cf->action ? cf->limit : ~((u32) 0)); +} + +static void +channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) { - if (l->action) - l->state = PLS_INITIAL; + channel_reset_limit(c, l, dir); + channel_update_limit(c, l, dir, cf); } +static void +channel_reset_limit(struct channel *c, struct limit *l, int dir) +{ + limit_reset(l); + c->limit_active &= ~(1 << dir); +} + +static struct rte_owner_class default_rte_owner_class; + static inline void proto_do_start(struct proto *p) { p->active = 1; - p->do_start = 1; - ev_schedule(p->event); + + rt_init_sources(&p->sources, p->name, proto_event_list(p)); + if (!p->sources.class) + p->sources.class = &default_rte_owner_class; + + if (!p->cf->late_if_feed) + if_feed_baby(p); } static void proto_do_up(struct proto *p) { if (!p->main_source) - { p->main_source = rt_get_source(p, 0); - rt_lock_source(p->main_source); - } + // Locked automaticaly proto_start_channels(p); + + if (p->cf->late_if_feed) + if_feed_baby(p); } static inline void @@ -1896,9 +2004,6 @@ proto_do_stop(struct proto *p) p->down_sched = 0; p->gr_recovery = 0; - p->do_stop = 1; - ev_schedule(p->event); - if (p->main_source) { rt_unlock_source(p->main_source); @@ -1906,6 +2011,10 @@ proto_do_stop(struct proto *p) } proto_stop_channels(p); + rt_destroy_sources(&p->sources, p->event); + + p->do_stop = 1; + proto_send_event(p); } static void @@ -1913,12 +2022,10 @@ proto_do_down(struct proto *p) { p->down_code = 0; neigh_prune(); - rfree(p->pool); - p->pool = NULL; /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) - ev_schedule(p->event); + proto_send_event(p); } @@ -2009,38 +2116,58 @@ proto_state_name(struct proto *p) static void channel_show_stats(struct channel *c) { - struct proto_stats *s = &c->stats; + struct channel_import_stats *ch_is = &c->import_stats; + struct channel_export_stats *ch_es = &c->export_stats; + struct rt_import_stats *rt_is = c->in_req.hook ? &c->in_req.hook->stats : NULL; + struct rt_export_stats *rt_es = c->out_req.hook ? &c->out_req.hook->stats : NULL; - if (c->in_keep_filtered) +#define SON(ie, item) ((ie) ? (ie)->item : 0) +#define SCI(item) SON(ch_is, item) +#define SCE(item) SON(ch_es, item) +#define SRI(item) SON(rt_is, item) +#define SRE(item) SON(rt_es, item) + + u32 rx_routes = c->rx_limit.count; + u32 in_routes = c->in_limit.count; + u32 out_routes = c->out_limit.count; + + if (c->in_keep) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); - - cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s->imp_updates_received, s->imp_updates_invalid, - s->imp_updates_filtered, s->imp_updates_ignored, - s->imp_updates_accepted); - cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s->imp_withdraws_received, s->imp_withdraws_invalid, - s->imp_withdraws_ignored, s->imp_withdraws_accepted); - cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", - s->exp_updates_received, s->exp_updates_rejected, - s->exp_updates_filtered, s->exp_updates_accepted); - cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", - s->exp_withdraws_received, s->exp_withdraws_accepted); + in_routes, out_routes, SRI(pref)); + + cli_msg(-1006, " Route change stats: received rejected filtered ignored RX limit IN limit accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u %10u", + SCI(updates_received), SCI(updates_invalid), + SCI(updates_filtered), SRI(updates_ignored), + SCI(updates_limited_rx), SCI(updates_limited_in), + SRI(updates_accepted)); + cli_msg(-1006, " Import withdraws: %10u %10u --- %10u --- %10u", + SCI(withdraws_received), SCI(withdraws_invalid), + SRI(withdraws_ignored), SRI(withdraws_accepted)); + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u %10u", + SRE(updates_received), SCE(updates_rejected), + SCE(updates_filtered), SCE(updates_limited), SCE(updates_accepted)); + cli_msg(-1006, " Export withdraws: %10u --- --- --- ---%10u", + SRE(withdraws_received), SCE(withdraws_accepted)); + +#undef SRI +#undef SRE +#undef SCI +#undef SCE +#undef SON } void -channel_show_limit(struct channel_limit *l, const char *dsc) +channel_show_limit(struct limit *l, const char *dsc, int active, int action) { if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", channel_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->max, active ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name[action]); } void @@ -2048,6 +2175,8 @@ channel_show_info(struct channel *c) { cli_msg(-1006, " Channel %s", c->name); cli_msg(-1006, " State: %s", c_states[c->channel_state]); + cli_msg(-1006, " Import state: %s", rt_import_state_name(rt_import_get_state(c->in_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(c->out_req.hook))); cli_msg(-1006, " Table: %s", c->table->name); cli_msg(-1006, " Preference: %d", c->preference); cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); @@ -2058,9 +2187,9 @@ channel_show_info(struct channel *c) c->gr_lock ? " pending" : "", c->gr_wait ? " waiting" : ""); - channel_show_limit(&c->rx_limit, "Receive limit:"); - channel_show_limit(&c->in_limit, "Import limit:"); - channel_show_limit(&c->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); + channel_show_limit(&c->in_limit, "Import limit:", c->limit_active & (1 << PLD_IN), c->limit_actions[PLD_IN]); + channel_show_limit(&c->out_limit, "Export limit:", c->limit_active & (1 << PLD_OUT), c->limit_actions[PLD_OUT]); if (c->channel_state != CS_DOWN) channel_show_stats(c); @@ -2135,7 +2264,7 @@ proto_cmd_disable(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_DISABLE; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); cli_msg(-9, "%s: disabled", p->name); } @@ -2168,9 +2297,9 @@ proto_cmd_restart(struct proto *p, uintptr_t arg, int cnt UNUSED) p->disabled = 1; p->down_code = PDC_CMD_RESTART; proto_set_message(p, (char *) arg, -1); - proto_rethink_goal(p); + proto_shutdown(p); p->disabled = 0; - proto_rethink_goal(p); + /* After the protocol shuts down, proto_rethink_goal() is run from proto_event. */ cli_msg(-12, "%s: restarted", p->name); } @@ -2243,8 +2372,15 @@ proto_apply_cmd_symbol(const struct symbol *s, void (* cmd)(struct proto *, uint return; } - cmd(s->proto->proto, arg, 0); - cli_msg(0, ""); + if (s->proto->proto) + { + struct proto *p = s->proto->proto; + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, 0); + cli_msg(0, ""); + } + else + cli_msg(9002, "%s does not exist", s->name); } static void @@ -2255,7 +2391,8 @@ proto_apply_cmd_patt(const char *patt, void (* cmd)(struct proto *, uintptr_t, i WALK_LIST(p, proto_list) if (!patt || patmatch(patt, p->name)) - cmd(p, arg, cnt++); + PROTO_LOCKED_FROM_MAIN(p) + cmd(p, arg, cnt++); if (!cnt) cli_msg(8003, "No protocols match"); diff --git a/nest/protocol.h b/nest/protocol.h index 3b3812a5..709d6415 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -12,7 +12,8 @@ #include "lib/lists.h" #include "lib/resource.h" #include "lib/event.h" -#include "nest/route.h" +#include "nest/rt.h" +#include "nest/limit.h" #include "conf/conf.h" struct iface; @@ -37,56 +38,35 @@ struct symbol; * Routing Protocol */ -enum protocol_class { - PROTOCOL_NONE, - PROTOCOL_BABEL, - PROTOCOL_BFD, - PROTOCOL_BGP, - PROTOCOL_DEVICE, - PROTOCOL_DIRECT, - PROTOCOL_KERNEL, - PROTOCOL_OSPF, - PROTOCOL_MRT, - PROTOCOL_PERF, - PROTOCOL_PIPE, - PROTOCOL_RADV, - PROTOCOL_RIP, - PROTOCOL_RPKI, - PROTOCOL_STATIC, - PROTOCOL__MAX -}; - -extern struct protocol *class_to_protocol[PROTOCOL__MAX]; struct protocol { node n; char *name; char *template; /* Template for automatic generation of names */ int name_counter; /* Counter for automatic name generation */ - enum protocol_class class; /* Machine readable protocol class */ uint preference; /* Default protocol preference */ uint channel_mask; /* Mask of accepted channel types (NB_*) */ uint proto_size; /* Size of protocol data structure */ uint config_size; /* Size of protocol config data structure */ + uint eattr_begin; /* First ID of registered eattrs */ + uint eattr_end; /* End of eattr id zone */ + void (*preconfig)(struct protocol *, struct config *); /* Just before configuring */ void (*postconfig)(struct proto_config *); /* After configuring each instance */ struct proto * (*init)(struct proto_config *); /* Create new instance */ int (*reconfigure)(struct proto *, struct proto_config *); /* Try to reconfigure instance, returns success */ void (*dump)(struct proto *); /* Debugging dump */ - void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ - void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ - void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ - int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ +// int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ void (*show_proto_info)(struct proto *); /* Show protocol info (for `show protocols all' command) */ void (*copy_config)(struct proto_config *, struct proto_config *); /* Copy config from given protocol instance */ }; -void protos_build(void); -void proto_build(struct protocol *); +void protos_build(void); /* Called from sysdep to initialize protocols */ +void proto_build(struct protocol *); /* Called from protocol to register itself */ void protos_preconfig(struct config *); void protos_commit(struct config *new, struct config *old, int force_restart, int type); struct proto * proto_spawn(struct proto_config *cf, uint disabled); @@ -121,8 +101,10 @@ struct proto_config { u8 net_type; /* Protocol network type (NET_*), 0 for undefined */ u8 disabled; /* Protocol enabled/disabled by default */ u8 vrf_set; /* Related VRF instance (below) is defined */ + u8 late_if_feed; /* Delay interface feed after channels are up */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ u32 router_id; /* Protocol specific router ID */ + uint loop_order; /* Launch a birdloop on this locking level; use DOMAIN_ORDER(the_bird) for mainloop */ list channels; /* List of channel configs (struct channel_config) */ struct iface *vrf; /* Related VRF instance, NULL if global */ @@ -133,31 +115,6 @@ struct proto_config { }; /* Protocol statistics */ -struct proto_stats { - /* Import - from protocol to core */ - u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes selected as best in the (adjacent) routing table */ - u32 imp_updates_received; /* Number of route updates received */ - u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ - u32 imp_updates_filtered; /* Number of route updates rejected by filters */ - u32 imp_updates_ignored; /* Number of route updates rejected as already in route table */ - u32 imp_updates_accepted; /* Number of route updates accepted and imported */ - u32 imp_withdraws_received; /* Number of route withdraws received */ - u32 imp_withdraws_invalid; /* Number of route withdraws rejected as invalid */ - u32 imp_withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ - u32 imp_withdraws_accepted; /* Number of route withdraws accepted and processed */ - - /* Export - from core to protocol */ - u32 exp_routes; /* Number of routes successfully exported to the protocol */ - u32 exp_updates_received; /* Number of route updates received */ - u32 exp_updates_rejected; /* Number of route updates rejected by protocol */ - u32 exp_updates_filtered; /* Number of route updates rejected by filters */ - u32 exp_updates_accepted; /* Number of route updates accepted and exported */ - u32 exp_withdraws_received; /* Number of route withdraws received */ - u32 exp_withdraws_accepted; /* Number of route withdraws accepted and processed */ -}; - struct proto { node n; /* Node in global proto_list */ struct protocol *proto; /* Protocol */ @@ -165,22 +122,24 @@ struct proto { struct proto_config *cf_new; /* Configuration we want to switch to after shutdown (NULL=delete) */ pool *pool; /* Pool containing local objects */ event *event; /* Protocol event */ + struct birdloop *loop; /* BIRDloop running this protocol */ list channels; /* List of channels to rtables (struct channel) */ struct channel *main_channel; /* Primary channel */ struct rte_src *main_source; /* Primary route source */ + struct rte_owner sources; /* Route source owner structure */ struct iface *vrf; /* Related VRF instance, NULL if global */ const char *name; /* Name of this instance (== cf->name) */ u32 debug; /* Debugging flags */ u32 mrtdump; /* MRTDump flags */ uint active_channels; /* Number of active channels */ + uint active_loops; /* Number of active IO loops */ byte net_type; /* Protocol network type (NET_*), 0 for undefined */ byte disabled; /* Manually disabled */ byte vrf_set; /* Related VRF instance (above) is defined */ byte proto_state; /* Protocol state machine (PS_*, see below) */ byte active; /* From PS_START to cleanup after PS_STOP */ - byte do_start; /* Start actions are scheduled */ byte do_stop; /* Stop actions are scheduled */ byte reconfiguring; /* We're shutting down due to reconfiguration */ byte gr_recovery; /* Protocol should participate in graceful restart recovery */ @@ -198,12 +157,11 @@ struct proto { * ifa_notify Notify protocol about interface address changes. * rt_notify Notify protocol about routing table updates. * neigh_notify Notify protocol about neighbor cache events. - * make_tmp_attrs Add attributes to rta from from private attrs stored in rte. The route and rta MUST NOT be cached. - * store_tmp_attrs Store private attrs back to rte and undef added attributes. The route and rta MUST NOT be cached. - * preexport Called as the first step of the route exporting process. - * It can construct a new rte, add private attributes and - * decide whether the route shall be exported: 1=yes, -1=no, - * 0=process it through the export filter set by the user. + * preexport Called as the first step of the route exporting process. + * It can decide whether the route shall be exported: + * -1 = reject, + * 0 = continue to export filter + * 1 = accept immediately * reload_routes Request channel to reload all its routes to the core * (using rte_update()). Returns: 0=reload cannot be done, * 1= reload is scheduled and will happen (asynchronously). @@ -213,11 +171,9 @@ struct proto { void (*if_notify)(struct proto *, unsigned flags, struct iface *i); void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); - void (*rt_notify)(struct proto *, struct channel *, struct network *net, struct rte *new, struct rte *old); + void (*rt_notify)(struct proto *, struct channel *, const net_addr *net, struct rte *new, const struct rte *old); void (*neigh_notify)(struct neighbor *neigh); - void (*make_tmp_attrs)(struct rte *rt, struct linpool *pool); - void (*store_tmp_attrs)(struct rte *rt, struct linpool *pool); - int (*preexport)(struct channel *, struct rte **rt, struct linpool *pool); + int (*preexport)(struct channel *, struct rte *rt); void (*reload_routes)(struct channel *); void (*feed_begin)(struct channel *, int initial); void (*feed_end)(struct channel *); @@ -235,11 +191,10 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); - int (*rte_same)(struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); - struct rte * (*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); + u32 (*rte_igp_metric)(const struct rte *); /* Hic sunt protocol-specific data */ }; @@ -279,7 +234,7 @@ void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_limit(struct limit *l, const char *dsc, int active, int action); void channel_show_info(struct channel *c); void channel_cmd_debug(struct channel *c, uint mask); @@ -386,6 +341,8 @@ void proto_notify_state(struct proto *p, unsigned state); * as a result of received ROUTE-REFRESH request). */ +static inline int proto_is_inactive(struct proto *p) +{ return (p->active_channels == 0) && (p->active_loops == 0) && (p->sources.uc == 0); } /* @@ -434,18 +391,29 @@ extern struct proto_config *cf_dev_proto; #define PLA_RESTART 4 /* Force protocol restart */ #define PLA_DISABLE 5 /* Shutdown and disable protocol */ -#define PLS_INITIAL 0 /* Initial limit state after protocol start */ -#define PLS_ACTIVE 1 /* Limit was hit */ -#define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ - struct channel_limit { u32 limit; /* Maximum number of prefixes */ u8 action; /* Action to take (PLA_*) */ - u8 state; /* State of limit (PLS_*) */ }; -void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); +struct channel_limit_data { + struct channel *c; + int dir; +}; + +#define CLP__RX(_c) (&(_c)->rx_limit) +#define CLP__IN(_c) (&(_c)->in_limit) +#define CLP__OUT(_c) (&(_c)->out_limit) + + +#if 0 +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) log(L_TRACE "%s.%s: %s limit %s %u", (_c)->proto->name, (_c)->name, #_dir, _op, (CLP__##_dir(_c))->count) +#else +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) +#endif +#define CHANNEL_LIMIT_PUSH(_c, _dir) ({ CHANNEL_LIMIT_LOG(_c, _dir, "push from"); struct channel_limit_data cld = { .c = (_c), .dir = PLD_##_dir }; limit_push(CLP__##_dir(_c), &cld); }) +#define CHANNEL_LIMIT_POP(_c, _dir) ({ limit_pop(CLP__##_dir(_c)); CHANNEL_LIMIT_LOG(_c, _dir, "pop to"); }) /* * Channels @@ -469,7 +437,6 @@ struct channel_class { void (*dump)(struct proto *); /* Debugging dump */ - void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ @@ -489,8 +456,10 @@ struct channel_config { struct proto_config *parent; /* Where channel is defined (proto or template) */ struct rtable_config *table; /* Table we're attached to */ const struct filter *in_filter, *out_filter; /* Attached filters */ + const net_addr *out_subprefix; /* Export only subprefixes of this net */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol - (relevant when in_keep_filtered is active) */ + (relevant when in_keep & RIK_REJECTED) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ @@ -499,13 +468,12 @@ struct channel_config { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 rpki_reload; /* RPKI changes trigger channel reload */ }; struct channel { node n; /* Node in proto->channels */ - node table_node; /* Node in table->channels */ const char *name; /* Channel name (may be NULL) */ const struct channel_class *channel; @@ -514,14 +482,40 @@ struct channel { struct rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ - struct bmap export_map; /* Keeps track which routes passed export filter */ - struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ - struct channel_limit in_limit; /* Input limit */ - struct channel_limit out_limit; /* Output limit */ - - struct event *feed_event; /* Event responsible for feeding */ - struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ - struct proto_stats stats; /* Per-channel protocol statistics */ + const net_addr *out_subprefix; /* Export only subprefixes of this net */ + struct bmap export_map; /* Keeps track which routes were really exported */ + struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ + + struct limit rx_limit; /* Receive limit (for in_keep & RIK_REJECTED) */ + struct limit in_limit; /* Input limit */ + struct limit out_limit; /* Output limit */ + + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ + u8 limit_active; /* Flags for active limits */ + + struct channel_import_stats { + /* Import - from protocol to core */ + u32 updates_received; /* Number of route updates received */ + u32 updates_invalid; /* Number of route updates rejected as invalid */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_limited_rx; /* Number of route updates exceeding the rx_limit */ + u32 updates_limited_in; /* Number of route updates exceeding the in_limit */ + u32 withdraws_received; /* Number of route withdraws received */ + u32 withdraws_invalid; /* Number of route withdraws rejected as invalid */ + } import_stats; + + struct channel_export_stats { + /* Export - from core to protocol */ + u32 updates_rejected; /* Number of route updates rejected by protocol */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_accepted; /* Number of route updates accepted and exported */ + u32 updates_limited; /* Number of route updates exceeding the out_limit */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } export_stats; + + struct rt_import_request in_req; /* Table import connection */ + struct rt_export_request out_req; /* Table export connection */ + u32 refeed_count; /* Number of routes exported during refeed regardless of out_limit */ u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ @@ -529,36 +523,31 @@ struct channel { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 disabled; u8 stale; /* Used in reconfiguration */ u8 channel_state; - u8 export_state; /* Route export state (ES_*, see below) */ - u8 feed_active; - u8 flush_active; - u8 refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ + u8 refeeding; /* Refeeding the channel. */ u8 reloadable; /* Hook reload_routes() is allowed on the channel */ u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ u8 gr_wait; /* Route export to channel is postponed until graceful restart */ btime last_state_change; /* Time of last state transition */ - struct rtable *in_table; /* Internal table for received routes */ - struct event *reload_event; /* Event responsible for reloading from in_table */ - struct fib_iterator reload_fit; /* FIB iterator in in_table used during reloading */ - struct rte *reload_next_rte; /* Route iterator in in_table used during reloading */ - u8 reload_active; /* Iterator reload_fit is linked */ + struct rt_export_request reload_req; /* Feeder for import reload */ u8 reload_pending; /* Reloading and another reload is scheduled */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ u8 rpki_reload; /* RPKI changes trigger channel reload */ - struct rtable *out_table; /* Internal table for exported routes */ + struct rt_exporter *out_table; /* Internal table for exported routes */ list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ }; +#define RIK_REJECTED 1 /* Routes rejected in import filter are kept */ +#define RIK_PREFILTER (2 | RIK_REJECTED) /* All routes' attribute state before import filter is kept */ /* * Channel states @@ -585,34 +574,34 @@ struct channel { * restricted by that and is on volition of the protocol. Generally, channels * are opened in protocols' start() hooks when going to PS_UP. * - * CS_FLUSHING - The transitional state between initialized channel and closed + * CS_STOP - The transitional state between initialized channel and closed * channel. The channel is still initialized, but no route exchange is allowed. * Instead, the associated table is running flush loop to remove routes imported * through the channel. After that, the channel changes state to CS_DOWN and * is detached from the table (the table is unlocked and the channel is unlinked - * from it). Unlike other states, the CS_FLUSHING state is not explicitly + * from it). Unlike other states, the CS_STOP state is not explicitly * entered or left by the protocol. A protocol may request to close a channel * (by calling channel_close()), which causes the channel to change state to - * CS_FLUSHING and later to CS_DOWN. Also note that channels are closed + * CS_STOP and later to CS_DOWN. Also note that channels are closed * automatically by the core when the protocol is going down. * + * CS_PAUSE - Almost the same as CS_STOP, just the table import is kept and + * the table export is stopped before transitioning to CS_START. + * * Allowed transitions: * * CS_DOWN -> CS_START / CS_UP - * CS_START -> CS_UP / CS_FLUSHING - * CS_UP -> CS_START / CS_FLUSHING - * CS_FLUSHING -> CS_DOWN (automatic) + * CS_START -> CS_UP / CS_STOP + * CS_UP -> CS_PAUSE / CS_STOP + * CS_PAUSE -> CS_START (automatic) + * CS_STOP -> CS_DOWN (automatic) */ #define CS_DOWN 0 #define CS_START 1 #define CS_UP 2 -#define CS_FLUSHING 3 - -#define ES_DOWN 0 -#define ES_FEEDING 1 -#define ES_READY 2 - +#define CS_STOP 3 +#define CS_PAUSE 4 struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) @@ -625,30 +614,15 @@ int proto_configure_channel(struct proto *p, struct channel **c, struct channel_ void channel_set_state(struct channel *c, uint state); void channel_setup_in_table(struct channel *c); -void channel_setup_out_table(struct channel *c); void channel_schedule_reload(struct channel *c); static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); } -static inline void channel_close(struct channel *c) { channel_set_state(c, CS_FLUSHING); } +static inline void channel_close(struct channel *c) { channel_set_state(c, CS_STOP); } void channel_request_feeding(struct channel *c); void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); int channel_reconfigure(struct channel *c, struct channel_config *cf); - -/* Moved from route.h to avoid dependency conflicts */ -static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); } - -static inline void -rte_update3(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - if (c->in_table && !rte_update_in(c, n, new, src)) - return; - - rte_update2(c, n, new, src); -} - - #endif diff --git a/nest/route.h b/nest/route.h deleted file mode 100644 index 1dacd92f..00000000 --- a/nest/route.h +++ /dev/null @@ -1,792 +0,0 @@ -/* - * BIRD Internet Routing Daemon -- Routing Table - * - * (c) 1998--2000 Martin Mares <mj@ucw.cz> - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_ROUTE_H_ -#define _BIRD_ROUTE_H_ - -#include "lib/lists.h" -#include "lib/bitmap.h" -#include "lib/resource.h" -#include "lib/net.h" - -struct ea_list; -struct protocol; -struct proto; -struct rte_src; -struct symbol; -struct timer; -struct fib; -struct filter; -struct f_trie; -struct f_trie_walk_state; -struct cli; - -/* - * Generic data structure for storing network prefixes. Also used - * for the master routing table. Currently implemented as a hash - * table. - * - * Available operations: - * - insertion of new entry - * - deletion of entry - * - searching for entry by network prefix - * - asynchronous retrieval of fib contents - */ - -struct fib_node { - struct fib_node *next; /* Next in hash chain */ - struct fib_iterator *readers; /* List of readers of this node */ - net_addr addr[0]; -}; - -struct fib_iterator { /* See lib/slists.h for an explanation */ - struct fib_iterator *prev, *next; /* Must be synced with struct fib_node! */ - byte efef; /* 0xff to distinguish between iterator and node */ - byte pad[3]; - struct fib_node *node; /* Or NULL if freshly merged */ - uint hash; -}; - -typedef void (*fib_init_fn)(struct fib *, void *); - -struct fib { - pool *fib_pool; /* Pool holding all our data */ - slab *fib_slab; /* Slab holding all fib nodes */ - struct fib_node **hash_table; /* Node hash table */ - uint hash_size; /* Number of hash table entries (a power of two) */ - uint hash_order; /* Binary logarithm of hash_size */ - uint hash_shift; /* 32 - hash_order */ - uint addr_type; /* Type of address data stored in fib (NET_*) */ - uint node_size; /* FIB node size, 0 for nonuniform */ - uint node_offset; /* Offset of fib_node struct inside of user data */ - uint entries; /* Number of entries */ - uint entries_min, entries_max; /* Entry count limits (else start rehashing) */ - fib_init_fn init; /* Constructor */ -}; - -static inline void * fib_node_to_user(struct fib *f, struct fib_node *e) -{ return e ? (void *) ((char *) e - f->node_offset) : NULL; } - -static inline struct fib_node * fib_user_to_node(struct fib *f, void *e) -{ return e ? (void *) ((char *) e + f->node_offset) : NULL; } - -void fib_init(struct fib *f, pool *p, uint addr_type, uint node_size, uint node_offset, uint hash_order, fib_init_fn init); -void *fib_find(struct fib *, const net_addr *); /* Find or return NULL if doesn't exist */ -void *fib_get_chain(struct fib *f, const net_addr *a); /* Find first node in linked list from hash table */ -void *fib_get(struct fib *, const net_addr *); /* Find or create new if nonexistent */ -void *fib_route(struct fib *, const net_addr *); /* Longest-match routing lookup */ -void fib_delete(struct fib *, void *); /* Remove fib entry */ -void fib_free(struct fib *); /* Destroy the fib */ -void fib_check(struct fib *); /* Consistency check for debugging */ - -void fit_init(struct fib_iterator *, struct fib *); /* Internal functions, don't call */ -struct fib_node *fit_get(struct fib *, struct fib_iterator *); -void fit_put(struct fib_iterator *, struct fib_node *); -void fit_put_next(struct fib *f, struct fib_iterator *i, struct fib_node *n, uint hpos); -void fit_put_end(struct fib_iterator *i); -void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src); - - -#define FIB_WALK(fib, type, z) do { \ - struct fib_node *fn_, **ff_ = (fib)->hash_table; \ - uint count_ = (fib)->hash_size; \ - type *z; \ - while (count_--) \ - for (fn_ = *ff_++; z = fib_node_to_user(fib, fn_); fn_=fn_->next) - -#define FIB_WALK_END } while (0) - -#define FIB_ITERATE_INIT(it, fib) fit_init(it, fib) - -#define FIB_ITERATE_START(fib, it, type, z) do { \ - struct fib_node *fn_ = fit_get(fib, it); \ - uint count_ = (fib)->hash_size; \ - uint hpos_ = (it)->hash; \ - type *z; \ - for(;;) { \ - if (!fn_) \ - { \ - if (++hpos_ >= count_) \ - break; \ - fn_ = (fib)->hash_table[hpos_]; \ - continue; \ - } \ - z = fib_node_to_user(fib, fn_); - -#define FIB_ITERATE_END fn_ = fn_->next; } } while(0) - -#define FIB_ITERATE_PUT(it) fit_put(it, fn_) - -#define FIB_ITERATE_PUT_NEXT(it, fib) fit_put_next(fib, it, fn_, hpos_) - -#define FIB_ITERATE_PUT_END(it) fit_put_end(it) - -#define FIB_ITERATE_UNLINK(it, fib) fit_get(fib, it) - -#define FIB_ITERATE_COPY(dst, src, fib) fit_copy(fib, dst, src) - - -/* - * Master Routing Tables. Generally speaking, each of them contains a FIB - * with each entry pointing to a list of route entries representing routes - * to given network (with the selected one at the head). - * - * Each of the RTE's contains variable data (the preference and protocol-dependent - * metrics) and a pointer to a route attribute block common for many routes). - * - * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. - */ - -struct rtable_config { - node n; - char *name; - struct rtable *table; - struct proto_config *krt_attached; /* Kernel syncer attached to this table */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - uint gc_threshold; /* Maximum number of operations before GC is run */ - uint gc_period; /* Approximate time between two consecutive GC runs */ - byte sorted; /* Routes of network are sorted according to rte_better() */ - byte internal; /* Internal table of a protocol */ - byte trie_used; /* Rtable has attached trie */ - btime min_settle_time; /* Minimum settle time for notifications */ - btime max_settle_time; /* Maximum settle time for notifications */ -}; - -typedef struct rtable { - resource r; - node n; /* Node in list of all tables */ - pool *rp; /* Resource pool to allocate everything from, including itself */ - struct fib fib; - struct f_trie *trie; /* Trie of prefixes defined in fib */ - char *name; /* Name of this table */ - list channels; /* List of attached channels (struct channel) */ - uint addr_type; /* Type of address data stored in table (NET_*) */ - int pipe_busy; /* Pipe loop detection */ - int use_count; /* Number of protocols using this table */ - u32 rt_count; /* Number of routes in the table */ - - byte internal; /* Internal table of a protocol */ - - struct hmap id_map; - struct hostcache *hostcache; - struct rtable_config *config; /* Configuration of this table */ - struct config *deleted; /* Table doesn't exist in current configuration, - * delete as soon as use_count becomes 0 and remove - * obstacle from this routing table. - */ - struct event *rt_event; /* Routing table event */ - struct timer *prune_timer; /* Timer for periodic pruning / GC */ - btime last_rt_change; /* Last time when route changed */ - btime base_settle_time; /* Start time of rtable settling interval */ - btime gc_time; /* Time of last GC */ - uint gc_counter; /* Number of operations since last GC */ - byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ - byte prune_trie; /* Prune prefix trie during next table prune */ - byte hcu_scheduled; /* Hostcache update is scheduled */ - byte nhu_state; /* Next Hop Update state */ - struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ - struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ - struct f_trie *trie_new; /* New prefix trie defined during pruning */ - struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ - u32 trie_lock_count; /* Prefix trie locked by walks */ - u32 trie_old_lock_count; /* Old prefix trie locked by walks */ - - list subscribers; /* Subscribers for notifications */ - struct timer *settle_timer; /* Settle time for notifications */ - list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ - struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ -} rtable; - -struct rt_subscription { - node n; - rtable *tab; - void (*hook)(struct rt_subscription *b); - void *data; -}; - -struct rt_flowspec_link { - node n; - rtable *src; - rtable *dst; - u32 uc; -}; - -#define NHU_CLEAN 0 -#define NHU_SCHEDULED 1 -#define NHU_RUNNING 2 -#define NHU_DIRTY 3 - -typedef struct network { - struct rte *routes; /* Available routes for this network */ - struct fib_node n; /* FIB flags reserved for kernel syncer */ -} net; - -struct hostcache { - slab *slab; /* Slab holding all hostentries */ - struct hostentry **hash_table; /* Hash table for hostentries */ - unsigned hash_order, hash_shift; - unsigned hash_max, hash_min; - unsigned hash_items; - linpool *lp; /* Linpool for trie */ - struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ - list hostentries; /* List of all hostentries */ - byte update_hostcache; -}; - -struct hostentry { - node ln; - ip_addr addr; /* IP address of host, part of key */ - ip_addr link; /* (link-local) IP address of host, used as gw - if host is directly attached */ - struct rtable *tab; /* Dependent table, part of key */ - struct hostentry *next; /* Next in hash chain */ - unsigned hash_key; /* Hash key */ - unsigned uc; /* Use count */ - struct rta *src; /* Source rta entry */ - byte dest; /* Chosen route destination type (RTD_...) */ - byte nexthop_linkable; /* Nexthop list is completely non-device */ - u32 igp_metric; /* Chosen route IGP metric */ -}; - -typedef struct rte { - struct rte *next; - net *net; /* Network this RTE belongs to */ - struct channel *sender; /* Channel used to send the route to the routing table */ - struct rta *attrs; /* Attributes of this route */ - u32 id; /* Table specific route id */ - byte flags; /* Flags (REF_...) */ - byte pflags; /* Protocol-specific flags */ - word pref; /* Route preference */ - btime lastmod; /* Last modified */ - union { /* Protocol-dependent data (metrics etc.) */ -#ifdef CONFIG_RIP - struct { - struct iface *from; /* Incoming iface */ - u8 metric; /* RIP metric */ - u16 tag; /* External route tag */ - } rip; -#endif -#ifdef CONFIG_OSPF - struct { - u32 metric1, metric2; /* OSPF Type 1 and Type 2 metrics */ - u32 tag; /* External route tag */ - u32 router_id; /* Router that originated this route */ - } ospf; -#endif -#ifdef CONFIG_BGP - struct { - u8 suppressed; /* Used for deterministic MED comparison */ - s8 stale; /* Route is LLGR_STALE, -1 if unknown */ - struct rtable *base_table; /* Base table for Flowspec validation */ - } bgp; -#endif -#ifdef CONFIG_BABEL - struct { - u16 seqno; /* Babel seqno */ - u16 metric; /* Babel metric */ - u64 router_id; /* Babel router id */ - } babel; -#endif - struct { /* Routes generated by krt sync (both temporary and inherited ones) */ - s8 src; /* Alleged route source (see krt.h) */ - u8 proto; /* Kernel source protocol ID */ - u8 seen; /* Seen during last scan */ - u8 best; /* Best route in network, propagated to core */ - u32 metric; /* Kernel metric */ - } krt; - } u; -} rte; - -#define REF_COW 1 /* Copy this rte on write */ -#define REF_FILTERED 2 /* Route is rejected by import filter */ -#define REF_STALE 4 /* Route is stale in a refresh cycle */ -#define REF_DISCARD 8 /* Route is scheduled for discard */ -#define REF_MODIFY 16 /* Route is scheduled for modify */ - -/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ -static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } - -/* Route just has REF_FILTERED flag */ -static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } - - -/* Types of route announcement, also used as flags */ -#define RA_UNDEF 0 /* Undefined RA type */ -#define RA_OPTIMAL 1 /* Announcement of optimal route change */ -#define RA_ACCEPTED 2 /* Announcement of first accepted route */ -#define RA_ANY 3 /* Announcement of any route change */ -#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ - -/* Return value of preexport() callback */ -#define RIC_ACCEPT 1 /* Accepted by protocol */ -#define RIC_PROCESS 0 /* Process it through import filter */ -#define RIC_REJECT -1 /* Rejected by protocol */ -#define RIC_DROP -2 /* Silently dropped by protocol */ - -extern list routing_tables; -struct config; - -void rt_init(void); -void rt_preconfig(struct config *); -void rt_postconfig(struct config *); -void rt_commit(struct config *new, struct config *old); -void rt_lock_table(rtable *); -void rt_unlock_table(rtable *); -struct f_trie * rt_lock_trie(rtable *tab); -void rt_unlock_trie(rtable *tab, struct f_trie *trie); -void rt_subscribe(rtable *tab, struct rt_subscription *s); -void rt_unsubscribe(struct rt_subscription *s); -void rt_flowspec_link(rtable *src, rtable *dst); -void rt_flowspec_unlink(rtable *src, rtable *dst); -rtable *rt_setup(pool *, struct rtable_config *); -static inline void rt_shutdown(rtable *r) { rfree(r->rp); } - -static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } -static inline net *net_find_valid(rtable *tab, const net_addr *addr) -{ net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? n : NULL; } -static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } -net *net_get(rtable *tab, const net_addr *addr); -net *net_route(rtable *tab, const net_addr *n); -int net_roa_check(rtable *tab, const net_addr *n, u32 asn); -rte *rte_find(net *net, struct rte_src *src); -rte *rte_get_temp(struct rta *); -void rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -/* rte_update() moved to protocol.h to avoid dependency conflicts */ -int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); -rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct channel *c); -void rt_refresh_end(rtable *t, struct channel *c); -void rt_modify_stale(rtable *t, struct channel *c); -void rt_schedule_prune(rtable *t); -void rte_dump(rte *); -void rte_free(rte *); -rte *rte_do_cow(rte *); -static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; } -rte *rte_cow_rta(rte *r, linpool *lp); -void rte_init_tmp_attrs(struct rte *r, linpool *lp, uint max); -void rte_make_tmp_attr(struct rte *r, uint id, uint type, uintptr_t val); -void rte_make_tmp_attrs(struct rte **r, struct linpool *pool, struct rta **old_attrs); -uintptr_t rte_store_tmp_attr(struct rte *r, uint id); -void rt_dump(rtable *); -void rt_dump_all(void); -int rt_feed_channel(struct channel *c); -void rt_feed_channel_abort(struct channel *c); -int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -int rt_reload_channel(struct channel *c); -void rt_reload_channel_abort(struct channel *c); -void rt_prune_sync(rtable *t, int all); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed); -struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); - -static inline int rt_is_ip(rtable *tab) -{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } - -static inline int rt_is_vpn(rtable *tab) -{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } - -static inline int rt_is_roa(rtable *tab) -{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } - -static inline int rt_is_flow(rtable *tab) -{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } - - -/* Default limit for ECMP next hops, defined in sysdep code */ -extern const int rt_default_ecmp; - -struct rt_show_data_rtable { - node n; - rtable *table; - struct channel *export_channel; -}; - -struct rt_show_data { - net_addr *addr; - list tables; - struct rt_show_data_rtable *tab; /* Iterator over table list */ - struct rt_show_data_rtable *last_table; /* Last table in output */ - struct fib_iterator fit; /* Iterator over networks in table */ - struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ - struct f_trie *walk_lock; /* Locked trie for walking */ - int verbose, tables_defined_by; - const struct filter *filter; - struct proto *show_protocol; - struct proto *export_protocol; - struct channel *export_channel; - struct config *running_on_config; - struct krt_proto *kernel; - int export_mode, addr_mode, primary_only, filtered, stats; - - int table_open; /* Iteration (fit) is open */ - int trie_walk; /* Current table is iterated using trie */ - int net_counter, rt_counter, show_counter, table_counter; - int net_counter_last, rt_counter_last, show_counter_last; -}; - -void rt_show(struct rt_show_data *); -struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); - -/* Value of table definition mode in struct rt_show_data */ -#define RSD_TDB_DEFAULT 0 /* no table specified */ -#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ -#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ -#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ - -#define RSD_TDB_SET 0x1 /* internal: show empty tables */ -#define RSD_TDB_NMN 0x2 /* internal: need matching net */ - -/* Value of addr_mode */ -#define RSD_ADDR_EQUAL 1 /* Exact query - show route <addr> */ -#define RSD_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ -#define RSD_ADDR_IN 3 /* Interval query - show route in <addr> */ - -/* Value of export_mode in struct rt_show_data */ -#define RSEM_NONE 0 /* Export mode not used */ -#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ -#define RSEM_EXPORT 2 /* Routes accepted by export filter */ -#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ -#define RSEM_EXPORTED 4 /* Routes marked in export map */ - -/* - * Route Attributes - * - * Beware: All standard BGP attributes must be represented here instead - * of making them local to the route. This is needed to ensure proper - * construction of BGP route attribute lists. - */ - -/* Nexthop structure */ -struct nexthop { - ip_addr gw; /* Next hop */ - struct iface *iface; /* Outgoing interface */ - struct nexthop *next; - byte flags; - byte weight; - byte labels_orig; /* Number of labels before hostentry was applied */ - byte labels; /* Number of all labels */ - u32 label[0]; -}; - -#define RNF_ONLINK 0x1 /* Gateway is onlink regardless of IP ranges */ - - -struct rte_src { - struct rte_src *next; /* Hash chain */ - struct proto *proto; /* Protocol the source is based on */ - u32 private_id; /* Private ID, assigned by the protocol */ - u32 global_id; /* Globally unique ID of the source */ - unsigned uc; /* Use count */ -}; - - -typedef struct rta { - struct rta *next, **pprev; /* Hash chain */ - u32 uc; /* Use count */ - u32 hash_key; /* Hash over important fields */ - struct ea_list *eattrs; /* Extended Attribute chain */ - struct rte_src *src; /* Route source that created the route */ - struct hostentry *hostentry; /* Hostentry for recursive next-hops */ - ip_addr from; /* Advertising router */ - u32 igp_metric; /* IGP metric to next hop (for iBGP routes) */ - u8 source; /* Route source (RTS_...) */ - u8 scope; /* Route scope (SCOPE_... -- see ip.h) */ - u8 dest; /* Route destination type (RTD_...) */ - u8 aflags; - struct nexthop nh; /* Next hop */ -} rta; - -#define RTS_DUMMY 0 /* Dummy route to be removed soon */ -#define RTS_STATIC 1 /* Normal static route */ -#define RTS_INHERIT 2 /* Route inherited from kernel */ -#define RTS_DEVICE 3 /* Device route */ -#define RTS_STATIC_DEVICE 4 /* Static device route */ -#define RTS_REDIRECT 5 /* Learned via redirect */ -#define RTS_RIP 6 /* RIP route */ -#define RTS_OSPF 7 /* OSPF route */ -#define RTS_OSPF_IA 8 /* OSPF inter-area route */ -#define RTS_OSPF_EXT1 9 /* OSPF external route type 1 */ -#define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ -#define RTS_BGP 11 /* BGP route */ -#define RTS_PIPE 12 /* Inter-table wormhole */ -#define RTS_BABEL 13 /* Babel route */ -#define RTS_RPKI 14 /* Route Origin Authorization */ -#define RTS_PERF 15 /* Perf checker */ -#define RTS_MAX 16 - -#define RTC_UNICAST 0 -#define RTC_BROADCAST 1 -#define RTC_MULTICAST 2 -#define RTC_ANYCAST 3 /* IPv6 Anycast */ - -#define RTD_NONE 0 /* Undefined next hop */ -#define RTD_UNICAST 1 /* Next hop is neighbor router */ -#define RTD_BLACKHOLE 2 /* Silently drop packets */ -#define RTD_UNREACHABLE 3 /* Reject as unreachable */ -#define RTD_PROHIBIT 4 /* Administratively prohibited */ -#define RTD_MAX 5 - -#define RTAF_CACHED 1 /* This is a cached rta */ - -#define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other - protocol-specific metric is availabe */ - - -extern const char * rta_dest_names[RTD_MAX]; - -static inline const char *rta_dest_name(uint n) -{ return (n < RTD_MAX) ? rta_dest_names[n] : "???"; } - -/* Route has regular, reachable nexthop (i.e. not RTD_UNREACHABLE and like) */ -static inline int rte_is_reachable(rte *r) -{ return r->attrs->dest == RTD_UNICAST; } - - -/* - * Extended Route Attributes - */ - -typedef struct eattr { - word id; /* EA_CODE(PROTOCOL_..., protocol-dependent ID) */ - byte flags; /* Protocol-dependent flags */ - byte type; /* Attribute type and several flags (EAF_...) */ - union { - u32 data; - const struct adata *ptr; /* Attribute data elsewhere */ - } u; -} eattr; - - -#define EA_CODE(proto,id) (((proto) << 8) | (id)) -#define EA_ID(ea) ((ea) & 0xff) -#define EA_PROTO(ea) ((ea) >> 8) -#define EA_ID_FLAG(ea) (1 << EA_ID(ea)) -#define EA_CUSTOM(id) ((id) | EA_CUSTOM_BIT) -#define EA_IS_CUSTOM(ea) ((ea) & EA_CUSTOM_BIT) -#define EA_CUSTOM_ID(ea) ((ea) & ~EA_CUSTOM_BIT) - -const char *ea_custom_name(uint ea); - -#define EA_GEN_IGP_METRIC EA_CODE(PROTOCOL_NONE, 0) - -#define EA_CODE_MASK 0xffff -#define EA_CUSTOM_BIT 0x8000 -#define EA_ALLOW_UNDEF 0x10000 /* ea_find: allow EAF_TYPE_UNDEF */ -#define EA_BIT(n) ((n) << 24) /* Used in bitfield accessors */ -#define EA_BIT_GET(ea) ((ea) >> 24) - -#define EAF_TYPE_MASK 0x1f /* Mask with this to get type */ -#define EAF_TYPE_INT 0x01 /* 32-bit unsigned integer number */ -#define EAF_TYPE_OPAQUE 0x02 /* Opaque byte string (not filterable) */ -#define EAF_TYPE_IP_ADDRESS 0x04 /* IP address */ -#define EAF_TYPE_ROUTER_ID 0x05 /* Router ID (IPv4 address) */ -#define EAF_TYPE_AS_PATH 0x06 /* BGP AS path (encoding per RFC 1771:4.3) */ -#define EAF_TYPE_BITFIELD 0x09 /* 32-bit embedded bitfield */ -#define EAF_TYPE_INT_SET 0x0a /* Set of u32's (e.g., a community list) */ -#define EAF_TYPE_EC_SET 0x0e /* Set of pairs of u32's - ext. community list */ -#define EAF_TYPE_LC_SET 0x12 /* Set of triplets of u32's - large community list */ -#define EAF_TYPE_UNDEF 0x1f /* `force undefined' entry */ -#define EAF_EMBEDDED 0x01 /* Data stored in eattr.u.data (part of type spec) */ -#define EAF_VAR_LENGTH 0x02 /* Attribute length is variable (part of type spec) */ -#define EAF_ORIGINATED 0x20 /* The attribute has originated locally */ -#define EAF_FRESH 0x40 /* An uncached attribute (e.g. modified in export filter) */ - -typedef struct adata { - uint length; /* Length of data */ - byte data[0]; -} adata; - -extern const adata null_adata; /* adata of length 0 */ - -static inline struct adata * -lp_alloc_adata(struct linpool *pool, uint len) -{ - struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len); - ad->length = len; - return ad; -} - -static inline int adata_same(const struct adata *a, const struct adata *b) -{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } - - -typedef struct ea_list { - struct ea_list *next; /* In case we have an override list */ - byte flags; /* Flags: EALF_... */ - byte rfu; - word count; /* Number of attributes */ - eattr attrs[0]; /* Attribute definitions themselves */ -} ea_list; - -#define EALF_SORTED 1 /* Attributes are sorted by code */ -#define EALF_BISECT 2 /* Use interval bisection for searching */ -#define EALF_CACHED 4 /* Attributes belonging to cached rta */ -#define EALF_TEMP 8 /* Temporary ea_list added by make_tmp_attrs hooks */ - -struct rte_src *rt_find_source(struct proto *p, u32 id); -struct rte_src *rt_get_source(struct proto *p, u32 id); -static inline void rt_lock_source(struct rte_src *src) { src->uc++; } -static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } -void rt_prune_sources(void); - -struct ea_walk_state { - ea_list *eattrs; /* Ccurrent ea_list, initially set by caller */ - eattr *ea; /* Current eattr, initially NULL */ - u32 visited[4]; /* Bitfield, limiting max to 128 */ -}; - -eattr *ea_find(ea_list *, unsigned ea); -eattr *ea_walk(struct ea_walk_state *s, uint id, uint max); -int ea_get_int(ea_list *, unsigned ea, int def); -void ea_dump(ea_list *); -void ea_sort(ea_list *); /* Sort entries in all sub-lists */ -unsigned ea_scan(ea_list *); /* How many bytes do we need for merged ea_list */ -void ea_merge(ea_list *from, ea_list *to); /* Merge sub-lists to allocated buffer */ -int ea_same(ea_list *x, ea_list *y); /* Test whether two ea_lists are identical */ -uint ea_hash(ea_list *e); /* Calculate 16-bit hash value */ -ea_list *ea_append(ea_list *to, ea_list *what); -void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); - -#define ea_normalize(ea) do { \ - if (ea->next) { \ - ea_list *t = alloca(ea_scan(ea)); \ - ea_merge(ea, t); \ - ea = t; \ - } \ - ea_sort(ea); \ - if (ea->count == 0) \ - ea = NULL; \ -} while(0) \ - -static inline eattr * -ea_set_attr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, uintptr_t val) -{ - ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr)); - eattr *e = &a->attrs[0]; - - a->flags = EALF_SORTED; - a->count = 1; - a->next = *to; - *to = a; - - e->id = id; - e->type = type; - e->flags = flags; - - if (type & EAF_EMBEDDED) - e->u.data = (u32) val; - else - e->u.ptr = (struct adata *) val; - - return e; -} - -static inline void -ea_set_attr_u32(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, u32 val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_ptr(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, struct adata *val) -{ ea_set_attr(to, pool, id, flags, type, (uintptr_t) val); } - -static inline void -ea_set_attr_data(ea_list **to, struct linpool *pool, uint id, uint flags, uint type, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - memcpy(a->data, data, len); - ea_set_attr(to, pool, id, flags, type, (uintptr_t) a); -} - - -#define NEXTHOP_MAX_SIZE (sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK) - -static inline size_t nexthop_size(const struct nexthop *nh) -{ return sizeof(struct nexthop) + sizeof(u32)*nh->labels; } -int nexthop__same(struct nexthop *x, struct nexthop *y); /* Compare multipath nexthops */ -static inline int nexthop_same(struct nexthop *x, struct nexthop *y) -{ return (x == y) || nexthop__same(x, y); } -struct nexthop *nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp); -struct nexthop *nexthop_sort(struct nexthop *x); -static inline void nexthop_link(struct rta *a, struct nexthop *from) -{ memcpy(&a->nh, from, nexthop_size(from)); } -void nexthop_insert(struct nexthop **n, struct nexthop *y); -int nexthop_is_sorted(struct nexthop *x); - -void rta_init(void); -static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a->nh.labels; } -#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK) -rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */ -static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; } -static inline rta *rta_clone(rta *r) { r->uc++; return r; } -void rta__free(rta *r); -static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } -rta *rta_do_cow(rta *o, linpool *lp); -static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; } -void rta_dump(rta *); -void rta_dump_all(void); -void rta_show(struct cli *, rta *); - -u32 rt_get_igp_metric(rte *rt); -struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); -void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls); - -static inline void -rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls) -{ - rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ll, dep), mls); -} - -/* - * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills - * rta->hostentry field. New hostentry has zero use count. Cached rta locks its - * hostentry (increases its use count), uncached rta does not lock it. Hostentry - * with zero use count is removed asynchronously during host cache update, - * therefore it is safe to hold such hostentry temorarily. Hostentry holds a - * lock for a 'source' rta, mainly to share multipath nexthops. - * - * There is no need to hold a lock for hostentry->dep table, because that table - * contains routes responsible for that hostentry, and therefore is non-empty if - * given hostentry has non-zero use count. If the hostentry has zero use count, - * the entry is removed before dep is referenced. - * - * The protocol responsible for routes with recursive next hops should hold a - * lock for a 'source' table governing that routes (argument tab to - * rta_set_recursive_next_hop()), because its routes reference hostentries - * (through rta) related to the governing table. When all such routes are - * removed, rtas are immediately removed achieving zero uc. Then the 'source' - * table lock could be immediately released, although hostentries may still - * exist - they will be freed together with the 'source' table. - */ - -static inline void rt_lock_hostentry(struct hostentry *he) { if (he) he->uc++; } -static inline void rt_unlock_hostentry(struct hostentry *he) { if (he) he->uc--; } - -int rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior); - - -/* - * Default protocol preferences - */ - -#define DEF_PREF_DIRECT 240 /* Directly connected */ -#define DEF_PREF_STATIC 200 /* Static route */ -#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ -#define DEF_PREF_BABEL 130 /* Babel */ -#define DEF_PREF_RIP 120 /* RIP */ -#define DEF_PREF_BGP 100 /* BGP */ -#define DEF_PREF_RPKI 100 /* RPKI */ -#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ - -/* - * Route Origin Authorization - */ - -#define ROA_UNKNOWN 0 -#define ROA_VALID 1 -#define ROA_INVALID 2 - -#endif diff --git a/nest/rt-attr.c b/nest/rt-attr.c index c630aa95..471209ee 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -45,11 +45,11 @@ */ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" -#include "nest/attrs.h" +#include "lib/attrs.h" #include "lib/alloca.h" #include "lib/hash.h" #include "lib/idm.h" @@ -57,11 +57,26 @@ #include "lib/string.h" #include <stddef.h> +#include <stdlib.h> const adata null_adata; /* adata of length 0 */ +struct ea_class ea_gen_igp_metric = { + .name = "igp_metric", + .type = T_INT, +}; + +struct ea_class ea_gen_preference = { + .name = "preference", + .type = T_INT, +}; + +struct ea_class ea_gen_from = { + .name = "from", + .type = T_IP, +}; + const char * const rta_src_names[RTS_MAX] = { - [RTS_DUMMY] = "", [RTS_STATIC] = "static", [RTS_INHERIT] = "inherit", [RTS_DEVICE] = "device", @@ -78,6 +93,71 @@ const char * const rta_src_names[RTS_MAX] = { [RTS_RPKI] = "RPKI", }; +static void +ea_gen_source_format(const eattr *a, byte *buf, uint size) +{ + if ((a->u.data >= RTS_MAX) || !rta_src_names[a->u.data]) + bsnprintf(buf, size, "unknown"); + else + bsnprintf(buf, size, "%s", rta_src_names[a->u.data]); +} + +struct ea_class ea_gen_source = { + .name = "source", + .type = T_ENUM_RTS, + .readonly = 1, + .format = ea_gen_source_format, +}; + +struct ea_class ea_gen_nexthop = { + .name = "nexthop", + .type = T_NEXTHOP_LIST, +}; + +/* + * ea_set_hostentry() acquires hostentry from hostcache. + * New hostentry has zero use count. Cached rta locks its + * hostentry (increases its use count), uncached rta does not lock it. + * Hostentry with zero use count is removed asynchronously + * during host cache update, therefore it is safe to hold + * such hostentry temporarily as long as you hold the table lock. + * + * There is no need to hold a lock for hostentry->dep table, because that table + * contains routes responsible for that hostentry, and therefore is non-empty if + * given hostentry has non-zero use count. If the hostentry has zero use count, + * the entry is removed before dep is referenced. + * + * The protocol responsible for routes with recursive next hops should hold a + * lock for a 'source' table governing that routes (argument tab), + * because its routes reference hostentries related to the governing table. + * When all such routes are + * removed, rtas are immediately removed achieving zero uc. Then the 'source' + * table lock could be immediately released, although hostentries may still + * exist - they will be freed together with the 'source' table. + */ + + static void +ea_gen_hostentry_stored(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc++; +} + +static void +ea_gen_hostentry_freed(const eattr *ea) +{ + struct hostentry_adata *had = (struct hostentry_adata *) ea->u.ptr; + had->he->uc--; +} + +struct ea_class ea_gen_hostentry = { + .name = "hostentry", + .type = T_HOSTENTRY, + .readonly = 1, + .stored = ea_gen_hostentry_stored, + .freed = ea_gen_hostentry_freed, +}; + const char * rta_dest_names[RTD_MAX] = { [RTD_NONE] = "", [RTD_UNICAST] = "unicast", @@ -86,10 +166,22 @@ const char * rta_dest_names[RTD_MAX] = { [RTD_PROHIBIT] = "prohibited", }; +struct ea_class ea_gen_flowspec_valid = { + .name = "flowspec_valid", + .type = T_ENUM_FLOWSPEC_VALID, + .readonly = 1, +}; + +const char * flowspec_valid_names[FLOWSPEC__MAX] = { + [FLOWSPEC_UNKNOWN] = "unknown", + [FLOWSPEC_VALID] = "", + [FLOWSPEC_INVALID] = "invalid", +}; + +DOMAIN(attrs) attrs_domain; + pool *rta_pool; -static slab *rta_slab_[4]; -static slab *nexthop_slab_[4]; static slab *rte_src_slab; static struct idm src_ids; @@ -97,121 +189,176 @@ static struct idm src_ids; /* rte source hash */ -#define RSH_KEY(n) n->proto, n->private_id +#define RSH_KEY(n) n->private_id #define RSH_NEXT(n) n->next -#define RSH_EQ(p1,n1,p2,n2) p1 == p2 && n1 == n2 -#define RSH_FN(p,n) p->hash_key ^ u32_hash(n) +#define RSH_EQ(n1,n2) n1 == n2 +#define RSH_FN(n) u32_hash(n) #define RSH_REHASH rte_src_rehash #define RSH_PARAMS /2, *2, 1, 1, 8, 20 -#define RSH_INIT_ORDER 6 - -static HASH(struct rte_src) src_hash; +#define RSH_INIT_ORDER 2 +static struct rte_src **rte_src_global; +static uint rte_src_global_max = SRC_ID_INIT_SIZE; static void rte_src_init(void) { rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); + rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max); idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); - - HASH_INIT(src_hash, rta_pool, RSH_INIT_ORDER); } - HASH_DEFINE_REHASH_FN(RSH, struct rte_src) -struct rte_src * -rt_find_source(struct proto *p, u32 id) +static struct rte_src * +rt_find_source(struct rte_owner *p, u32 id) { - return HASH_FIND(src_hash, RSH, p, id); + return HASH_FIND(p->hash, RSH, id); } struct rte_src * -rt_get_source(struct proto *p, u32 id) +rt_get_source_o(struct rte_owner *p, u32 id) { + if (p->stop) + bug("Stopping route owner asked for another source."); + struct rte_src *src = rt_find_source(p, id); if (src) + { + UNUSED u64 uc = atomic_fetch_add_explicit(&src->uc, 1, memory_order_acq_rel); return src; + } + RTA_LOCK; src = sl_allocz(rte_src_slab); - src->proto = p; + src->owner = p; src->private_id = id; src->global_id = idm_alloc(&src_ids); - src->uc = 0; - HASH_INSERT2(src_hash, RSH, rta_pool, src); + atomic_store_explicit(&src->uc, 1, memory_order_release); + p->uc++; + + HASH_INSERT2(p->hash, RSH, rta_pool, src); + if (config->table_debug) + log(L_TRACE "Allocated new rte_src for %s, ID %uL %uG, have %u sources now", + p->name, src->private_id, src->global_id, p->uc); + + if (src->global_id >= rte_src_global_max) + { + rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2)); + memset(&rte_src_global[rte_src_global_max / 2], 0, + sizeof(struct rte_src *) * (rte_src_global_max / 2)); + } + + rte_src_global[src->global_id] = src; + RTA_UNLOCK; return src; } +struct rte_src * +rt_find_source_global(u32 id) +{ + if (id >= rte_src_global_max) + return NULL; + else + return rte_src_global[id]; +} + +static inline void +rt_done_sources(struct rte_owner *o) +{ + ev_send(o->list, o->stop); +} + void -rt_prune_sources(void) +rt_prune_sources(void *data) { - HASH_WALK_FILTER(src_hash, next, src, sp) + struct rte_owner *o = data; + + HASH_WALK_FILTER(o->hash, next, src, sp) { - if (src->uc == 0) + u64 uc; + while ((uc = atomic_load_explicit(&src->uc, memory_order_acquire)) >> RTE_SRC_PU_SHIFT) + synchronize_rcu(); + + if (uc == 0) { - HASH_DO_REMOVE(src_hash, RSH, sp); + o->uc--; + + HASH_DO_REMOVE(o->hash, RSH, sp); + + RTA_LOCK; + rte_src_global[src->global_id] = NULL; idm_free(&src_ids, src->global_id); - sl_free(rte_src_slab, src); + sl_free(src); + RTA_UNLOCK; } } HASH_WALK_FILTER_END; - HASH_MAY_RESIZE_DOWN(src_hash, RSH, rta_pool); -} - - -/* - * Multipath Next Hop - */ + RTA_LOCK; + HASH_MAY_RESIZE_DOWN(o->hash, RSH, rta_pool); -static inline u32 -nexthop_hash(struct nexthop *x) -{ - u32 h = 0; - for (; x; x = x->next) + if (o->stop && !o->uc) { - h ^= ipa_hash(x->gw) ^ (h << 5) ^ (h >> 9); + rfree(o->prune); + RTA_UNLOCK; + + if (config->table_debug) + log(L_TRACE "All rte_src's for %s pruned, scheduling stop event", o->name); - for (int i = 0; i < x->labels; i++) - h ^= x->label[i] ^ (h << 6) ^ (h >> 7); + rt_done_sources(o); } + else + RTA_UNLOCK; +} - return h; +void +rt_init_sources(struct rte_owner *o, const char *name, event_list *list) +{ + RTA_LOCK; + HASH_INIT(o->hash, rta_pool, RSH_INIT_ORDER); + o->hash_key = random_u32(); + o->uc = 0; + o->name = name; + o->prune = ev_new_init(rta_pool, rt_prune_sources, o); + o->stop = NULL; + o->list = list; + RTA_UNLOCK; } -int -nexthop__same(struct nexthop *x, struct nexthop *y) +void +rt_destroy_sources(struct rte_owner *o, event *done) { - for (; x && y; x = x->next, y = y->next) + o->stop = done; + + if (!o->uc) { - if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || - (x->flags != y->flags) || (x->weight != y->weight) || - (x->labels_orig != y->labels_orig) || (x->labels != y->labels)) - return 0; + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. All rte_src's already pruned, scheduling stop event", o->name); - for (int i = 0; i < x->labels; i++) - if (x->label[i] != y->label[i]) - return 0; - } + RTA_LOCK; + rfree(o->prune); + RTA_UNLOCK; - return x == y; + rt_done_sources(o); + } + else + if (config->table_debug) + log(L_TRACE "Source owner %s destroy requested. Remaining %u rte_src's to prune.", o->name, o->uc); } +/* + * Multipath Next Hop + */ + static int nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) { int r; - - if (!x) - return 1; - - if (!y) - return -1; - /* Should we also compare flags ? */ r = ((int) y->weight) - ((int) x->weight); @@ -236,23 +383,16 @@ nexthop_compare_node(const struct nexthop *x, const struct nexthop *y) return ((int) x->iface->index) - ((int) y->iface->index); } -static inline struct nexthop * -nexthop_copy_node(const struct nexthop *src, linpool *lp) +static int +nexthop_compare_qsort(const void *x, const void *y) { - struct nexthop *n = lp_alloc(lp, nexthop_size(src)); - - memcpy(n, src, nexthop_size(src)); - n->next = NULL; - - return n; + return nexthop_compare_node( *(const struct nexthop **) x, *(const struct nexthop **) y ); } /** * nexthop_merge - merge nexthop lists * @x: list 1 * @y: list 2 - * @rx: reusability of list @x - * @ry: reusability of list @y * @max: max number of nexthops * @lp: linpool for allocating nexthops * @@ -269,138 +409,227 @@ nexthop_copy_node(const struct nexthop *src, linpool *lp) * resulting list is no longer needed. When reusability is not set, the * corresponding lists are not modified nor linked from the resulting list. */ -struct nexthop * -nexthop_merge(struct nexthop *x, struct nexthop *y, int rx, int ry, int max, linpool *lp) +struct nexthop_adata * +nexthop_merge(struct nexthop_adata *xin, struct nexthop_adata *yin, int max, linpool *lp) { - struct nexthop *root = NULL; - struct nexthop **n = &root; + uint outlen = ADATA_SIZE(xin->ad.length) + ADATA_SIZE(yin->ad.length); + struct nexthop_adata *out = lp_alloc(lp, outlen); + out->ad.length = outlen - sizeof (struct adata); - while ((x || y) && max--) + struct nexthop *x = &xin->nh, *y = &yin->nh, *cur = &out->nh; + int xvalid, yvalid; + + while (max--) { - int cmp = nexthop_compare_node(x, y); + xvalid = NEXTHOP_VALID(x, xin); + yvalid = NEXTHOP_VALID(y, yin); + + if (!xvalid && !yvalid) + break; + + ASSUME(NEXTHOP_VALID(cur, out)); + + int cmp = !xvalid ? 1 : !yvalid ? -1 : nexthop_compare_node(x, y); if (cmp < 0) { - ASSUME(x); - *n = rx ? x : nexthop_copy_node(x, lp); - x = x->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); } else if (cmp > 0) { - ASSUME(y); - *n = ry ? y : nexthop_copy_node(y, lp); - y = y->next; + ASSUME(NEXTHOP_VALID(y, yin)); + memcpy(cur, y, nexthop_size(y)); + y = NEXTHOP_NEXT(y); } else { - ASSUME(x && y); - *n = rx ? x : (ry ? y : nexthop_copy_node(x, lp)); - x = x->next; - y = y->next; + ASSUME(NEXTHOP_VALID(x, xin)); + memcpy(cur, x, nexthop_size(x)); + x = NEXTHOP_NEXT(x); + + ASSUME(NEXTHOP_VALID(y, yin)); + y = NEXTHOP_NEXT(y); } - n = &((*n)->next); + cur = NEXTHOP_NEXT(cur); } - *n = NULL; - return root; + out->ad.length = (void *) cur - (void *) out->ad.data; + + return out; } -void -nexthop_insert(struct nexthop **n, struct nexthop *x) +struct nexthop_adata * +nexthop_sort(struct nexthop_adata *nhad, linpool *lp) { - for (; *n; n = &((*n)->next)) - { - int cmp = nexthop_compare_node(*n, x); + /* Count the nexthops */ + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + cnt++; - if (cmp < 0) - continue; - else if (cmp > 0) - break; - else - return; - } + if (cnt <= 1) + return nhad; - x->next = *n; - *n = x; -} + /* Get pointers to them */ + struct nexthop **sptr = tmp_alloc(cnt * sizeof(struct nexthop *)); -struct nexthop * -nexthop_sort(struct nexthop *x) -{ - struct nexthop *s = NULL; + uint i = 0; + NEXTHOP_WALK(nh, nhad) + sptr[i++] = nh; + + /* Sort the pointers */ + qsort(sptr, cnt, sizeof(struct nexthop *), nexthop_compare_qsort); - /* Simple insert-sort */ - while (x) + /* Allocate the output */ + struct nexthop_adata *out = (struct nexthop_adata *) lp_alloc_adata(lp, nhad->ad.length); + struct nexthop *dest = &out->nh; + + /* Deduplicate nexthops while storing them */ + for (uint i = 0; i < cnt; i++) { - struct nexthop *n = x; - x = n->next; - n->next = NULL; + if (i && !nexthop_compare_node(sptr[i], sptr[i-1])) + continue; - nexthop_insert(&s, n); + memcpy(dest, sptr[i], NEXTHOP_SIZE(sptr[i])); + dest = NEXTHOP_NEXT(dest); } - return s; + out->ad.length = (void *) dest - (void *) out->ad.data; + return out; } int -nexthop_is_sorted(struct nexthop *x) +nexthop_is_sorted(struct nexthop_adata *nhad) { - for (; x && x->next; x = x->next) - if (nexthop_compare_node(x, x->next) >= 0) + struct nexthop *prev = NULL; + NEXTHOP_WALK(nh, nhad) + { + if (prev && (nexthop_compare_node(prev, nh) >= 0)) return 0; + prev = nh; + } + return 1; } -static inline slab * -nexthop_slab(struct nexthop *nh) +/* + * Extended Attributes + */ + +#define EA_CLASS_INITIAL_MAX 128 +static struct ea_class **ea_class_global = NULL; +static uint ea_class_max; +static struct idm ea_class_idm; + +/* Config parser lex register function */ +void ea_lex_register(struct ea_class *def); +void ea_lex_unregister(struct ea_class *def); + +static void +ea_class_free(struct ea_class *cl) { - return nexthop_slab_[MIN(nh->labels, 3)]; + /* No more ea class references. Unregister the attribute. */ + idm_free(&ea_class_idm, cl->id); + ea_class_global[cl->id] = NULL; + if (!cl->hidden) + ea_lex_unregister(cl); } -static struct nexthop * -nexthop_copy(struct nexthop *o) +static void +ea_class_ref_free(resource *r) { - struct nexthop *first = NULL; - struct nexthop **last = &first; - - for (; o; o = o->next) - { - struct nexthop *n = sl_allocz(nexthop_slab(o)); - n->gw = o->gw; - n->iface = o->iface; - n->next = NULL; - n->flags = o->flags; - n->weight = o->weight; - n->labels_orig = o->labels_orig; - n->labels = o->labels; - for (int i=0; i<o->labels; i++) - n->label[i] = o->label[i]; - - *last = n; - last = &(n->next); - } + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + if (!--ref->class->uc) + ea_class_free(ref->class); +} - return first; +static void +ea_class_ref_dump(resource *r) +{ + struct ea_class_ref *ref = SKIP_BACK(struct ea_class_ref, r, r); + debug("name \"%s\", type=%d\n", ref->class->name, ref->class->type); } +static struct resclass ea_class_ref_class = { + .name = "Attribute class reference", + .size = sizeof(struct ea_class_ref), + .free = ea_class_ref_free, + .dump = ea_class_ref_dump, + .lookup = NULL, + .memsize = NULL, +}; + static void -nexthop_free(struct nexthop *o) +ea_class_init(void) { - struct nexthop *n; + idm_init(&ea_class_idm, rta_pool, EA_CLASS_INITIAL_MAX); + ea_class_global = mb_allocz(rta_pool, + sizeof(*ea_class_global) * (ea_class_max = EA_CLASS_INITIAL_MAX)); +} - while (o) - { - n = o->next; - sl_free(nexthop_slab(o), o); - o = n; - } +static struct ea_class_ref * +ea_ref_class(pool *p, struct ea_class *def) +{ + def->uc++; + struct ea_class_ref *ref = ralloc(p, &ea_class_ref_class); + ref->class = def; + return ref; } +static struct ea_class_ref * +ea_register(pool *p, struct ea_class *def) +{ + def->id = idm_alloc(&ea_class_idm); -/* - * Extended Attributes - */ + ASSERT_DIE(ea_class_global); + while (def->id >= ea_class_max) + ea_class_global = mb_realloc(ea_class_global, sizeof(*ea_class_global) * (ea_class_max *= 2)); + + ASSERT_DIE(def->id < ea_class_max); + ea_class_global[def->id] = def; + + if (!def->hidden) + ea_lex_register(def); + + return ea_ref_class(p, def); +} + +struct ea_class_ref * +ea_register_alloc(pool *p, struct ea_class cl) +{ + struct ea_class *clp = ea_class_find_by_name(cl.name); + if (clp && clp->type == cl.type) + return ea_ref_class(p, clp); + + uint namelen = strlen(cl.name) + 1; + + struct { + struct ea_class cl; + char name[0]; + } *cla = mb_alloc(rta_pool, sizeof(struct ea_class) + namelen); + cla->cl = cl; + memcpy(cla->name, cl.name, namelen); + cla->cl.name = cla->name; + + return ea_register(p, &cla->cl); +} + +void +ea_register_init(struct ea_class *clp) +{ + ASSERT_DIE(!ea_class_find_by_name(clp->name)); + ea_register(&root_pool, clp); +} + +struct ea_class * +ea_class_find_by_id(uint id) +{ + ASSERT_DIE(id < ea_class_max); + ASSERT_DIE(ea_class_global[id]); + return ea_class_global[id]; +} static inline eattr * ea__find(ea_list *e, unsigned id) @@ -445,12 +674,11 @@ ea__find(ea_list *e, unsigned id) * to its &eattr structure or %NULL if no such attribute exists. */ eattr * -ea_find(ea_list *e, unsigned id) +ea_find_by_id(ea_list *e, unsigned id) { eattr *a = ea__find(e, id & EA_CODE_MASK); - if (a && (a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF && - !(id & EA_ALLOW_UNDEF)) + if (a && a->undef && !(id & EA_ALLOW_UNDEF)) return NULL; return a; } @@ -517,7 +745,7 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) BIT32_SET(s->visited, n); - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + if (a->undef) continue; s->eattrs = e; @@ -531,25 +759,6 @@ ea_walk(struct ea_walk_state *s, uint id, uint max) return NULL; } -/** - * ea_get_int - fetch an integer attribute - * @e: attribute list - * @id: attribute ID - * @def: default value - * - * This function is a shortcut for retrieving a value of an integer attribute - * by calling ea_find() to find the attribute, extracting its value or returning - * a provided default if no such attribute is present. - */ -int -ea_get_int(ea_list *e, unsigned id, int def) -{ - eattr *a = ea_find(e, id); - if (!a) - return def; - return a->u.data; -} - static inline void ea_do_sort(ea_list *e) { @@ -616,15 +825,18 @@ ea_do_prune(ea_list *e) s++; /* Now s0 is the most recent version, s[-1] the oldest one */ - /* Drop undefs */ - if ((s0->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) + /* Drop undefs unless this is a true overlay */ + if (s0->undef && (s[-1].undef || !e->next)) continue; /* Copy the newest version to destination */ *d = *s0; /* Preserve info whether it originated locally */ - d->type = (d->type & ~(EAF_ORIGINATED|EAF_FRESH)) | (s[-1].type & EAF_ORIGINATED); + d->originated = s[-1].originated; + + /* Not fresh any more, we prefer surstroemming */ + d->fresh = 0; /* Next destination */ d++; @@ -644,21 +856,18 @@ ea_do_prune(ea_list *e) * If an attribute occurs multiple times in a single &ea_list, * ea_sort() leaves only the first (the only significant) occurrence. */ -void +static void ea_sort(ea_list *e) { - while (e) - { - if (!(e->flags & EALF_SORTED)) - { - ea_do_sort(e); - ea_do_prune(e); - e->flags |= EALF_SORTED; - } - if (e->count > 5) - e->flags |= EALF_BISECT; - e = e->next; - } + if (!(e->flags & EALF_SORTED)) + { + ea_do_sort(e); + ea_do_prune(e); + e->flags |= EALF_SORTED; + } + + if (e->count > 5) + e->flags |= EALF_BISECT; } /** @@ -668,8 +877,8 @@ ea_sort(ea_list *e) * This function calculates an upper bound of the size of * a given &ea_list after merging with ea_merge(). */ -unsigned -ea_scan(ea_list *e) +static unsigned +ea_scan(const ea_list *e, int overlay) { unsigned cnt = 0; @@ -677,6 +886,8 @@ ea_scan(ea_list *e) { cnt += e->count; e = e->next; + if (e && overlay && ea_is_cached(e)) + break; } return sizeof(ea_list) + sizeof(eattr)*cnt; } @@ -695,21 +906,36 @@ ea_scan(ea_list *e) * segments with ea_merge() and finally sort and prune the result * by calling ea_sort(). */ -void -ea_merge(ea_list *e, ea_list *t) +static void +ea_merge(ea_list *e, ea_list *t, int overlay) { eattr *d = t->attrs; t->flags = 0; t->count = 0; - t->next = NULL; + while (e) { memcpy(d, e->attrs, sizeof(eattr)*e->count); t->count += e->count; d += e->count; e = e->next; + + if (e && overlay && ea_is_cached(e)) + break; } + + t->next = e; +} + +ea_list * +ea_normalize(ea_list *e, int overlay) +{ + ea_list *t = tmp_alloc(ea_scan(e, overlay)); + ea_merge(e, t, overlay); + ea_sort(t); + + return t->count ? t : t->next; } /** @@ -727,7 +953,8 @@ ea_same(ea_list *x, ea_list *y) if (!x || !y) return x == y; - ASSERT(!x->next && !y->next); + if (x->next != y->next) + return 0; if (x->count != y->count) return 0; for(c=0; c<x->count; c++) @@ -738,39 +965,46 @@ ea_same(ea_list *x, ea_list *y) if (a->id != b->id || a->flags != b->flags || a->type != b->type || + a->originated != b->originated || + a->fresh != b->fresh || + a->undef != b->undef || ((a->type & EAF_EMBEDDED) ? a->u.data != b->u.data : !adata_same(a->u.ptr, b->u.ptr))) return 0; } return 1; } -static inline ea_list * -ea_list_copy(ea_list *o) +uint +ea_list_size(ea_list *o) { - ea_list *n; - unsigned i, adpos, elen; + unsigned i, elen; - if (!o) - return NULL; - ASSERT(!o->next); - elen = adpos = sizeof(ea_list) + sizeof(eattr) * o->count; + ASSERT_DIE(o); + elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) - elen += sizeof(struct adata) + a->u.ptr->length; + if (!a->undef && !(a->type & EAF_EMBEDDED)) + elen += ADATA_SIZE(a->u.ptr->length); } - n = mb_alloc(rta_pool, elen); + return elen; +} + +void +ea_list_copy(ea_list *n, ea_list *o, uint elen) +{ + uint adpos = sizeof(ea_list) + sizeof(eattr) * o->count; memcpy(n, o, adpos); - n->flags |= EALF_CACHED; - for(i=0; i<o->count; i++) + adpos = BIRD_CPU_ALIGN(adpos); + + for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) { - unsigned size = sizeof(struct adata) + a->u.ptr->length; + unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); struct adata *d = ((void *) n) + adpos; @@ -780,30 +1014,58 @@ ea_list_copy(ea_list *o) adpos += size; } } + ASSERT_DIE(adpos == elen); - return n; } -static inline void -ea_free(ea_list *o) +static void +ea_list_ref(ea_list *l) { - if (o) + for(uint i=0; i<l->count; i++) { - ASSERT(!o->next); - mb_free(o); + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->stored, a); + cl->uc++; } + + if (l->next) + { + ASSERT_DIE(ea_is_cached(l->next)); + ea_clone(l->next); + } } -static int -get_generic_attr(const eattr *a, byte **buf, int buflen UNUSED) +static void ea_free_nested(ea_list *l); + +static void +ea_list_unref(ea_list *l) { - if (a->id == EA_GEN_IGP_METRIC) + for(uint i=0; i<l->count; i++) { - *buf += bsprintf(*buf, "igp_metric"); - return GA_NAME; + eattr *a = &l->attrs[i]; + ASSERT_DIE(a->id < ea_class_max); + + if (a->undef) + continue; + + struct ea_class *cl = ea_class_global[a->id]; + ASSERT_DIE(cl && cl->uc); + + CALL(cl->freed, a); + if (!--cl->uc) + ea_class_free(cl); } - return GA_UNKNOWN; + if (l->next) + ea_free_nested(l->next); } void @@ -856,41 +1118,90 @@ opaque_format(const struct adata *ad, byte *buf, uint size) } static inline void -ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end) +ea_show_int_set(struct cli *c, const char *name, const struct adata *ad, int way, byte *buf) { - int i = int_set_format(ad, way, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = int_set_format(ad, way, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = int_set_format(ad, way, i, buf, end - buf - 1); + i = int_set_format(ad, way, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_ec_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = ec_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = ec_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = ec_set_format(ad, i, buf, end - buf - 1); + i = ec_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_lc_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = lc_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = lc_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = lc_set_format(ad, i, buf, end - buf - 1); + i = lc_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } +void +ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad) +{ + if (!NEXTHOP_IS_REACHABLE(nhad)) + return; + + NEXTHOP_WALK(nh, nhad) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; + char weight[16] = ""; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (!NEXTHOP_ONE(nhad)) + bsprintf(weight, " weight %d", nh->weight + 1); + + if (ipa_nonzero(nh->gw)) + if (nh->iface) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tvia %I", nh->gw); + else + cli_printf(c, -1007, "\tdev %s%s%s", + nh->iface->name, mpls, onlink, weight); + } +} + +void +ea_show_hostentry(const struct adata *ad, byte *buf, uint size) +{ + const struct hostentry_adata *had = (const struct hostentry_adata *) ad; + + if (ipa_nonzero(had->he->link) && !ipa_equal(had->he->link, had->he->addr)) + bsnprintf(buf, size, "via %I %I table %s", had->he->addr, had->he->link, had->he->tab->name); + else + bsnprintf(buf, size, "via %I table %s", had->he->addr, had->he->tab->name); +} + /** * ea_show - print an &eattr to CLI * @c: destination CLI @@ -902,79 +1213,80 @@ ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte * If the protocol defining the attribute provides its own * get_attr() hook, it's consulted first. */ -void +static void ea_show(struct cli *c, const eattr *e) { - struct protocol *p; - int status = GA_UNKNOWN; const struct adata *ad = (e->type & EAF_EMBEDDED) ? NULL : e->u.ptr; byte buf[CLI_MSG_SIZE]; byte *pos = buf, *end = buf + sizeof(buf); - if (EA_IS_CUSTOM(e->id)) - { - const char *name = ea_custom_name(e->id); - if (name) - { - pos += bsprintf(pos, "%s", name); - status = GA_NAME; - } - else - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); - } - else if (p = class_to_protocol[EA_PROTO(e->id)]) - { - pos += bsprintf(pos, "%s.", p->name); - if (p->get_attr) - status = p->get_attr(e, pos, end - pos); - pos += strlen(pos); - } - else if (EA_PROTO(e->id)) - pos += bsprintf(pos, "%02x.", EA_PROTO(e->id)); + ASSERT_DIE(e->id < ea_class_max); + + struct ea_class *cls = ea_class_global[e->id]; + ASSERT_DIE(cls); + + if (e->undef || cls->hidden) + return; + else if (cls->format) + cls->format(e, buf, end - buf); else - status = get_generic_attr(e, &pos, end - pos); + switch (e->type) + { + case T_INT: + if ((cls == &ea_gen_igp_metric) && e->u.data >= IGP_METRIC_UNKNOWN) + return; - if (status < GA_NAME) - pos += bsprintf(pos, "%02x", EA_ID(e->id)); - if (status < GA_FULL) - { - *pos++ = ':'; - *pos++ = ' '; - switch (e->type & EAF_TYPE_MASK) - { - case EAF_TYPE_INT: bsprintf(pos, "%u", e->u.data); break; - case EAF_TYPE_OPAQUE: + case T_OPAQUE: opaque_format(ad, pos, end - pos); break; - case EAF_TYPE_IP_ADDRESS: + case T_IP: bsprintf(pos, "%I", *(ip_addr *) ad->data); break; - case EAF_TYPE_ROUTER_ID: + case T_QUAD: bsprintf(pos, "%R", e->u.data); break; - case EAF_TYPE_AS_PATH: + case T_PATH: as_path_format(ad, pos, end - pos); break; - case EAF_TYPE_BITFIELD: - bsprintf(pos, "%08x", e->u.data); - break; - case EAF_TYPE_INT_SET: - ea_show_int_set(c, ad, 1, pos, buf, end); + case T_CLIST: + ea_show_int_set(c, cls->name, ad, 1, buf); + return; + case T_ECLIST: + ea_show_ec_set(c, cls->name, ad, buf); return; - case EAF_TYPE_EC_SET: - ea_show_ec_set(c, ad, pos, buf, end); + case T_LCLIST: + ea_show_lc_set(c, cls->name, ad, buf); return; - case EAF_TYPE_LC_SET: - ea_show_lc_set(c, ad, pos, buf, end); + case T_NEXTHOP_LIST: + ea_show_nexthop_list(c, (struct nexthop_adata *) e->u.ptr); return; - case EAF_TYPE_UNDEF: + case T_HOSTENTRY: + ea_show_hostentry(ad, pos, end - pos); + break; default: bsprintf(pos, "<type %02x>", e->type); - } + } + + cli_printf(c, -1012, "\t%s: %s", cls->name, buf); +} + +static void +nexthop_dump(const struct adata *ad) +{ + struct nexthop_adata *nhad = (struct nexthop_adata *) ad; + + debug(":"); + + NEXTHOP_WALK(nh, nhad) + { + if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); + if (nh->labels) debug(" L %d", nh->label[0]); + for (int i=1; i<nh->labels; i++) + debug("/%d", nh->label[i]); + debug(" [%s]", nh->iface ? nh->iface->name : "???"); } - cli_printf(c, -1012, "\t%s", buf); } /** @@ -996,19 +1308,26 @@ ea_dump(ea_list *e) } while (e) { - debug("[%c%c%c]", + struct ea_storage *s = ea_is_cached(e) ? ea_get_storage(e) : NULL; + debug("[%c%c%c] uc=%d h=%08x", (e->flags & EALF_SORTED) ? 'S' : 's', (e->flags & EALF_BISECT) ? 'B' : 'b', - (e->flags & EALF_CACHED) ? 'C' : 'c'); + (e->flags & EALF_CACHED) ? 'C' : 'c', + s ? s->uc : 0, s ? s->hash_key : 0); for(i=0; i<e->count; i++) { eattr *a = &e->attrs[i]; - debug(" %02x:%02x.%02x", EA_PROTO(a->id), EA_ID(a->id), a->flags); - debug("=%c", "?iO?I?P???S?????" [a->type & EAF_TYPE_MASK]); - if (a->type & EAF_ORIGINATED) + debug(" %04x.%02x", a->id, a->flags); + debug("=%c", + "?iO?IRP???S??pE?" + "??L???N?????????" + "?o???r??????????" [a->type]); + if (a->originated) debug("o"); if (a->type & EAF_EMBEDDED) debug(":%08x", a->u.data); + else if (a->id == ea_gen_nexthop.id) + nexthop_dump(a->u.ptr); else { int j, len = a->u.ptr->length; @@ -1038,10 +1357,13 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { + h ^= mem_hash(&e->next, sizeof(e->next)); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; h ^= a->id; h *= mul; + if (a->undef) + continue; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else @@ -1085,12 +1407,12 @@ static uint rta_cache_count; static uint rta_cache_size = 32; static uint rta_cache_limit; static uint rta_cache_mask; -static rta **rta_hash_table; +static struct ea_storage **rta_hash_table; static void rta_alloc_hash(void) { - rta_hash_table = mb_allocz(rta_pool, sizeof(rta *) * rta_cache_size); + rta_hash_table = mb_allocz(rta_pool, sizeof(struct ea_storage *) * rta_cache_size); if (rta_cache_size < 32768) rta_cache_limit = rta_cache_size * 2; else @@ -1098,64 +1420,14 @@ rta_alloc_hash(void) rta_cache_mask = rta_cache_size - 1; } -static inline uint -rta_hash(rta *a) -{ - u64 h; - mem_hash_init(&h); -#define MIX(f) mem_hash_mix(&h, &(a->f), sizeof(a->f)); - MIX(src); - MIX(hostentry); - MIX(from); - MIX(igp_metric); - MIX(source); - MIX(scope); - MIX(dest); -#undef MIX - - return mem_hash_value(&h) ^ nexthop_hash(&(a->nh)) ^ ea_hash(a->eattrs); -} - -static inline int -rta_same(rta *x, rta *y) -{ - return (x->src == y->src && - x->source == y->source && - x->scope == y->scope && - x->dest == y->dest && - x->igp_metric == y->igp_metric && - ipa_equal(x->from, y->from) && - x->hostentry == y->hostentry && - nexthop_same(&(x->nh), &(y->nh)) && - ea_same(x->eattrs, y->eattrs)); -} - -static inline slab * -rta_slab(rta *a) -{ - return rta_slab_[a->nh.labels > 2 ? 3 : a->nh.labels]; -} - -static rta * -rta_copy(rta *o) -{ - rta *r = sl_alloc(rta_slab(o)); - - memcpy(r, o, rta_size(o)); - r->uc = 1; - r->nh.next = nexthop_copy(o->nh.next); - r->eattrs = ea_list_copy(o->eattrs); - return r; -} - static inline void -rta_insert(rta *r) +rta_insert(struct ea_storage *r) { uint h = r->hash_key & rta_cache_mask; - r->next = rta_hash_table[h]; - if (r->next) - r->next->pprev = &r->next; - r->pprev = &rta_hash_table[h]; + r->next_hash = rta_hash_table[h]; + if (r->next_hash) + r->next_hash->pprev_hash = &r->next_hash; + r->pprev_hash = &rta_hash_table[h]; rta_hash_table[h] = r; } @@ -1164,8 +1436,8 @@ rta_rehash(void) { uint ohs = rta_cache_size; uint h; - rta *r, *n; - rta **oht = rta_hash_table; + struct ea_storage *r, *n; + struct ea_storage **oht = rta_hash_table; rta_cache_size = 2*rta_cache_size; DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size); @@ -1173,7 +1445,7 @@ rta_rehash(void) for(h=0; h<ohs; h++) for(r=oht[h]; r; r=n) { - n = r->next; + n = r->next_hash; rta_insert(r); } mb_free(oht); @@ -1192,102 +1464,75 @@ rta_rehash(void) * The extended attribute lists attached to the &rta are automatically * converted to the normalized form. */ -rta * -rta_lookup(rta *o) +ea_list * +ea_lookup(ea_list *o, int overlay) { - rta *r; + struct ea_storage *r; uint h; - ASSERT(!(o->aflags & RTAF_CACHED)); - if (o->eattrs) - ea_normalize(o->eattrs); + ASSERT(!ea_is_cached(o)); + o = ea_normalize(o, overlay); + h = ea_hash(o); + + RTA_LOCK; + + for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) + if (r->hash_key == h && ea_same(r->l, o)) + { + atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel); + RTA_UNLOCK; + return r->l; + } - h = rta_hash(o); - for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next) - if (r->hash_key == h && rta_same(r, o)) - return rta_clone(r); + uint elen = ea_list_size(o); + r = mb_alloc(rta_pool, elen + sizeof(struct ea_storage)); + ea_list_copy(r->l, o, elen); + ea_list_ref(r->l); - r = rta_copy(o); + r->l->flags |= EALF_CACHED; r->hash_key = h; - r->aflags = RTAF_CACHED; - rt_lock_source(r->src); - rt_lock_hostentry(r->hostentry); + r->uc = 1; + rta_insert(r); if (++rta_cache_count > rta_cache_limit) rta_rehash(); - return r; + RTA_UNLOCK; + return r->l; } -void -rta__free(rta *a) +static void +ea_free_locked(struct ea_storage *a) { - ASSERT(rta_cache_count && (a->aflags & RTAF_CACHED)); + /* Somebody has cloned this rta inbetween. This sometimes happens. */ + if (atomic_load_explicit(&a->uc, memory_order_acquire)) + return; + + ASSERT(rta_cache_count); rta_cache_count--; - *a->pprev = a->next; - if (a->next) - a->next->pprev = a->pprev; - rt_unlock_hostentry(a->hostentry); - rt_unlock_source(a->src); - if (a->nh.next) - nexthop_free(a->nh.next); - ea_free(a->eattrs); - a->aflags = 0; /* Poison the entry */ - sl_free(rta_slab(a), a); + *a->pprev_hash = a->next_hash; + if (a->next_hash) + a->next_hash->pprev_hash = a->pprev_hash; + + ea_list_unref(a->l); + mb_free(a); } -rta * -rta_do_cow(rta *o, linpool *lp) +static void +ea_free_nested(struct ea_list *l) { - rta *r = lp_alloc(lp, rta_size(o)); - memcpy(r, o, rta_size(o)); - for (struct nexthop **nhn = &(r->nh.next), *nho = o->nh.next; nho; nho = nho->next) - { - *nhn = lp_alloc(lp, nexthop_size(nho)); - memcpy(*nhn, nho, nexthop_size(nho)); - nhn = &((*nhn)->next); - } - r->aflags = 0; - r->uc = 0; - return r; + struct ea_storage *r = ea_get_storage(l); + if (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel)) + ea_free_locked(r); } -/** - * rta_dump - dump route attributes - * @a: attribute structure to dump - * - * This function takes a &rta and dumps its contents to the debug output. - */ void -rta_dump(rta *a) +ea__free(struct ea_storage *a) { - static char *rts[] = { "RTS_DUMMY", "RTS_STATIC", "RTS_INHERIT", "RTS_DEVICE", - "RTS_STAT_DEV", "RTS_REDIR", "RTS_RIP", - "RTS_OSPF", "RTS_OSPF_IA", "RTS_OSPF_EXT1", - "RTS_OSPF_EXT2", "RTS_BGP", "RTS_PIPE", "RTS_BABEL" }; - static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" }; - - debug("p=%s uc=%d %s %s%s h=%04x", - a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), - rtd[a->dest], a->hash_key); - if (!(a->aflags & RTAF_CACHED)) - debug(" !CACHED"); - debug(" <-%I", a->from); - if (a->dest == RTD_UNICAST) - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) - { - if (ipa_nonzero(nh->gw)) debug(" ->%I", nh->gw); - if (nh->labels) debug(" L %d", nh->label[0]); - for (int i=1; i<nh->labels; i++) - debug("/%d", nh->label[i]); - debug(" [%s]", nh->iface ? nh->iface->name : "???"); - } - if (a->eattrs) - { - debug(" EA: "); - ea_dump(a->eattrs); - } + RTA_LOCK; + ea_free_locked(a); + RTA_UNLOCK; } /** @@ -1297,30 +1542,29 @@ rta_dump(rta *a) * to the debug output. */ void -rta_dump_all(void) +ea_dump_all(void) { - rta *a; - uint h; + RTA_LOCK; debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit); - for(h=0; h<rta_cache_size; h++) - for(a=rta_hash_table[h]; a; a=a->next) + for (uint h=0; h < rta_cache_size; h++) + for (struct ea_storage *a = rta_hash_table[h]; a; a = a->next_hash) { debug("%p ", a); - rta_dump(a); + ea_dump(a->l); debug("\n"); } debug("\n"); + + RTA_UNLOCK; } void -rta_show(struct cli *c, rta *a) +ea_show_list(struct cli *c, ea_list *eal) { - cli_printf(c, -1008, "\tType: %s %s", rta_src_names[a->source], ip_scope_text(a->scope)); - - for(ea_list *eal = a->eattrs; eal; eal=eal->next) - for(int i=0; i<eal->count; i++) - ea_show(c, &eal->attrs[i]); + ea_list *n = ea_normalize(eal, 0); + for (int i =0; i < n->count; i++) + ea_show(c, &n->attrs[i]); } /** @@ -1332,20 +1576,24 @@ rta_show(struct cli *c, rta *a) void rta_init(void) { - rta_pool = rp_new(&root_pool, "Attributes"); + attrs_domain = DOMAIN_NEW(attrs, "Attributes"); - rta_slab_[0] = sl_new(rta_pool, sizeof(rta)); - rta_slab_[1] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)); - rta_slab_[2] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*2); - rta_slab_[3] = sl_new(rta_pool, sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK); - - nexthop_slab_[0] = sl_new(rta_pool, sizeof(struct nexthop)); - nexthop_slab_[1] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)); - nexthop_slab_[2] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*2); - nexthop_slab_[3] = sl_new(rta_pool, sizeof(struct nexthop) + sizeof(u32)*MPLS_MAX_LABEL_STACK); + rta_pool = rp_new(&root_pool, "Attributes"); rta_alloc_hash(); rte_src_init(); + ea_class_init(); + + /* These attributes are required to be first for nice "show route" output */ + ea_register_init(&ea_gen_nexthop); + ea_register_init(&ea_gen_hostentry); + + /* Other generic route attributes */ + ea_register_init(&ea_gen_preference); + ea_register_init(&ea_gen_igp_metric); + ea_register_init(&ea_gen_from); + ea_register_init(&ea_gen_source); + ea_register_init(&ea_gen_flowspec_valid); } /* diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 61f025ce..4199e17c 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/rt-dev.h" #include "conf/conf.h" #include "lib/resource.h" @@ -67,13 +67,11 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rte_update2(c, net, NULL, src); + rte_update(c, net, NULL, src); + rt_unlock_source(src); } else if (flags & IF_CHANGE_UP) { - rta *a; - rte *e; - DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip); if (cf->check_link && !(ad->iface->flags & IF_LINK_UP)) @@ -82,18 +80,23 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rta a0 = { + ea_list *ea = NULL; + struct nexthop_adata nhad = { + .nh = { .iface = ad->iface, }, + .ad = { .length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data, }, + }; + + ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_DEVICE); + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); + + rte e0 = { + .attrs = ea, .src = src, - .source = RTS_DEVICE, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh.iface = ad->iface, }; - a = rta_lookup(&a0); - e = rte_get_temp(a); - e->pflags = 0; - rte_update2(c, net, e, src); + rte_update(c, net, &e0, src); + rt_unlock_source(src); } } @@ -185,7 +188,6 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_device = { .name = "Direct", .template = "direct%d", - .class = PROTOCOL_DIRECT, .preference = DEF_PREF_DIRECT, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct rt_dev_proto), @@ -195,3 +197,9 @@ struct protocol proto_device = { .reconfigure = dev_reconfigure, .copy_config = dev_copy_config }; + +void +dev_build(void) +{ + proto_build(&proto_device); +} diff --git a/nest/rt-fib.c b/nest/rt-fib.c index 1690a8f6..801561da 100644 --- a/nest/rt-fib.c +++ b/nest/rt-fib.c @@ -55,7 +55,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/string.h" /* @@ -475,7 +475,7 @@ fib_delete(struct fib *f, void *E) } if (f->fib_slab) - sl_free(f->fib_slab, E); + sl_free(E); else mb_free(E); diff --git a/nest/rt-show.c b/nest/rt-show.c index 7639e5f7..17400029 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -10,7 +10,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/cli.h" #include "nest/iface.h" @@ -19,90 +19,83 @@ #include "sysdep/unix/krt.h" static void -rt_show_table(struct cli *c, struct rt_show_data *d) +rt_show_table(struct rt_show_data *d) { + struct cli *c = d->cli; + /* No table blocks in 'show route count' */ if (d->stats == 2) return; if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, -1007, "Table %s:", d->tab->table->name); + cli_printf(c, -1007, "Table %s:", + d->tab->name); d->last_table = d->tab; } -static inline struct krt_proto * -rt_show_get_kernel(struct rt_show_data *d) -{ - struct proto_config *krt = d->tab->table->config->krt_attached; - return krt ? (struct krt_proto *) krt->proto : NULL; -} - static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary) { byte from[IPA_MAX_TEXT_LENGTH+8]; byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; - rta *a = e->attrs; - int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; + ea_list *a = e->attrs; + int sync_error = d->tab->kernel ? krt_get_sync_error(d->tab->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); - struct nexthop *nh; + eattr *nhea = net_type_match(e->net, NB_DEST) ? + ea_find(a, &ea_gen_nexthop) : NULL; + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + int dest = nhad ? (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) : RTD_NONE; + int flowspec_valid = net_is_flow(e->net) ? rt_get_flowspec_valid(e) : FLOWSPEC_UNKNOWN; tm_format_time(tm, &config->tf_route, e->lastmod); - if (ipa_nonzero(a->from) && !ipa_equal(a->from, a->nh.gw)) - bsprintf(from, " from %I", a->from); + ip_addr a_from = ea_get_ip(a, &ea_gen_from, IPA_NONE); + if (ipa_nonzero(a_from) && (!nhad || !ipa_equal(a_from, nhad->nh.gw))) + bsprintf(from, " from %I", a_from); else from[0] = 0; /* Need to normalize the extended attributes */ - if (d->verbose && !rta_is_cached(a) && a->eattrs) - ea_normalize(a->eattrs); + if (d->verbose && !rta_is_cached(a) && a) + a = ea_normalize(a, 0); - get_route_info = a->src->proto->proto->get_route_info; + get_route_info = e->src->owner->class ? e->src->owner->class->get_route_info : NULL; if (get_route_info) get_route_info(e, info); else - bsprintf(info, " (%d)", e->pref); + bsprintf(info, " (%d)", rt_get_preference(e)); if (d->last_table != d->tab) - rt_show_table(c, d); - - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(a->dest), - a->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); - - if (a->dest == RTD_UNICAST) - for (nh = &(a->nh); nh; nh = nh->next) - { - char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; - char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; - char weight[16] = ""; - - if (nh->labels) - { - lsp += bsprintf(lsp, " mpls %d", nh->label[0]); - for (int i=1;i<nh->labels; i++) - lsp += bsprintf(lsp, "/%d", nh->label[i]); - } - *lsp = '\0'; + rt_show_table(d); - if (a->nh.next) - bsprintf(weight, " weight %d", nh->weight + 1); + eattr *heea; + struct hostentry_adata *had = NULL; + if (!net_is_flow(e->net) && (dest == RTD_NONE) && (heea = ea_find(a, &ea_gen_hostentry))) + had = (struct hostentry_adata *) heea->u.ptr; - if (ipa_nonzero(nh->gw)) - cli_printf(c, -1007, "\tvia %I on %s%s%s%s", - nh->gw, nh->iface->name, mpls, onlink, weight); - else - cli_printf(c, -1007, "\tdev %s%s%s", - nh->iface->name, mpls, onlink, weight); - } + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest), + e->src->owner->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (d->verbose) - rta_show(c, a); + { + ea_show_list(c, a); + cli_printf(c, -1008, "\tInternal route handling values: %uL %uG %uS id %u", + e->src->private_id, e->src->global_id, e->stale_cycle, e->id); + } + else if (dest == RTD_UNICAST) + ea_show_nexthop_list(c, nhad); + else if (had) + { + char hetext[256]; + ea_show_hostentry(&had->ad, hetext, sizeof hetext); + cli_printf(c, -1007, "\t%s", hetext); + } } static void -rt_show_net(struct cli *c, net *n, struct rt_show_data *d) +rt_show_net(struct rt_show_data *d, const net_addr *n, rte **feed, uint count) { - rte *e, *ee; + struct cli *c = d->cli; byte ia[NET_MAX_TEXT_LENGTH+1]; struct channel *ec = d->tab->export_channel; @@ -114,9 +107,9 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) int first_show = 1; int pass = 0; - for (e = n->routes; e; e = e->next) + for (uint i = 0; i < count; i++) { - if (rte_is_filtered(e) != d->filtered) + if (!d->tab->prefilter && (rte_is_filtered(feed[i]) != d->filtered)) continue; d->rt_counter++; @@ -126,16 +119,20 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (pass) continue; - ee = e; - rte_make_tmp_attrs(&e, c->show_pool, NULL); + struct rte e = *feed[i]; + if (d->tab->prefilter) + if (e.sender != d->tab->prefilter->in_req.hook) + continue; + else while (e.attrs->next) + e.attrs = e.attrs->next; /* Export channel is down, do not try to export routes to it */ - if (ec && (ec->export_state == ES_DOWN)) + if (ec && !ec->out_req.hook) goto skip; if (d->export_mode == RSEM_EXPORTED) { - if (!bmap_test(&ec->export_map, ee->id)) + if (!bmap_test(&ec->export_map, e.id)) goto skip; // if (ec->ra_mode != RA_ANY) @@ -144,17 +141,18 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) else if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED)) { /* Special case for merged export */ - rte *rt_free; - e = rt_export_merged(ec, n, &rt_free, c->show_pool, 1); pass = 1; + rte *em = rt_export_merged(ec, feed, count, tmp_linpool, 1); - if (!e) - { e = ee; goto skip; } + if (em) + e = *em; + else + goto skip; } else if (d->export_mode) { struct proto *ep = ec->proto; - int ic = ep->preexport ? ep->preexport(ec, &e, c->show_pool) : 0; + int ic = ep->preexport ? ep->preexport(ec, &e) : 0; if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED) pass = 1; @@ -170,7 +168,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) * command may change the export filter and do not update routes. */ int do_export = (ic > 0) || - (f_run(ec->out_filter, &e, c->show_pool, FF_SILENT) <= F_ACCEPT); + (f_run(ec->out_filter, &e, FF_SILENT) <= F_ACCEPT); if (do_export != (d->export_mode == RSEM_EXPORT)) goto skip; @@ -180,184 +178,193 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) } } - if (d->show_protocol && (d->show_protocol != e->attrs->src->proto)) + if (d->show_protocol && (&d->show_protocol->sources != e.src->owner)) goto skip; - if (f_run(d->filter, &e, c->show_pool, 0) > F_ACCEPT) + if (f_run(d->filter, &e, 0) > F_ACCEPT) goto skip; if (d->stats < 2) { if (first_show) - net_format(n->n.addr, ia, sizeof(ia)); + net_format(n, ia, sizeof(ia)); else ia[0] = 0; - rt_show_rte(c, ia, e, d, (e->net->routes == ee)); + rt_show_rte(c, ia, &e, d, !d->tab->prefilter && !i); first_show = 0; } d->show_counter++; skip: - if (e != ee) - { - rte_free(e); - e = ee; - } - lp_flush(c->show_pool); - if (d->primary_only) break; } + + if ((d->show_counter - d->show_counter_last_flush) > 64) + { + d->show_counter_last_flush = d->show_counter; + cli_write_trigger(d->cli); + } } static void -rt_show_cleanup(struct cli *c) +rt_show_net_export_bulk(struct rt_export_request *req, const net_addr *n, + struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rt_show_data *d = c->rover; - struct rt_show_data_rtable *tab; + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + return rt_show_net(d, n, feed, count); +} - /* Unlink the iterator */ - if (d->table_open && !d->trie_walk) - fit_get(&d->tab->table->fib, &d->fit); +static void +rt_show_export_stopped_cleanup(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); - if (d->walk_lock) - rt_unlock_trie(d->tab->table, d->walk_lock); + /* The hook is now invalid */ + req->hook = NULL; - /* Unlock referenced tables */ - WALK_LIST(tab, d->tables) - rt_unlock_table(tab->table); + /* And free the CLI (deferred) */ + rfree(d->cli->pool); } -static void -rt_show_cont(struct cli *c) +static int +rt_show_cleanup(struct cli *c) { struct rt_show_data *d = c->rover; - struct rtable *tab = d->tab->table; -#ifdef DEBUGGING - unsigned max = 4; -#else - unsigned max = 64; -#endif - struct fib *fib = &tab->fib; - struct fib_iterator *it = &d->fit; - if (d->running_on_config && (d->running_on_config != config)) + /* Cancel the feed */ + if (d->req.hook) { - cli_printf(c, 8004, "Stopped due to reconfiguration"); - goto done; + rt_stop_export(&d->req, rt_show_export_stopped_cleanup); + return 1; } + else + return 0; +} - if (!d->table_open) - { - /* We use either trie-based walk or fib-based walk */ - d->trie_walk = tab->trie && - (d->addr_mode == RSD_ADDR_IN) && - net_val_match(tab->addr_type, NB_IP); +static void rt_show_export_stopped(struct rt_export_request *req); - if (d->trie_walk && !d->walk_state) - d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); +static void +rt_show_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, rt_show_export_stopped); +} - if (d->trie_walk) - { - d->walk_lock = rt_lock_trie(tab); - trie_walk_init(d->walk_state, tab->trie, d->addr); - } - else - FIB_ITERATE_INIT(&d->fit, &tab->fib); +static void +rt_show_dump_req(struct rt_export_request *req) +{ + debug(" CLI Show Route Feed %p\n", req); +} - d->table_open = 1; - d->table_counter++; - d->kernel = rt_show_get_kernel(d); +static void +rt_show_done(struct rt_show_data *d) +{ + /* No more action */ + d->cli->cleanup = NULL; + d->cli->cont = NULL; + d->cli->rover = NULL; - d->show_counter_last = d->show_counter; - d->rt_counter_last = d->rt_counter; - d->net_counter_last = d->net_counter; + /* Write pending messages */ + cli_write_trigger(d->cli); +} - if (d->tables_defined_by & RSD_TDB_SET) - rt_show_table(c, d); - } +static void +rt_show_cont(struct rt_show_data *d) +{ + struct cli *c = d->cli; - if (d->trie_walk) + if (d->running_on_config && (d->running_on_config != config)) { - /* Trie-based walk */ - net_addr addr; - while (trie_walk_next(d->walk_state, &addr)) - { - net *n = net_find(tab, &addr); - if (!n) - continue; + cli_printf(c, 8004, "Stopped due to reconfiguration"); + return rt_show_done(d); + } - rt_show_net(c, n, d); + d->req = (struct rt_export_request) { + .addr = d->addr, + .name = "CLI Show Route", + .list = &global_work_list, + .export_bulk = rt_show_net_export_bulk, + .dump_req = rt_show_dump_req, + .log_state_change = rt_show_log_state_change, + .addr_mode = d->addr_mode, + }; - if (!--max) - return; - } + d->table_counter++; - rt_unlock_trie(tab, d->walk_lock); - d->walk_lock = NULL; - } - else - { - /* fib-based walk */ - FIB_ITERATE_START(fib, it, net, n) - { - if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) - goto next; + d->show_counter_last = d->show_counter; + d->rt_counter_last = d->rt_counter; + d->net_counter_last = d->net_counter; - if (!max--) - { - FIB_ITERATE_PUT(it); - return; - } - rt_show_net(c, n, d); + if (d->tables_defined_by & RSD_TDB_SET) + rt_show_table(d); - next:; - } - FIB_ITERATE_END; - } + rt_request_export(d->tab->table, &d->req); +} + +static void +rt_show_export_stopped(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + + /* The hook is now invalid */ + req->hook = NULL; if (d->stats) { if (d->last_table != d->tab) - rt_show_table(c, d); + rt_show_table(d); - cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", + cli_printf(d->cli, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, tab->name); + d->net_counter - d->net_counter_last, d->tab->name); } - d->kernel = NULL; - d->table_open = 0; d->tab = NODE_NEXT(d->tab); if (NODE_VALID(d->tab)) - return; + return rt_show_cont(d); + /* Printout total stats */ if (d->stats && (d->table_counter > 1)) { - if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, 14, "Total: %d of %d routes for %d networks in %d tables", + if (d->last_table) cli_printf(d->cli, -1007, ""); + cli_printf(d->cli, 14, "Total: %d of %d routes for %d networks in %d tables", d->show_counter, d->rt_counter, d->net_counter, d->table_counter); } + else if (!d->rt_counter && ((d->addr_mode == TE_ADDR_EQUAL) || (d->addr_mode == TE_ADDR_FOR))) + cli_printf(d->cli, 8001, "Network not found"); else - cli_printf(c, 0, ""); + cli_printf(d->cli, 0, ""); -done: - rt_show_cleanup(c); - c->cont = c->cleanup = NULL; + /* No more route showing */ + rt_show_done(d); } struct rt_show_data_rtable * -rt_show_add_table(struct rt_show_data *d, rtable *t) +rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name) { struct rt_show_data_rtable *tab = cfg_allocz(sizeof(struct rt_show_data_rtable)); tab->table = t; + tab->name = name; add_tail(&(d->tables), &(tab->n)); return tab; } +struct rt_show_data_rtable * +rt_show_add_table(struct rt_show_data *d, struct rtable *t) +{ + struct rt_show_data_rtable *rsdr = rt_show_add_exporter(d, &t->exporter, t->name); + + struct proto_config *krt = t->config->krt_attached; + if (krt) + rsdr->kernel = (struct krt_proto *) krt->proto; + + return rsdr; +} + static inline void rt_show_get_default_tables(struct rt_show_data *d) { @@ -376,7 +383,7 @@ rt_show_get_default_tables(struct rt_show_data *d) { WALK_LIST(c, d->export_protocol->channels) { - if (c->export_state == ES_DOWN) + if (!c->out_req.hook) continue; tab = rt_show_add_table(d, c->table); @@ -393,7 +400,7 @@ rt_show_get_default_tables(struct rt_show_data *d) } for (int i=1; i<NET_MAX; i++) - if (config->def_tables[i]) + if (config->def_tables[i] && config->def_tables[i]->table) rt_show_add_table(d, config->def_tables[i]->table); } @@ -412,16 +419,16 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->export_mode) { if (!tab->export_channel && d->export_channel && - (tab->table == d->export_channel->table)) + (tab->table == &d->export_channel->table->exporter)) tab->export_channel = d->export_channel; if (!tab->export_channel && d->export_protocol) - tab->export_channel = proto_find_channel_by_table(d->export_protocol, tab->table); + tab->export_channel = proto_find_channel_by_table(d->export_protocol, SKIP_BACK(rtable, exporter, tab->table)); if (!tab->export_channel) { if (d->tables_defined_by & RSD_TDB_NMN) - cf_error("No export channel for table %s", tab->table->name); + cf_error("No export channel for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -432,7 +439,7 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->addr && (tab->table->addr_type != d->addr->type)) { if (d->tables_defined_by & RSD_TDB_NMN) - cf_error("Incompatible type of prefix/ip for table %s", tab->table->name); + cf_error("Incompatible type of prefix/ip for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -444,48 +451,29 @@ rt_show_prepare_tables(struct rt_show_data *d) cf_error("No valid tables"); } +static void +rt_show_dummy_cont(struct cli *c UNUSED) +{ + /* Explicitly do nothing to prevent CLI from trying to parse another command. */ +} + void rt_show(struct rt_show_data *d) { - struct rt_show_data_rtable *tab; - net *n; - /* Filtered routes are neither exported nor have sensible ordering */ if (d->filtered && (d->export_mode || d->primary_only)) cf_error("Incompatible show route options"); rt_show_prepare_tables(d); - if (!d->addr || (d->addr_mode == RSD_ADDR_IN)) - { - WALK_LIST(tab, d->tables) - rt_lock_table(tab->table); - - /* There is at least one table */ - d->tab = HEAD(d->tables); - this_cli->cont = rt_show_cont; - this_cli->cleanup = rt_show_cleanup; - this_cli->rover = d; - } - else - { - WALK_LIST(tab, d->tables) - { - d->tab = tab; - d->kernel = rt_show_get_kernel(d); + if (EMPTY_LIST(d->tables)) + cf_error("No suitable tables found"); - if (d->addr_mode == RSD_ADDR_FOR) - n = net_route(tab->table, d->addr); - else - n = net_find(tab->table, d->addr); + d->tab = HEAD(d->tables); - if (n) - rt_show_net(this_cli, n, d); - } + this_cli->cleanup = rt_show_cleanup; + this_cli->rover = d; + this_cli->cont = rt_show_dummy_cont; - if (d->rt_counter) - cli_msg(0, ""); - else - cli_msg(8001, "Network not found"); - } + rt_show_cont(d); } diff --git a/nest/rt-table.c b/nest/rt-table.c index a2fa5e77..3ade4237 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -91,7 +91,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/resource.h" @@ -110,22 +110,83 @@ #include "proto/bgp/bgp.h" #endif -pool *rt_table_pool; +#include <stdatomic.h> -static slab *rte_slab; -static linpool *rte_update_pool; +pool *rt_table_pool; list routing_tables; +list deleted_routing_tables; + +struct rt_cork rt_cork; + +/* Data structures for export journal */ +#define RT_PENDING_EXPORT_ITEMS (page_size - sizeof(struct rt_export_block)) / sizeof(struct rt_pending_export) + +struct rt_export_block { + node n; + _Atomic u32 end; + _Atomic _Bool not_last; + struct rt_pending_export export[]; +}; static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); static void rt_kick_prune_timer(rtable *tab); +static void rt_feed_by_fib(void *); +static void rt_feed_by_trie(void *); +static void rt_feed_equal(void *); +static void rt_feed_for(void *); +static uint rt_feed_net(struct rt_export_hook *c, net *n); +static void rt_check_cork_low(rtable *tab); +static void rt_check_cork_high(rtable *tab); +static void rt_cork_release_hook(void *); + +static inline void rt_export_used(struct rt_exporter *); +static void rt_export_cleanup(rtable *tab); + +static int rte_same(rte *x, rte *y); + +const char *rt_import_state_name_array[TIS_MAX] = { + [TIS_DOWN] = "DOWN", + [TIS_UP] = "UP", + [TIS_STOP] = "STOP", + [TIS_FLUSHING] = "FLUSHING", + [TIS_WAITING] = "WAITING", + [TIS_CLEARED] = "CLEARED", +}; + +const char *rt_export_state_name_array[TES_MAX] = { + [TES_DOWN] = "DOWN", + [TES_FEEDING] = "FEEDING", + [TES_READY] = "READY", + [TES_STOP] = "STOP" +}; + +const char *rt_import_state_name(u8 state) +{ + if (state >= TIS_MAX) + return "!! INVALID !!"; + else + return rt_import_state_name_array[state]; +} + +const char *rt_export_state_name(u8 state) +{ + if (state >= TES_MAX) + return "!! INVALID !!"; + else + return rt_export_state_name_array[state]; +} +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); +static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); static void net_init_with_trie(struct fib *f, void *N) @@ -395,7 +456,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -422,7 +483,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -455,7 +516,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -482,7 +543,7 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -541,253 +602,55 @@ net_roa_check(rtable *tab, const net_addr *n, u32 asn) * @net: network node * @src: route source * - * The rte_find() function returns a route for destination @net - * which is from route source @src. + * The rte_find() function returns a pointer to a route for destination @net + * which is from route source @src. List end pointer is returned if no route is found. */ -rte * +static struct rte_storage ** rte_find(net *net, struct rte_src *src) { - rte *e = net->routes; + struct rte_storage **e = &net->routes; - while (e && e->attrs->src != src) - e = e->next; - return e; -} - -/** - * rte_get_temp - get a temporary &rte - * @a: attributes to assign to the new route (a &rta; in case it's - * un-cached, rte_update() will create a cached copy automatically) - * - * Create a temporary &rte and bind it with the attributes @a. - * Also set route preference to the default preference set for - * the protocol. - */ -rte * -rte_get_temp(rta *a) -{ - rte *e = sl_alloc(rte_slab); + while ((*e) && (*e)->rte.src != src) + e = &(*e)->next; - e->attrs = a; - e->id = 0; - e->flags = 0; - e->pref = 0; return e; } -rte * -rte_do_cow(rte *r) -{ - rte *e = sl_alloc(rte_slab); - - memcpy(e, r, sizeof(rte)); - e->attrs = rta_clone(r->attrs); - e->flags = 0; - return e; -} - -/** - * rte_cow_rta - get a private writable copy of &rte with writable &rta - * @r: a route entry to be copied - * @lp: a linpool from which to allocate &rta - * - * rte_cow_rta() takes a &rte and prepares it and associated &rta for - * modification. There are three possibilities: First, both &rte and &rta are - * private copies, in that case they are returned unchanged. Second, &rte is - * private copy, but &rta is cached, in that case &rta is duplicated using - * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case - * both structures are duplicated by rte_do_cow() and rta_do_cow(). - * - * Note that in the second case, cached &rta loses one reference, while private - * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs, - * nexthops, ...) with it. To work properly, original shared &rta should have - * another reference during the life of created private copy. - * - * Result: a pointer to the new writable &rte with writable &rta. - */ -rte * -rte_cow_rta(rte *r, linpool *lp) -{ - if (!rta_is_cached(r->attrs)) - return r; - - r = rte_cow(r); - rta *a = rta_do_cow(r->attrs, lp); - rta_free(r->attrs); - r->attrs = a; - return r; -} - -/** - * rte_init_tmp_attrs - initialize temporary ea_list for route - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @max: maximum number of added temporary attribus - * - * This function is supposed to be called from make_tmp_attrs() and - * store_tmp_attrs() hooks before rte_make_tmp_attr() / rte_store_tmp_attr() - * functions. It allocates &ea_list with length for @max items for temporary - * attributes and puts it on top of eattrs stack. - */ -void -rte_init_tmp_attrs(rte *r, linpool *lp, uint max) +struct rte_storage * +rte_store(const rte *r, net *net, rtable *tab) { - struct ea_list *e = lp_alloc(lp, sizeof(struct ea_list) + max * sizeof(eattr)); + struct rte_storage *e = sl_alloc(tab->rte_slab); - e->next = r->attrs->eattrs; - e->flags = EALF_SORTED | EALF_TEMP; - e->count = 0; + e->rte = *r; + e->rte.net = net->n.addr; - r->attrs->eattrs = e; -} + rt_lock_source(e->rte.src); -/** - * rte_make_tmp_attr - make temporary eattr from private route fields - * @r: route entry to be modified - * @id: attribute ID - * @type: attribute type - * @val: attribute value (u32 or adata ptr) - * - * This function is supposed to be called from make_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). It checks whether temporary attribute is supposed to - * be defined (based on route pflags) and if so then it fills &eattr field in - * preallocated temporary &ea_list on top of route @r eattrs stack. - * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs(). - */ -void -rte_make_tmp_attr(rte *r, uint id, uint type, uintptr_t val) -{ - if (r->pflags & EA_ID_FLAG(id)) - { - ea_list *e = r->attrs->eattrs; - eattr *a = &e->attrs[e->count++]; - a->id = id; - a->type = type; - a->flags = 0; + if (ea_is_cached(e->rte.attrs)) + e->rte.attrs = rta_clone(e->rte.attrs); + else + e->rte.attrs = rta_lookup(e->rte.attrs, 1); - if (type & EAF_EMBEDDED) - a->u.data = (u32) val; - else - a->u.ptr = (struct adata *) val; - } + return e; } /** - * rte_store_tmp_attr - store temporary eattr to private route fields - * @r: route entry to be modified - * @id: attribute ID - * - * This function is supposed to be called from store_tmp_attrs() hook for - * each temporary attribute, after temporary &ea_list was initialized by - * rte_init_tmp_attrs(). It checks whether temporary attribute is defined in - * route @r eattrs stack, updates route pflags accordingly, undefines it by - * filling &eattr field in preallocated temporary &ea_list on top of the eattrs - * stack, and returns the value. Caller is supposed to store it in the - * appropriate private field. + * rte_free - delete a &rte + * @e: &struct rte_storage to be deleted + * @tab: the table which the rte belongs to * - * Note that it may require free &eattr in temporary &ea_list, so it must not be - * called more times than @max argument of rte_init_tmp_attrs() + * rte_free() deletes the given &rte from the routing table it's linked to. */ -uintptr_t -rte_store_tmp_attr(rte *r, uint id) -{ - ea_list *e = r->attrs->eattrs; - eattr *a = ea_find(e->next, id); - - if (a) - { - e->attrs[e->count++] = (struct eattr) { .id = id, .type = EAF_TYPE_UNDEF }; - r->pflags |= EA_ID_FLAG(id); - return (a->type & EAF_EMBEDDED) ? a->u.data : (uintptr_t) a->u.ptr; - } - else - { - r->pflags &= ~EA_ID_FLAG(id); - return 0; - } -} -/** - * rte_make_tmp_attrs - prepare route by adding all relevant temporary route attributes - * @r: route entry to be modified (may be replaced if COW) - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta (may be NULL) - * - * This function expands privately stored protocol-dependent route attributes - * to a uniform &eattr / &ea_list representation. It is essentially a wrapper - * around protocol make_tmp_attrs() hook, which does some additional work like - * ensuring that route @r is writable. - * - * The route @r may be read-only (with %REF_COW flag), in that case rw copy is - * obtained by rte_cow() and @r is replaced. If @rte is originally rw, it may be - * directly modified (and it is never copied). - * - * If the @old_attrs ptr is supplied, the function obtains another reference of - * old cached &rta, that is necessary in some cases (see rte_cow_rta() for - * details). It is freed by rte_store_tmp_attrs(), or manually by rta_free(). - * - * Generally, if caller ensures that @r is read-only (e.g. in route export) then - * it may ignore @old_attrs (and set it to NULL), but must handle replacement of - * @r. If caller ensures that @r is writable (e.g. in route import) then it may - * ignore replacement of @r, but it must handle @old_attrs. - */ void -rte_make_tmp_attrs(rte **r, linpool *lp, rta **old_attrs) -{ - void (*make_tmp_attrs)(rte *r, linpool *lp); - make_tmp_attrs = (*r)->attrs->src->proto->make_tmp_attrs; - - if (!make_tmp_attrs) - return; - - /* We may need to keep ref to old attributes, will be freed in rte_store_tmp_attrs() */ - if (old_attrs) - *old_attrs = rta_is_cached((*r)->attrs) ? rta_clone((*r)->attrs) : NULL; - - *r = rte_cow_rta(*r, lp); - make_tmp_attrs(*r, lp); -} - -/** - * rte_store_tmp_attrs - store temporary route attributes back to private route fields - * @r: route entry to be modified - * @lp: linpool from which to allocate attributes - * @old_attrs: temporary ref to old &rta - * - * This function stores temporary route attributes that were expanded by - * rte_make_tmp_attrs() back to private route fields and also undefines them. - * It is essentially a wrapper around protocol store_tmp_attrs() hook, which - * does some additional work like shortcut if there is no change and cleanup - * of @old_attrs reference obtained by rte_make_tmp_attrs(). - */ -static void -rte_store_tmp_attrs(rte *r, linpool *lp, rta *old_attrs) +rte_free(struct rte_storage *e) { - void (*store_tmp_attrs)(rte *rt, linpool *lp); - store_tmp_attrs = r->attrs->src->proto->store_tmp_attrs; - - if (!store_tmp_attrs) - return; - - ASSERT(!rta_is_cached(r->attrs)); - - /* If there is no new ea_list, we just skip the temporary ea_list */ - ea_list *ea = r->attrs->eattrs; - if (ea && (ea->flags & EALF_TEMP)) - r->attrs->eattrs = ea->next; - else - store_tmp_attrs(r, lp); - - /* Free ref we got in rte_make_tmp_attrs(), have to do rta_lookup() first */ - r->attrs = rta_lookup(r->attrs); - rta_free(old_attrs); + rt_unlock_source(e->rte.src); + rta_free(e->rte.attrs); + sl_free(e); } - static int /* Actually better or at least as good as */ rte_better(rte *new, rte *old) { @@ -798,20 +661,23 @@ rte_better(rte *new, rte *old) if (!rte_is_valid(new)) return 0; - if (new->pref > old->pref) + u32 np = rt_get_preference(new); + u32 op = rt_get_preference(old); + + if (np > op) return 1; - if (new->pref < old->pref) + if (np < op) return 0; - if (new->attrs->src->proto->proto != old->attrs->src->proto->proto) + if (new->src->owner->class != old->src->owner->class) { /* * If the user has configured protocol preferences, so that two different protocols * have the same preference, try to break the tie by comparing addresses. Not too * useful, but keeps the ordering of routes unambiguous. */ - return new->attrs->src->proto->proto > old->attrs->src->proto->proto; + return new->src->owner->class > old->src->owner->class; } - if (better = new->attrs->src->proto->rte_better) + if (better = new->src->owner->class->rte_better) return better(new, old); return 0; } @@ -824,180 +690,197 @@ rte_mergable(rte *pri, rte *sec) if (!rte_is_valid(pri) || !rte_is_valid(sec)) return 0; - if (pri->pref != sec->pref) + if (rt_get_preference(pri) != rt_get_preference(sec)) return 0; - if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto) + if (pri->src->owner->class != sec->src->owner->class) return 0; - if (mergable = pri->attrs->src->proto->rte_mergable) + if (mergable = pri->src->owner->class->rte_mergable) return mergable(pri, sec); return 0; } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) +rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s.%s %c %s %N %s", - c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, - rta_dest_name(e->attrs->dest)); + log(L_TRACE "%s %c %s %N src %uL %uG %uS id %u %s", + name, dir, msg, e->net, + e->src->private_id, e->src->global_id, e->stale_cycle, e->id, + rta_dest_name(rte_dest(e))); } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_in(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '>', msg); + rte_trace(c->in_req.name, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_out(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '<', msg); + rte_trace(c->out_req.name, e, '<', msg); +} + +static inline void +rt_rte_trace_in(uint flag, struct rt_import_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '>', msg); +} + +#if 0 +// seems to be unused at all +static inline void +rt_rte_trace_out(uint flag, struct rt_export_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '<', msg); +} +#endif + +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + count++; + + return count; +} + +static void +rte_feed_obtain(net *n, struct rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + + ASSERT_DIE(i == count); } static rte * -export_filter_(struct channel *c, rte *rt0, rte **rt_free, linpool *pool, int silent) +export_filter(struct channel *c, rte *rt, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; - rte *rt; - int v; + struct channel_export_stats *stats = &c->export_stats; - rt = rt0; - *rt_free = NULL; + /* Do nothing if we have already rejected the route */ + if (silent && bmap_test(&c->export_reject_map, rt->id)) + goto reject_noset; - v = p->preexport ? p->preexport(c, &rt, pool) : 0; + int v = p->preexport ? p->preexport(c, rt) : 0; if (v < 0) { if (silent) - goto reject; + goto reject_noset; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) - rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); - goto reject; + channel_rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); + goto reject_noset; + } if (v > 0) { if (!silent) - rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); goto accept; } - rte_make_tmp_attrs(&rt, pool, NULL); - v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, &rt, pool, + (f_run(filter, rt, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { if (silent) goto reject; - stats->exp_updates_filtered++; - rte_trace_out(D_FILTERS, c, rt, "filtered out"); + stats->updates_filtered++; + channel_rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } -#ifdef CONFIG_PIPE - /* Pipes need rte with stored tmpattrs, remaining protocols need expanded tmpattrs */ - if (p->proto == &proto_pipe) - rte_store_tmp_attrs(rt, pool, NULL); -#endif - accept: - if (rt != rt0) - *rt_free = rt; + /* We have accepted the route */ + bmap_clear(&c->export_reject_map, rt->id); return rt; reject: + /* We have rejected the route by filter */ + bmap_set(&c->export_reject_map, rt->id); + +reject_noset: /* Discard temporary rte */ - if (rt != rt0) - rte_free(rt); return NULL; } -static inline rte * -export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent) -{ - return export_filter_(c, rt0, rt_free, rte_update_pool, silent); -} - static void -do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed) +do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct channel_export_stats *stats = &c->export_stats; - if (refeed && new) + if (c->refeeding && new) c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { - stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); + stats->updates_rejected++; + channel_rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } - /* Apply export table */ - if (c->out_table && !rte_update_out(c, net->n.addr, new, old, refeed)) - return; + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); if (new) - stats->exp_updates_accepted++; + stats->updates_accepted++; else - stats->exp_withdraws_accepted++; + stats->withdraws_accepted++; if (old) - { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->exp_routes++; - } if (p->debug & D_ROUTES) { if (new && old) - rte_trace_out(D_ROUTES, c, new, "replaced"); + channel_rte_trace_out(D_ROUTES, c, new, "replaced"); else if (new) - rte_trace_out(D_ROUTES, c, new, "added"); + channel_rte_trace_out(D_ROUTES, c, new, "added"); else if (old) - rte_trace_out(D_ROUTES, c, old, "removed"); + channel_rte_trace_out(D_ROUTES, c, old, "removed"); } p->rt_notify(p, c, net, new, old); } static void -rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) +rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { - // struct proto *p = c->proto; - rte *new_free = NULL; - - if (new) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + if (new && old && rte_same(new, old)) + { + if ((new->id != old->id) && bmap_test(&c->export_map, old->id)) + { + bmap_set(&c->export_map, new->id); + bmap_clear(&c->export_map, old->id); + } + return; + } if (new) - new = export_filter(c, new, &new_free, 0); + new = export_filter(c, new, 0); if (old && !bmap_test(&c->export_map, old->id)) old = NULL; @@ -1005,197 +888,351 @@ rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) if (!new && !old) return; - do_rt_notify(c, net, new, old, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + do_rt_notify(c, net, new, old); } static void -rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed) +channel_rpe_mark_seen(struct rt_export_request *req, struct rt_pending_export *rpe) { - // struct proto *p = c->proto; - rte *new_best = NULL; - rte *old_best = NULL; - rte *new_free = NULL; - int new_first = 0; - - /* - * We assume that there are no changes in net route order except (added) - * new_changed and (removed) old_changed. Therefore, the function is not - * compatible with deterministic_med (where nontrivial reordering can happen - * as a result of a route change) and with recomputation of recursive routes - * due to next hop update (where many routes can be changed in one step). - * - * Note that we need this assumption just for optimizations, we could just - * run full new_best recomputation otherwise. - * - * There are three cases: - * feed or old_best is old_changed -> we need to recompute new_best - * old_best is before new_changed -> new_best is old_best, ignore - * old_best is after new_changed -> try new_changed, otherwise old_best - */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (net->routes) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rpe_mark_seen(req->hook, rpe); + if (rpe->old) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} - /* Find old_best - either old_changed, or route for net->routes */ - if (old_changed && bmap_test(&c->export_map, old_changed->id)) - old_best = old_changed; - else +void +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + rte nb0, *new_best = NULL; + const rte *old_best = NULL; + + for (uint i = 0; i < count; i++) { - for (rte *r = net->routes; rte_is_valid(r); r = r->next) + if (!rte_is_valid(feed[i])) + continue; + + /* Has been already rejected, won't bother with it */ + if (!c->refeeding && bmap_test(&c->export_reject_map, feed[i]->id)) + continue; + + /* Previously exported */ + if (!old_best && bmap_test(&c->export_map, feed[i]->id)) { - if (bmap_test(&c->export_map, r->id)) + /* is still best */ + if (!new_best) { - old_best = r; - break; + DBG("rt_notify_accepted: idempotent\n"); + goto done; } - /* Note if new_changed found before old_best */ - if (r == new_changed) - new_first = 1; + /* is superseded */ + old_best = feed[i]; + break; } - } - /* Find new_best */ - if ((new_changed == old_changed) || (old_best == old_changed)) - { - /* Feed or old_best changed -> find first accepted by filters */ - for (rte *r = net->routes; rte_is_valid(r); r = r->next) - if (new_best = export_filter(c, r, &new_free, 0)) - break; + /* Have no new best route yet */ + if (!new_best) + { + /* Try this route not seen before */ + nb0 = *feed[i]; + new_best = export_filter(c, &nb0, 0); + DBG("rt_notify_accepted: checking route id %u: %s\n", feed[i]->id, new_best ? "ok" : "no"); + } } - else + +done: + /* Check obsolete routes for previously exported */ + while (rpe) { - /* Other cases -> either new_changed, or old_best (and nothing changed) */ - if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0))) - new_best = new_changed; - else - return; + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); } - if (!new_best && !old_best) - return; - - do_rt_notify(c, net, new_best, old_best, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); -} - - -static struct nexthop * -nexthop_merge_rta(struct nexthop *nhs, rta *a, linpool *pool, int max) -{ - return nexthop_merge(nhs, &(a->nh), 1, 0, max, pool); + /* Nothing to export */ + if (new_best || old_best) + do_rt_notify(c, n, new_best, old_best); + else + DBG("rt_notify_accepted: nothing to export\n"); } rte * -rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent) +rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { - // struct proto *p = c->proto; - struct nexthop *nhs = NULL; - rte *best0, *best, *rt0, *rt, *tmp; + _Thread_local static rte rloc; - best0 = net->routes; - *rt_free = NULL; + // struct proto *p = c->proto; + struct nexthop_adata *nhs = NULL; + rte *best0 = feed[0]; + rte *best = NULL; if (!rte_is_valid(best0)) return NULL; - best = export_filter_(c, best0, rt_free, pool, silent); + /* Already rejected, no need to re-run the filter */ + if (!c->refeeding && bmap_test(&c->export_reject_map, best0->id)) + return NULL; - if (!best || !rte_is_reachable(best)) + rloc = *best0; + best = export_filter(c, &rloc, silent); + + if (!best) + /* Best route doesn't pass the filter */ + return NULL; + + if (!rte_is_reachable(best)) + /* Unreachable routes can't be merged */ return best; - for (rt0 = best0->next; rt0; rt0 = rt0->next) + for (uint i = 1; i < count; i++) { - if (!rte_mergable(best0, rt0)) + if (!rte_mergable(best0, feed[i])) continue; - rt = export_filter_(c, rt0, &tmp, pool, 1); + rte tmp0 = *feed[i]; + rte *tmp = export_filter(c, &tmp0, 1); - if (!rt) + if (!tmp || !rte_is_reachable(tmp)) continue; - if (rte_is_reachable(rt)) - nhs = nexthop_merge_rta(nhs, rt->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(tmp->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (tmp) - rte_free(tmp); + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; } if (nhs) { - nhs = nexthop_merge_rta(nhs, best->attrs, pool, c->merge_limit); + eattr *nhea = ea_find(best->attrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (nhs->next) - { - best = rte_cow_rta(best, pool); - nexthop_link(best->attrs, nhs); - } - } + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); - if (best != best0) - *rt_free = best; + ea_set_attr(&best->attrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); + } return best; } - -static void -rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, - rte *new_best, rte *old_best, int refeed) +void +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte *new_free = NULL; + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* We assume that all rte arguments are either NULL or rte_is_valid() */ - - /* This check should be done by the caller */ - if (!new_best && !old_best) - return; + // struct proto *p = c->proto; +#if 0 /* TODO: Find whether this check is possible when processing multiple changes at once. */ /* Check whether the change is relevant to the merged route */ if ((new_best == old_best) && (new_changed != old_changed) && !rte_mergable(new_best, new_changed) && !rte_mergable(old_best, old_changed)) return; +#endif - if (new_best) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte *old_best = NULL; + /* Find old best route */ + for (uint i = 0; i < count; i++) + if (bmap_test(&c->export_map, feed[i]->id)) + { + old_best = feed[i]; + break; + } + + /* Check obsolete routes for previously exported */ + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + if (rpe->old) + { + if (bmap_test(&c->export_map, rpe->old->rte.id)) + { + ASSERT_DIE(old_best == NULL); + old_best = &rpe->old->rte; + } + } + rpe = rpe_next(rpe, NULL); + } /* Prepare new merged route */ - if (new_best) - new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0); + rte *new_merged = count ? rt_export_merged(c, feed, count, tmp_linpool, 0) : NULL; + + if (new_merged || old_best) + do_rt_notify(c, n, new_merged, old_best); +} + +void +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte *o = RTE_VALID_OR_NULL(rpe->old_best); + struct rte_storage *new_best = rpe->new_best; + + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_best = rpe->new_best; + rpe = rpe_next(rpe, NULL); + } - /* Check old merged route */ - if (old_best && !bmap_test(&c->export_map, old_best->id)) - old_best = NULL; + rte n0 = RTE_COPY_VALID(new_best); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); +} + +void +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (!new_best && !old_best) + rte *n = RTE_VALID_OR_NULL(rpe->new); + rte *o = RTE_VALID_OR_NULL(rpe->old); + + if (!n && !o) + { + channel_rpe_mark_seen(req, rpe); return; + } - do_rt_notify(c, net, new_best, old_best, refeed); + struct rte_src *src = n ? n->src : o->src; + struct rte_storage *new_latest = rpe->new; - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + while (rpe) + { + channel_rpe_mark_seen(req, rpe); + new_latest = rpe->new; + rpe = rpe_next(rpe, src); + } + + rte n0 = RTE_COPY_VALID(new_latest); + if (n0.src || o) + rt_notify_basic(c, net, n0.src ? &n0 : NULL, o); +} + +void +rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + for (uint i=0; i<count; i++) + if (rte_is_valid(feed[i])) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } +} + +void +rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + bmap_set(&hook->seq_map, rpe->seq); +} + +struct rt_pending_export * +rpe_next(struct rt_pending_export *rpe, struct rte_src *src) +{ + struct rt_pending_export *next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + if (!next) + return NULL; + + if (!src) + return next; + + while (rpe = next) + if (src == (rpe->new ? rpe->new->rte.src : rpe->old->rte.src)) + return rpe; + else + next = atomic_load_explicit(&rpe->next, memory_order_acquire); + + return NULL; } +static struct rt_pending_export * rt_next_export_fast(struct rt_pending_export *last); +static void +rte_export(struct rt_export_hook *hook, struct rt_pending_export *rpe) +{ + if (bmap_test(&hook->seq_map, rpe->seq)) + goto ignore; /* Seen already */ + + const net_addr *n = rpe->new_best ? rpe->new_best->rte.net : rpe->old_best->rte.net; + + switch (hook->req->addr_mode) + { + case TE_ADDR_NONE: + break; + + case TE_ADDR_IN: + if (!net_in_netX(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_EQUAL: + if (!net_equal(n, hook->req->addr)) + goto ignore; + break; + + case TE_ADDR_FOR: + bug("Continuos export of best prefix match not implemented yet."); + + default: + bug("Strange table export address mode: %d", hook->req->addr_mode); + } + + if (rpe->new) + hook->stats.updates_received++; + else + hook->stats.withdraws_received++; + + if (hook->req->export_one) + hook->req->export_one(hook->req, n, rpe); + else if (hook->req->export_bulk) + { + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) + { + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } + hook->req->export_bulk(hook->req, n, rpe, feed, count); + } + else + bug("Export request must always provide an export method"); + +ignore: + /* Get the next export if exists */ + hook->rpe_next = rt_next_export_fast(rpe); + + /* The last block may be available to free */ + if (PAGE_HEAD(hook->rpe_next) != PAGE_HEAD(rpe)) + CALL(hook->table->used, hook->table); + + /* Releasing this export for cleanup routine */ + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe->seq); + atomic_store_explicit(&hook->last_export, rpe, memory_order_release); +} /** * rte_announce - announce a routing table change * @tab: table the route has been added to - * @type: type of route announcement (RA_UNDEF or RA_ANY) * @net: network in question * @new: the new or changed route * @old: the previous route replaced by the new one @@ -1211,13 +1248,6 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * and @new_best and @old_best describes best routes. Other routes are not * affected, but in sorted table the order of other routes might change. * - * Second, There is a bulk change of multiple routes in @net, with shared best - * route selection. In such case separate route changes are described using - * @type of %RA_ANY, with @new and @old specifying the changed route, while - * @new_best and @old_best are NULL. After that, another notification is done - * where @new_best and @old_best are filled (may be the same), but @new and @old - * are NULL. - * * The function announces the change to all associated channels. For each * channel, an appropriate preprocessing is done according to channel &ra_mode. * For example, %RA_OPTIMAL channels receive just changes of best routes. @@ -1232,30 +1262,21 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * done outside of scope of rte_announce(). */ static void -rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, + struct rte_storage *new_best, struct rte_storage *old_best) { - if (!rte_is_valid(new)) - new = NULL; - - if (!rte_is_valid(old)) - old = NULL; + int new_best_valid = rte_is_valid(RTE_OR_NULL(new_best)); + int old_best_valid = rte_is_valid(RTE_OR_NULL(old_best)); - if (!rte_is_valid(new_best)) - new_best = NULL; - - if (!rte_is_valid(old_best)) - old_best = NULL; - - if (!new && !old && !new_best && !old_best) + if ((new == old) && (new_best == old_best)) return; - if (new_best != old_best) + if (new_best_valid || old_best_valid) { - if (new_best) - new_best->sender->stats.pref_routes++; - if (old_best) - old_best->sender->stats.pref_routes--; + if (new_best_valid) + new_best->rte.sender->stats.pref++; + if (old_best_valid) + old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); @@ -1266,103 +1287,255 @@ rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, rt_schedule_notify(tab); - struct channel *c; node *n; - WALK_LIST2(c, n, tab->channels, table_node) + if (EMPTY_LIST(tab->exporter.hooks) && EMPTY_LIST(tab->exporter.pending)) { - if (c->export_state == ES_DOWN) - continue; + /* No export hook and no pending exports to cleanup. We may free the route immediately. */ + if (!old) + return; - if (type && (type != c->ra_mode)) - continue; + hmap_clear(&tab->id_map, old->rte.id); + rte_free(old); + return; + } + + /* Get the pending export structure */ + struct rt_export_block *rpeb = NULL, *rpebsnl = NULL; + u32 end = 0; - switch (c->ra_mode) + if (!EMPTY_LIST(tab->exporter.pending)) + { + rpeb = TAIL(tab->exporter.pending); + end = atomic_load_explicit(&rpeb->end, memory_order_relaxed); + if (end >= RT_PENDING_EXPORT_ITEMS) { - case RA_OPTIMAL: - if (new_best != old_best) - rt_notify_basic(c, net, new_best, old_best, 0); - break; + ASSERT_DIE(end == RT_PENDING_EXPORT_ITEMS); + rpebsnl = rpeb; - case RA_ANY: - if (new != old) - rt_notify_basic(c, net, new, old, 0); - break; + rpeb = NULL; + end = 0; + } + } - case RA_ACCEPTED: - rt_notify_accepted(c, net, new, old, 0); - break; + if (!rpeb) + { + rpeb = alloc_page(); + *rpeb = (struct rt_export_block) {}; + add_tail(&tab->exporter.pending, &rpeb->n); + } - case RA_MERGED: - rt_notify_merged(c, net, new, old, new_best, old_best, 0); - break; + /* Fill the pending export */ + struct rt_pending_export *rpe = &rpeb->export[rpeb->end]; + *rpe = (struct rt_pending_export) { + .new = new, + .new_best = new_best, + .old = old, + .old_best = old_best, + .seq = tab->exporter.next_seq++, + }; + + DBGL("rte_announce: table=%s net=%N new=%p id %u from %s old=%p id %u from %s new_best=%p id %u old_best=%p id %u seq=%lu", + tab->name, net->n.addr, + new, new ? new->rte.id : 0, new ? new->rte.sender->req->name : NULL, + old, old ? old->rte.id : 0, old ? old->rte.sender->req->name : NULL, + new_best, old_best, rpe->seq); + + ASSERT_DIE(atomic_fetch_add_explicit(&rpeb->end, 1, memory_order_release) == end); + + if (rpebsnl) + { + _Bool f = 0; + ASSERT_DIE(atomic_compare_exchange_strong_explicit(&rpebsnl->not_last, &f, 1, + memory_order_release, memory_order_relaxed)); + } + + /* Append to the same-network squasher list */ + if (net->last) + { + struct rt_pending_export *rpenull = NULL; + ASSERT_DIE(atomic_compare_exchange_strong_explicit( + &net->last->next, &rpenull, rpe, + memory_order_relaxed, + memory_order_relaxed)); + + } + + net->last = rpe; + + if (!net->first) + net->first = rpe; + + if (tab->exporter.first == NULL) + tab->exporter.first = rpe; + + rt_check_cork_high(tab); + + if (!tm_active(tab->exporter.export_timer)) + tm_start(tab->exporter.export_timer, tab->config->export_settle_time); +} + +static struct rt_pending_export * +rt_next_export_fast(struct rt_pending_export *last) +{ + /* Get the whole export block and find our position in there. */ + struct rt_export_block *rpeb = PAGE_HEAD(last); + u32 pos = (last - &rpeb->export[0]); + u32 end = atomic_load_explicit(&rpeb->end, memory_order_acquire); + ASSERT_DIE(pos < end); + + /* Next is in the same block. */ + if (++pos < end) + return &rpeb->export[pos]; + + /* There is another block. */ + if (atomic_load_explicit(&rpeb->not_last, memory_order_acquire)) + { + /* This is OK to do non-atomically because of the not_last flag. */ + rpeb = NODE_NEXT(rpeb); + return &rpeb->export[0]; + } + + /* There is nothing more. */ + return NULL; +} + +static struct rt_pending_export * +rt_next_export(struct rt_export_hook *hook, struct rt_exporter *tab) +{ + /* As the table is locked, it is safe to reload the last export pointer */ + struct rt_pending_export *last = atomic_load_explicit(&hook->last_export, memory_order_acquire); + + /* It is still valid, let's reuse it */ + if (last) + return rt_next_export_fast(last); + + /* No, therefore we must process the table's first pending export */ + else + return tab->first; +} + +static inline void +rt_send_export_event(struct rt_export_hook *hook) +{ + ev_send(hook->req->list, hook->event); +} + +static void +rt_announce_exports(timer *tm) +{ + rtable *tab = tm->data; + + struct rt_export_hook *c; node *n; + WALK_LIST2(c, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_READY) + continue; + + rt_send_export_event(c); + } +} + +static struct rt_pending_export * +rt_last_export(struct rt_exporter *tab) +{ + struct rt_pending_export *rpe = NULL; + + if (!EMPTY_LIST(tab->pending)) + { + /* We'll continue processing exports from this export on */ + struct rt_export_block *reb = TAIL(tab->pending); + ASSERT_DIE(reb->end); + rpe = &reb->export[reb->end - 1]; + } + + return rpe; +} + +#define RT_EXPORT_BULK 1024 + +static void +rt_export_hook(void *_data) +{ + struct rt_export_hook *c = _data; + + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_READY); + + if (!c->rpe_next) + { + c->rpe_next = rt_next_export(c, c->table); + + if (!c->rpe_next) + { + CALL(c->table->used, c->table); + return; } } + + /* Process the export */ + for (uint i=0; i<RT_EXPORT_BULK; i++) + { + rte_export(c, c->rpe_next); + + if (!c->rpe_next) + break; + } + + rt_send_export_event(c); } + static inline int -rte_validate(rte *e) +rte_validate(struct channel *ch, rte *e) { int c; - net *n = e->net; + const net_addr *n = e->net; - if (!net_validate(n->n.addr)) + if (!net_validate(n)) { log(L_WARN "Ignoring bogus prefix %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } /* FIXME: better handling different nettypes */ - c = !net_is_flow(n->n.addr) ? - net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE); + c = !net_is_flow(n) ? + net_classify(n): (IADDR_HOST | SCOPE_UNIVERSE); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) { log(L_WARN "Ignoring bogus route %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } - if (net_type_match(n->n.addr, NB_DEST) == !e->attrs->dest) + if (net_type_match(n, NB_DEST)) { - /* Exception for flowspec that failed validation */ - if (net_is_flow(n->n.addr) && (e->attrs->dest == RTD_UNREACHABLE)) - return 1; + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); - log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n->n.addr, e->attrs->dest, e->sender->proto->name); - return 0; - } + if (dest == RTD_NONE) + { + log(L_WARN "Ignoring route %N with no destination received via %s", + n, ch->proto->name); + return 0; + } - if ((e->attrs->dest == RTD_UNICAST) && !nexthop_is_sorted(&(e->attrs->nh))) + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n, ch->proto->name); + return 0; + } + } + else if (ea_find(e->attrs, &ea_gen_nexthop)) { - log(L_WARN "Ignoring unsorted multipath route %N received via %s", - n->n.addr, e->sender->proto->name); + log(L_WARN "Ignoring route %N having a nexthop attribute received via %s", + n, ch->proto->name); return 0; } return 1; } -/** - * rte_free - delete a &rte - * @e: &rte to be deleted - * - * rte_free() deletes the given &rte from the routing table it's linked to. - */ -void -rte_free(rte *e) -{ - if (rta_is_cached(e->attrs)) - rta_free(e->attrs); - sl_free(rte_slab, e); -} - -static inline void -rte_free_quick(rte *e) -{ - rta_free(e->attrs); - sl_free(rte_slab, e); -} - static int rte_same(rte *x, rte *y) { @@ -1370,175 +1543,116 @@ rte_same(rte *x, rte *y) return x->attrs == y->attrs && x->pflags == y->pflags && - x->pref == y->pref && - (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y)) && + x->src == y->src && rte_is_filtered(x) == rte_is_filtered(y); } static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = c->proto; + struct rt_import_request *req = c->req; struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; - rte *before_old = NULL; - rte *old_best = net->routes; + struct rt_import_stats *stats = &c->stats; + struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; + rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; - rte **k; - k = &net->routes; /* Find and remove original route from the same protocol */ - while (old = *k) + /* If the new route is identical to the old one, we find the attributes in + * cache and clone these with no performance drop. OTOH, if we were to lookup + * the attributes, such a route definitely hasn't been anywhere yet, + * therefore it's definitely worth the time. */ + struct rte_storage *new_stored = NULL; + if (new) + new = &(new_stored = rte_store(new, net, table))->rte; + + /* Find and remove original route from the same protocol */ + struct rte_storage **before_old = rte_find(net, src); + + if (*before_old) { - if (old->attrs->src == src) + old = &(old_stored = (*before_old))->rte; + + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. */ + if (old->sender != c) { - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - { - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - rte_free_quick(new); - } - return; - } + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); - if (new && rte_same(old, new)) + log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->owner->name, old->src->private_id, old->src->global_id); + } + + if (new && rte_same(old, &new_stored->rte)) { /* No changes, ignore the new route and refresh the old one */ - - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); + old->stale_cycle = new->stale_cycle; if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } - rte_free_quick(new); + /* We need to free the already stored route here before returning */ + rte_free(new_stored); return; - } - *k = old->next; - table->rt_count--; - break; - } - k = &old->next; - before_old = old; - } - - /* Save the last accessed position */ - rte **pos = k; + } - if (!old) - before_old = NULL; + *before_old = (*before_old)->next; + table->rt_count--; + } if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } + /* If rejected by import limit, we need to pretend there is no route */ + if (req->preimport && (req->preimport(req, new, old) == 0)) + { + rte_free(new_stored); + new_stored = NULL; + new = NULL; + } + int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) - { - u32 all_routes = stats->imp_routes + stats->filt_routes; - - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); - - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - rte_free_quick(new); - return; - } - } - - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - { rte_free_quick(new); new = NULL; } - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } - if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); - skip_stats1: - - if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; - if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ - if (new) + if (new_stored) { - if (before_old && !rte_better(new, before_old)) - k = &before_old->next; + struct rte_storage **k; + if ((before_old != &net->routes) && !rte_better(new, &SKIP_BACK(struct rte_storage, next, before_old)->rte)) + k = before_old; else k = &net->routes; for (; *k; k=&(*k)->next) - if (rte_better(new, *k)) + if (rte_better(new, &(*k)->rte)) break; - new->next = *k; - *k = new; + new_stored->next = *k; + *k = new_stored; table->rt_count++; } @@ -1548,16 +1662,17 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* If routes are not sorted, find the best route and move it on the first position. There are several optimized cases. */ - if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best)) + if (src->owner->rte_recalculate && + src->owner->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) goto do_recalculate; - if (new && rte_better(new, old_best)) + if (new_stored && rte_better(&new_stored->rte, old_best)) { /* The first case - the new route is cleary optimal, we link it at the first position */ - new->next = net->routes; - net->routes = new; + new_stored->next = net->routes; + net->routes = new_stored; table->rt_count++; } @@ -1571,10 +1686,10 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) do_recalculate: /* Add the new route to the list */ - if (new) + if (new_stored) { - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } @@ -1582,335 +1697,417 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* Find a new optimal route (if there is any) */ if (net->routes) { - rte **bp = &net->routes; - for (k=&(*bp)->next; *k; k=&(*k)->next) - if (rte_better(*k, *bp)) + struct rte_storage **bp = &net->routes; + for (struct rte_storage **k=&(*bp)->next; *k; k=&(*k)->next) + if (rte_better(&(*k)->rte, &(*bp)->rte)) bp = k; /* And relink it */ - rte *best = *bp; + struct rte_storage *best = *bp; *bp = best->next; best->next = net->routes; net->routes = best; } } - else if (new) + else if (new_stored) { /* The third case - the new route is not better than the old best route (therefore old_best != NULL) and the old best route was not removed (therefore old_best == net->routes). We just link the new route to the old/last position. */ - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } /* The fourth (empty) case - suboptimal route was removed, nothing to do */ } - if (new) + if (new_stored) { - new->lastmod = current_time(); - - if (!old) - { - new->id = hmap_first_zero(&table->id_map); - hmap_set(&table->id_map, new->id); - } - else - new->id = old->id; + new_stored->rte.lastmod = current_time(); + new_stored->rte.id = hmap_first_zero(&table->id_map); + hmap_set(&table->id_map, new_stored->rte.id); } /* Log the route change */ - if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES)) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); + else if (old_ok) { - if (new_ok) - rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added"); - else if (old_ok) - { - if (old != old_best) - rte_trace(c, old, '>', "removed"); - else if (rte_is_ok(net->routes)) - rte_trace(c, old, '>', "removed [replaced]"); - else - rte_trace(c, old, '>', "removed [sole]"); - } + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } /* Propagate the route change */ - rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best); + rte_announce(table, net, new_stored, old_stored, + net->routes, old_best_stored); if (!net->routes && (table->gc_counter++ >= table->config->gc_threshold)) rt_kick_prune_timer(table); +#if 0 + /* Enable and reimplement these callbacks if anybody wants to use them */ if (old_ok && p->rte_remove) p->rte_remove(net, old); if (new_ok && p->rte_insert) - p->rte_insert(net, new); - - if (old) - { - if (!new) - hmap_clear(&table->id_map, old->id); + p->rte_insert(net, &new_stored->rte); +#endif - rte_free_quick(old); - } } -static int rte_update_nest_cnt; /* Nesting counter to allow recursive updates */ - -static inline void -rte_update_lock(void) +int +channel_preimport(struct rt_import_request *req, rte *new, rte *old) { - rte_update_nest_cnt++; -} + struct channel *c = SKIP_BACK(struct channel, in_req, req); -static inline void -rte_update_unlock(void) -{ - if (!--rte_update_nest_cnt) - lp_flush(rte_update_pool); -} + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return 0; -static inline void -rte_hide_dummy_routes(net *net, rte **dummy) -{ - if (net->routes && net->routes->attrs->source == RTS_DUMMY) - { - *dummy = net->routes; - net->routes = (*dummy)->next; - } -} + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); -static inline void -rte_unhide_dummy_routes(net *net, rte **dummy) -{ - if (*dummy) - { - (*dummy)->next = net->routes; - net->routes = *dummy; - } -} + int new_in = new && !rte_is_filtered(new); + int old_in = old && !rte_is_filtered(old); -/** - * rte_update - enter a new update to a routing table - * @table: table to be updated - * @c: channel doing the update - * @net: network node - * @p: protocol submitting the update - * @src: protocol originating the update - * @new: a &rte representing the new route or %NULL for route removal. - * - * This function is called by the routing protocols whenever they discover - * a new route or wish to update/remove an existing route. The right announcement - * sequence is to build route attributes first (either un-cached with @aflags set - * to zero or a cached one using rta_lookup(); in this case please note that - * you need to increase the use count of the attributes yourself by calling - * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all - * the appropriate data and finally submit the new &rte by calling rte_update(). - * - * @src specifies the protocol that originally created the route and the meaning - * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the - * same value as @new->attrs->proto. @p specifies the protocol that called - * rte_update(). In most cases it is the same protocol as @src. rte_update() - * stores @p in @new->sender; - * - * When rte_update() gets any route, it automatically validates it (checks, - * whether the network and next hop address are valid IP addresses and also - * whether a normal routing protocol doesn't try to smuggle a host or link - * scope route to the table), converts all protocol dependent attributes stored - * in the &rte to temporary extended attributes, consults import filters of the - * protocol to see if the route should be accepted and/or its attributes modified, - * stores the temporary attributes back to the &rte. - * - * Now, having a "public" version of the route, we - * automatically find any old route defined by the protocol @src - * for network @n, replace it by the new one (or removing it if @new is %NULL), - * recalculate the optimal route for this destination and finally broadcast - * the change (if any) to all routing protocols by calling rte_announce(). - * - * All memory used for attribute lists and other temporary allocations is taken - * from a special linear pool @rte_update_pool and freed when rte_update() - * finishes. - */ + if (new_in && !old_in) + if (CHANNEL_LIMIT_PUSH(c, IN)) + if (c->in_keep & RIK_REJECTED) + { + new->flags |= REF_FILTERED; + return 1; + } + else + return 0; + + if (!new_in && old_in) + CHANNEL_LIMIT_POP(c, IN); + + return 1; +} void -rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { - // struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; - const struct filter *filter = c->in_filter; - rte *dummy = NULL; - net *nn; + if (!c->in_req.hook) + return; ASSERT(c->channel_state == CS_UP); - rte_update_lock(); + /* The import reloader requires prefilter routes to be the first layer */ + if (new && (c->in_keep & RIK_PREFILTER)) + if (ea_is_cached(new->attrs) && !new->attrs->next) + new->attrs = ea_clone(new->attrs); + else + new->attrs = ea_lookup(new->attrs, 0); + + const struct filter *filter = c->in_filter; + struct channel_import_stats *stats = &c->import_stats; + if (new) { - /* Create a temporary table node */ - nn = alloca(sizeof(net) + n->length); - memset(nn, 0, sizeof(net) + n->length); - net_copy(nn->n.addr, n); + new->net = n; - new->net = nn; - new->sender = c; + int fr; - if (!new->pref) - new->pref = c->preference; - - stats->imp_updates_received++; - if (!rte_validate(new)) + stats->updates_received++; + if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { - rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; - goto drop; + stats->updates_filtered++; + channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); + + if (c->in_keep & RIK_REJECTED) + new->flags |= REF_FILTERED; + else + new = NULL; } - if (filter == FILTER_REJECT) + if (new) + if (net_is_flow(n)) + rt_flowspec_resolve_rte(new, c); + else + rt_next_hop_resolve_rte(new); + + if (new && !rte_validate(c, new)) { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; + } - if (! c->in_keep_filtered) - goto drop; + } + else + stats->withdraws_received++; - /* new is a private copy, i could modify it */ - new->flags |= REF_FILTERED; - } - else if (filter) - { - rta *old_attrs = NULL; - rte_make_tmp_attrs(&new, rte_update_pool, &old_attrs); + rte_import(&c->in_req, n, new, src); - int fr = f_run(filter, &new, rte_update_pool, 0); - if (fr > F_ACCEPT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + /* Now the route attributes are kept by the in-table cached version + * and we may drop the local handle */ + if (new && (c->in_keep & RIK_PREFILTER)) + { + /* There may be some updates on top of the original attribute block */ + ea_list *a = new->attrs; + while (a->next) + a = a->next; - if (! c->in_keep_filtered) - { - rta_free(old_attrs); - goto drop; - } + ea_free(a); + } - new->flags |= REF_FILTERED; - } +} - rte_store_tmp_attrs(new, rte_update_pool, old_attrs); - } - if (!rta_is_cached(new->attrs)) /* Need to copy attributes */ - new->attrs = rta_lookup(new->attrs); - new->flags |= REF_COW; +void +rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rte_src *src) +{ + struct rt_import_hook *hook = req->hook; + if (!hook) + return; + net *nn; + if (new) + { /* Use the actual struct network, not the dummy one */ - nn = net_get(c->table, n); - new->net = nn; + nn = net_get(hook->table, n); + new->net = nn->n.addr; + new->sender = hook; + + /* Set the stale cycle */ + new->stale_cycle = hook->stale_set; } - else + else if (!(nn = net_find(hook->table, n))) { - stats->imp_withdraws_received++; - - if (!(nn = net_find(c->table, n)) || !src) - { - stats->imp_withdraws_ignored++; - rte_update_unlock(); - return; - } + req->hook->stats.withdraws_ignored++; + return; } - recalc: /* And recalculate the best route */ - rte_hide_dummy_routes(nn, &dummy); - rte_recalculate(c, nn, new, src); - rte_unhide_dummy_routes(nn, &dummy); + rte_recalculate(hook, nn, new, src); +} + +/* Check rtable for best route to given net whether it would be exported do p */ +int +rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +{ + net *n = net_find(t, a); + + if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) + return 0; - rte_update_unlock(); - return; + rte rt = n->routes->rte; - drop: - rte_free(new); - new = NULL; - if (nn = net_find(c->table, n)) - goto recalc; + /* Rest is stripped down export_filter() */ + int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; + if (v == RIC_PROCESS) + v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); - rte_update_unlock(); + return v > 0; } -/* Independent call to rte_announce(), used from next hop - recalculation, outside of rte_update(). new must be non-NULL */ -static inline void -rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +static void +rt_table_export_done(struct rt_export_hook *hook) { - rte_update_lock(); - rte_announce(tab, type, net, new, old, new_best, old_best); - rte_update_unlock(); + struct rt_exporter *re = hook->table; + struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + + rt_unlock_table(tab); + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); } -static inline void -rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */ +static void +rt_export_stopped(void *data) { - rte_update_lock(); - rte_recalculate(old->sender, old->net, NULL, old->attrs->src); - rte_update_unlock(); + struct rt_export_hook *hook = data; + struct rt_exporter *tab = hook->table; + + /* Drop pending exports */ + CALL(tab->used, tab); + + /* Unlist */ + rem_node(&hook->n); + + /* Report the channel as stopped. */ + hook->stopped(hook->req); + + /* Reporting the hook as finished. */ + CALL(tab->done, hook); + + /* Free the hook. */ + rfree(hook->pool); } -/* Modify existing route by protocol hook, used for long-lived graceful restart */ static inline void -rte_modify(rte *old) +rt_set_import_state(struct rt_import_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->import_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_set_export_state(struct rt_export_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + atomic_store_explicit(&hook->export_state, state, memory_order_release); + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_request_import(rtable *tab, struct rt_import_request *req) +{ + rt_lock_table(tab); + + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + + hook->req = req; + hook->table = tab; + + rt_set_import_state(hook, TIS_UP); + + hook->n = (node) {}; + add_tail(&tab->imports, &hook->n); +} + +void +rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *)) { - rte_update_lock(); + ASSERT_DIE(req->hook); + struct rt_import_hook *hook = req->hook; - rte *new = old->sender->proto->rte_modify(old, rte_update_pool); - if (new != old) + rt_schedule_prune(hook->table); + + rt_set_import_state(hook, TIS_STOP); + + hook->stopped = stopped; +} + +static struct rt_export_hook * +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) +{ + rtable *tab = SKIP_BACK(rtable, exporter, re); + rt_lock_table(tab); + + pool *p = rp_new(tab->rp, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + + /* stats zeroed by mb_allocz */ + switch (req->addr_mode) { - if (new) - { - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - new->flags = (old->flags & ~REF_MODIFY) | REF_COW; - } + case TE_ADDR_IN: + if (tab->trie && net_val_match(tab->addr_type, NB_IP)) + { + hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_lock = rt_lock_trie(tab); + trie_walk_init(hook->walk_state, tab->trie, req->addr); + hook->event = ev_new_init(p, rt_feed_by_trie, hook); + break; + } + /* fall through */ + case TE_ADDR_NONE: + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + hook->event = ev_new_init(p, rt_feed_by_fib, hook); + break; + + case TE_ADDR_EQUAL: + hook->event = ev_new_init(p, rt_feed_equal, hook); + break; + + case TE_ADDR_FOR: + hook->event = ev_new_init(p, rt_feed_for, hook); + break; - rte_recalculate(old->sender, old->net, new, old->attrs->src); + default: + bug("Requested an unknown export address mode"); } - rte_update_unlock(); + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + + return hook; } -/* Check rtable for best route to given net whether it would be exported do p */ -int -rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) +void +rt_request_export(struct rt_exporter *re, struct rt_export_request *req) { - struct proto *p = c->proto; - net *n = net_find(t, a); - rte *rt = n ? n->routes : NULL; + struct rt_export_hook *hook = req->hook = re->start(re, req); - if (!rte_is_valid(rt)) - return 0; + hook->req = req; + hook->table = re; - rte_update_lock(); + bmap_init(&hook->seq_map, hook->pool, 1024); - /* Rest is stripped down export_filter() */ - int v = p->preexport ? p->preexport(c, &rt, rte_update_pool) : 0; - if (v == RIC_PROCESS) + struct rt_pending_export *rpe = rt_last_export(hook->table); + DBG("store hook=%p last_export=%p seq=%lu\n", hook, rpe, rpe ? rpe->seq : 0); + atomic_store_explicit(&hook->last_export, rpe, memory_order_relaxed); + + hook->n = (node) {}; + add_tail(&re->hooks, &hook->n); + + /* Regular export */ + rt_set_export_state(hook, TES_FEEDING); + rt_send_export_event(hook); +} + +static void +rt_table_export_stop(struct rt_export_hook *hook) +{ + rtable *tab = SKIP_BACK(rtable, exporter, hook->table); + + if (atomic_load_explicit(&hook->export_state, memory_order_relaxed) != TES_FEEDING) + return; + + switch (hook->req->addr_mode) { - rte_make_tmp_attrs(&rt, rte_update_pool, NULL); - v = (f_run(filter, &rt, rte_update_pool, FF_SILENT) <= F_ACCEPT); + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); + break; } +} - /* Discard temporary rte */ - if (rt != n->routes) - rte_free(rt); +void +rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_export_hook *hook = req->hook; - rte_update_unlock(); + /* Cancel the feeder event */ + ev_postpone(hook->event); - return v > 0; -} + /* Stop feeding from the exporter */ + CALL(hook->table->stop, hook); + /* Reset the event as the stopped event */ + hook->event->hook = rt_export_stopped; + hook->stopped = stopped; + + /* Update export state */ + rt_set_export_state(hook, TES_STOP); + + /* Run the stopped event */ + rt_send_export_event(hook); +} /** * rt_refresh_begin - start a refresh cycle @@ -1927,16 +2124,38 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct channel *c) +rt_refresh_begin(struct rt_import_request *req) { - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if (e->sender == c) - e->flags |= REF_STALE; - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); + ASSERT_DIE(hook->stale_set == hook->stale_valid); + + /* If the pruning routine is too slow */ + if ((hook->stale_pruned < hook->stale_valid) && (hook->stale_pruned + 128 < hook->stale_valid) + || (hook->stale_pruned > hook->stale_valid) && (hook->stale_pruned > hook->stale_valid + 128)) + { + log(L_WARN "Route refresh flood in table %s", hook->table->name); + FIB_WALK(&hook->table->fib, net, n) + { + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.stale_cycle = 0; + } + FIB_WALK_END; + hook->stale_set = 1; + hook->stale_valid = 0; + hook->stale_pruned = 0; + } + /* Setting a new value of the stale modifier */ + else if (!++hook->stale_set) + { + /* Let's reserve the stale_cycle zero value for always-invalid routes */ + hook->stale_set = 1; + hook->stale_valid = 0; + } + + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh begin [%u]", req->name, hook->stale_set); } /** @@ -1948,45 +2167,18 @@ rt_refresh_begin(rtable *t, struct channel *c) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct channel *c) +rt_refresh_end(struct rt_import_request *req) { - int prune = 0; - - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE)) - { - e->flags |= REF_DISCARD; - prune = 1; - } - } - FIB_WALK_END; + struct rt_import_hook *hook = req->hook; + ASSERT_DIE(hook); - if (prune) - rt_schedule_prune(t); -} + hook->stale_valid++; + ASSERT_DIE(hook->stale_set == hook->stale_valid); -void -rt_modify_stale(rtable *t, struct channel *c) -{ - int prune = 0; + rt_schedule_prune(hook->table); - FIB_WALK(&t->fib, net, n) - { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED)) - { - e->flags |= REF_MODIFY; - prune = 1; - } - } - FIB_WALK_END; - - if (prune) - rt_schedule_prune(t); + if (req->trace_routes & D_STATES) + log(L_TRACE "%s: route refresh end [%u]", req->name, hook->stale_valid); } /** @@ -1996,14 +2188,11 @@ rt_modify_stale(rtable *t, struct channel *c) * This functions dumps contents of a &rte to debug output. */ void -rte_dump(rte *e) -{ - net *n = e->net; - debug("%-1N ", n->n.addr); - debug("PF=%02x pref=%d ", e->pflags, e->pref); - rta_dump(e->attrs); - if (e->attrs->src->proto->proto->dump_attrs) - e->attrs->src->proto->proto->dump_attrs(e); +rte_dump(struct rte_storage *e) +{ + debug("%-1N ", e->rte.net); + debug("PF=%02x ", e->rte.pflags); + ea_dump(e->rte.attrs); debug("\n"); } @@ -2016,14 +2205,13 @@ rte_dump(rte *e) void rt_dump(rtable *t) { - debug("Dump of routing table <%s>\n", t->name); + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif FIB_WALK(&t->fib, net, n) { - rte *e; - for(e=n->routes; e; e=e->next) + for(struct rte_storage *e=n->routes; e; e=e->next) rte_dump(e); } FIB_WALK_END; @@ -2043,6 +2231,54 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump(t); +} + +void +rt_dump_hooks(rtable *tab) +{ + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); + debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", + tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); + + struct rt_import_hook *ih; + WALK_LIST(ih, tab->imports) + { + ih->req->dump_req(ih->req); + debug(" Import hook %p requested by %p: pref=%u" + " last_state_change=%t import_state=%u stopped=%p\n", + ih, ih->req, ih->stats.pref, + ih->last_state_change, ih->import_state, ih->stopped); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exporter.hooks) + { + eh->req->dump_req(eh->req); + debug(" Export hook %p requested by %p:" + " refeed_pending=%u last_state_change=%t export_state=%u\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, atomic_load_explicit(&eh->export_state, memory_order_relaxed)); + } + debug("\n"); +} + +void +rt_dump_hooks_all(void) +{ + rtable *t; + node *n; + + debug("Dump of all table hooks\n"); + + WALK_LIST2(t, n, routing_tables, n) + rt_dump_hooks(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump_hooks(t); } static inline void @@ -2078,6 +2314,20 @@ rt_schedule_prune(rtable *tab) tab->prune_state |= 1; } +static void +rt_export_used(struct rt_exporter *e) +{ + rtable *tab = SKIP_BACK(rtable, exporter, e); + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup requested", tab->name); + + if (tab->export_used) + return; + + tab->export_used = 1; + ev_schedule(tab->rt_event); +} static void rt_event(void *ptr) @@ -2086,6 +2336,25 @@ rt_event(void *ptr) rt_lock_table(tab); + if (tab->export_used) + rt_export_cleanup(tab); + + if ( + tab->hcu_corked || + tab->nhu_corked || + (tab->hcu_scheduled || tab->nhu_state) && rt_cork_check(tab->uncork_event) + ) + { + if (!tab->hcu_corked && !tab->nhu_corked && config->table_debug) + log(L_TRACE "%s: Auxiliary routines corked", tab->name); + + tab->hcu_corked |= tab->hcu_scheduled; + tab->hcu_scheduled = 0; + + tab->nhu_corked |= tab->nhu_state; + tab->nhu_state = 0; + } + if (tab->hcu_scheduled) rt_update_hostcache(tab); @@ -2098,6 +2367,22 @@ rt_event(void *ptr) rt_unlock_table(tab); } +static void +rt_uncork_event(void *ptr) +{ + rtable *tab = ptr; + + tab->hcu_scheduled |= tab->hcu_corked; + tab->hcu_corked = 0; + + tab->nhu_state |= tab->nhu_corked; + tab->nhu_corked = 0; + + if (config->table_debug) + log(L_TRACE "%s: Auxiliary routines uncorked", tab->name); + + ev_schedule(tab->rt_event); +} static void rt_prune_timer(timer *t) @@ -2151,7 +2436,7 @@ rt_settle_timer(timer *t) struct rt_subscription *s; WALK_LIST(s, tab->subscribers) - s->hook(s); + ev_send(s->list, s->event); } static void @@ -2183,6 +2468,7 @@ rt_subscribe(rtable *tab, struct rt_subscription *s) { s->tab = tab; rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); add_tail(&tab->subscribers, &s->n); } @@ -2285,9 +2571,6 @@ rt_free(resource *_r) DBG("Deleting routing table %s\n", r->name); ASSERT_DIE(r->use_count == 0); - if (r->internal) - return; - r->config->table = NULL; rem_node(&r->n); @@ -2323,16 +2606,13 @@ static struct resclass rt_class = { rtable * rt_setup(pool *pp, struct rtable_config *cf) { - int ns = strlen("Routing table ") + strlen(cf->name) + 1; - void *nb = mb_alloc(pp, ns); - ASSERT_DIE(ns - 1 == bsnprintf(nb, ns, "Routing table %s", cf->name)); - - pool *p = rp_new(pp, nb); - mb_move(nb, p); + pool *p = rp_newf(pp, "Routing table %s", cf->name); rtable *t = ralloc(p, &rt_class); t->rp = p; + t->rte_slab = sl_new(p, sizeof(struct rte_storage)); + t->name = cf->name; t->config = cf; t->addr_type = cf->addr_type; @@ -2347,24 +2627,41 @@ rt_setup(pool *pp, struct rtable_config *cf) t->fib.init = net_init_with_trie; } - init_list(&t->channels); init_list(&t->flowspec_links); + + t->exporter = (struct rt_exporter) { + .addr_type = t->addr_type, + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, + .used = rt_export_used, + }; + + init_list(&t->exporter.hooks); + init_list(&t->exporter.pending); + + init_list(&t->imports); + + hmap_init(&t->id_map, p, 1024); + hmap_set(&t->id_map, 0); + init_list(&t->subscribers); - if (!(t->internal = cf->internal)) - { - hmap_init(&t->id_map, p, 1024); - hmap_set(&t->id_map, 0); + t->rt_event = ev_new_init(p, rt_event, t); + t->uncork_event = ev_new_init(p, rt_uncork_event, t); + t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); + t->exporter.export_timer = tm_new_init(p, rt_announce_exports, t, 0, 0); + t->last_rt_change = t->gc_time = current_time(); + t->exporter.next_seq = 1; - t->rt_event = ev_new_init(p, rt_event, t); - t->prune_timer = tm_new_init(p, rt_prune_timer, t, 0, 0); - t->last_rt_change = t->gc_time = current_time(); + t->cork_threshold = cf->cork_threshold; - if (rt_is_flow(t)) - { - t->flowspec_trie = f_new_trie(lp_new_default(p), 0); - t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); - } + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + + if (rt_is_flow(t)) + { + t->flowspec_trie = f_new_trie(lp_new_default(p), 0); + t->flowspec_trie->ipv4 = (t->addr_type == NET_FLOW4); } return t; @@ -2381,9 +2678,10 @@ rt_init(void) { rta_init(); rt_table_pool = rp_new(&root_pool, "Routing tables"); - rte_update_pool = lp_new_default(rt_table_pool); - rte_slab = sl_new(rt_table_pool, sizeof(rte)); init_list(&routing_tables); + init_list(&deleted_routing_tables); + ev_init_list(&rt_cork.queue, &main_birdloop, "Route cork release"); + rt_cork.run = (event) { .hook = rt_cork_release_hook }; } @@ -2407,7 +2705,7 @@ rt_prune_table(rtable *tab) struct fib_iterator *fit = &tab->prune_fit; int limit = 2000; - struct channel *c; + struct rt_import_hook *ih; node *n, *x; DBG("Pruning route table %s\n", tab->name); @@ -2421,9 +2719,16 @@ rt_prune_table(rtable *tab) if (tab->prune_state == 1) { /* Mark channels to flush */ - WALK_LIST2(c, n, tab->channels, table_node) - if (c->channel_state == CS_FLUSHING) - c->flush_active = 1; + WALK_LIST2(ih, n, tab->imports, n) + if (ih->import_state == TIS_STOP) + rt_set_import_state(ih, TIS_FLUSHING); + else if ((ih->stale_valid != ih->stale_pruning) && (ih->stale_pruning == ih->stale_pruned)) + { + ih->stale_pruning = ih->stale_valid; + + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh begin [%u]", ih->req->name, ih->stale_pruning); + } FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; @@ -2442,8 +2747,6 @@ rt_prune_table(rtable *tab) again: FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e; - rescan: if (limit <= 0) { @@ -2452,26 +2755,21 @@ again: return; } - for (e=n->routes; e; e=e->next) + for (struct rte_storage *e=n->routes; e; e=e->next) { - if (e->sender->flush_active || (e->flags & REF_DISCARD)) + struct rt_import_hook *s = e->rte.sender; + if ((s->import_state == TIS_FLUSHING) || + (e->rte.stale_cycle < s->stale_valid) || + (e->rte.stale_cycle > s->stale_set)) { - rte_discard(e); - limit--; - - goto rescan; - } - - if (e->flags & REF_MODIFY) - { - rte_modify(e); + rte_recalculate(e->rte.sender, n, NULL, e->rte.src); limit--; goto rescan; } } - if (!n->routes) /* Orphaned FIB entry */ + if (!n->routes && !n->first) /* Orphaned FIB entry */ { FIB_ITERATE_PUT(fit); fib_delete(&tab->fib, n); @@ -2524,21 +2822,203 @@ again: } } - if (tab->prune_state > 0) - ev_schedule(tab->rt_event); - - /* FIXME: This should be handled in a better way */ - rt_prune_sources(); + uint flushed_channels = 0; /* Close flushed channels */ - WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) - if (c->flush_active) + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_FLUSHING) + { + ih->flush_seq = tab->exporter.next_seq; + rt_set_import_state(ih, TIS_WAITING); + flushed_channels++; + } + else if (ih->stale_pruning != ih->stale_pruned) + { + ih->stale_pruned = ih->stale_pruning; + if (ih->req->trace_routes & D_STATES) + log(L_TRACE "%s: table prune after refresh end [%u]", ih->req->name, ih->stale_pruned); + } + + /* In some cases, we may want to directly proceed to export cleanup */ + if (EMPTY_LIST(tab->exporter.hooks) && flushed_channels) + rt_export_cleanup(tab); +} + +static void +rt_export_cleanup(rtable *tab) +{ + tab->export_used = 0; + + u64 min_seq = ~((u64) 0); + struct rt_pending_export *last_export_to_free = NULL; + struct rt_pending_export *first = tab->exporter.first; + + struct rt_export_hook *eh; + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + switch (atomic_load_explicit(&eh->export_state, memory_order_acquire)) + { + case TES_DOWN: + continue; + + case TES_READY: + { + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (!last) + /* No last export means that the channel has exported nothing since last cleanup */ + goto done; + + else if (min_seq > last->seq) + { + min_seq = last->seq; + last_export_to_free = last; + } + continue; + } + + default: + /* It's only safe to cleanup when the export state is idle or regular. No feeding or stopping allowed. */ + goto done; + } + } + + tab->exporter.first = last_export_to_free ? rt_next_export_fast(last_export_to_free) : NULL; + + if (config->table_debug) + log(L_TRACE "%s: Export cleanup, old exporter.first seq %lu, new %lu, min_seq %ld", + tab->name, + first ? first->seq : 0, + tab->exporter.first ? tab->exporter.first->seq : 0, + min_seq); + + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + struct rt_pending_export *last = atomic_load_explicit(&eh->last_export, memory_order_acquire); + if (last == last_export_to_free) + { + /* This may fail when the channel managed to export more inbetween. This is OK. */ + atomic_compare_exchange_strong_explicit( + &eh->last_export, &last, NULL, + memory_order_release, + memory_order_relaxed); + + DBG("store hook=%p last_export=NULL\n", eh); + } + } + + while (first && (first->seq <= min_seq)) + { + ASSERT_DIE(first->new || first->old); + + const net_addr *n = first->new ? + first->new->rte.net : + first->old->rte.net; + net *net = SKIP_BACK(struct network, n.addr, (net_addr (*)[0]) n); + + ASSERT_DIE(net->first == first); + + if (first == net->last) + /* The only export here */ + net->last = net->first = NULL; + else + /* First is now the next one */ + net->first = atomic_load_explicit(&first->next, memory_order_relaxed); + + /* For now, the old route may be finally freed */ + if (first->old) + { + rt_rte_trace_in(D_ROUTES, first->old->rte.sender->req, &first->old->rte, "freed"); + hmap_clear(&tab->id_map, first->old->rte.id); + rte_free(first->old); + } + +#ifdef LOCAL_DEBUG + memset(first, 0xbd, sizeof(struct rt_pending_export)); +#endif + + struct rt_export_block *reb = HEAD(tab->exporter.pending); + ASSERT_DIE(reb == PAGE_HEAD(first)); + + u32 pos = (first - &reb->export[0]); + u32 end = atomic_load_explicit(&reb->end, memory_order_relaxed); + ASSERT_DIE(pos < end); + + struct rt_pending_export *next = NULL; + + if (++pos < end) + next = &reb->export[pos]; + else + { + rem_node(&reb->n); + +#ifdef LOCAL_DEBUG + memset(reb, 0xbe, page_size); +#endif + + free_page(reb); + + if (EMPTY_LIST(tab->exporter.pending)) { - c->flush_active = 0; - channel_set_state(c, CS_DOWN); + if (config->table_debug) + log(L_TRACE "%s: Resetting export seq", tab->name); + + node *n; + WALK_LIST2(eh, n, tab->exporter.hooks, n) + { + if (atomic_load_explicit(&eh->export_state, memory_order_acquire) != TES_READY) + continue; + + ASSERT_DIE(atomic_load_explicit(&eh->last_export, memory_order_acquire) == NULL); + bmap_reset(&eh->seq_map, 1024); + } + + tab->exporter.next_seq = 1; + } + else + { + reb = HEAD(tab->exporter.pending); + next = &reb->export[0]; } + } + + first = next; + } + + rt_check_cork_low(tab); + +done:; + struct rt_import_hook *ih; node *x; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_WAITING) + if (!first || (first->seq >= ih->flush_seq)) + { + ih->import_state = TIS_CLEARED; + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + } + + if (tab->export_used) + ev_schedule(tab->rt_event); + - return; + if (EMPTY_LIST(tab->exporter.pending) && tm_active(tab->exporter.export_timer)) + tm_stop(tab->exporter.export_timer); +} + +static void +rt_cork_release_hook(void *data UNUSED) +{ + do synchronize_rcu(); + while ( + !atomic_load_explicit(&rt_cork.active, memory_order_acquire) && + ev_run_list(&rt_cork.queue) + ); } /** @@ -2637,134 +3117,168 @@ rt_postconfig(struct config *c) */ void -rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]) { - a->hostentry = he; - a->dest = he->dest; - a->igp_metric = he->igp_metric; + struct { + struct adata ad; + struct hostentry *he; + u32 labels[lnum]; + } *head = (void *) tmp_alloc_adata(sizeof *head - sizeof(struct adata)); + + head->he = rt_get_hostentry(tab, gw, ll, dep); + memcpy(head->labels, labels, lnum * sizeof(u32)); + + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_hostentry, 0, &head->ad)); +} - if (a->dest != RTD_UNICAST) + +static void +rta_apply_hostentry(ea_list **to, struct hostentry_adata *head) +{ + struct hostentry *he = head->he; + u32 *labels = head->labels; + u32 lnum = (u32 *) (head->ad.data + head->ad.length) - labels; + + ea_set_attr_u32(to, &ea_gen_igp_metric, 0, he->igp_metric); + + if (!he->src) { - /* No nexthop */ -no_nexthop: - a->nh = (struct nexthop) {}; - if (mls) - { /* Store the label stack for later changes */ - a->nh.labels_orig = a->nh.labels = mls->len; - memcpy(a->nh.label, mls->stack, mls->len * sizeof(u32)); - } + ea_set_dest(to, 0, RTD_UNREACHABLE); return; } - if (((!mls) || (!mls->len)) && he->nexthop_linkable) + eattr *he_nh_ea = ea_find(he->src, &ea_gen_nexthop); + ASSERT_DIE(he_nh_ea); + + struct nexthop_adata *nhad = (struct nexthop_adata *) he_nh_ea->u.ptr; + int idest = nhea_dest(he_nh_ea); + + if ((idest != RTD_UNICAST) || + !lnum && he->nexthop_linkable) { /* Just link the nexthop chain, no label append happens. */ - memcpy(&(a->nh), &(he->src->nh), nexthop_size(&(he->src->nh))); + ea_copy_attr(to, he->src, &ea_gen_nexthop); return; } - struct nexthop *nhp = NULL, *nhr = NULL; - int skip_nexthop = 0; + uint total_size = OFFSETOF(struct nexthop_adata, nh); - for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { - if (skip_nexthop) - skip_nexthop--; - else + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) { - nhr = nhp; - nhp = (nhp ? (nhp->next = lp_alloc(rte_update_pool, NEXTHOP_MAX_SIZE)) : &(a->nh)); + log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", + nh->labels, lnum, nh->labels + lnum, MPLS_MAX_LABEL_STACK); + continue; } - memset(nhp, 0, NEXTHOP_MAX_SIZE); - nhp->iface = nh->iface; - nhp->weight = nh->weight; + total_size += NEXTHOP_SIZE_CNT(nh->labels + lnum); + } - if (mls) - { - nhp->labels = nh->labels + mls->len; - nhp->labels_orig = mls->len; - if (nhp->labels <= MPLS_MAX_LABEL_STACK) - { - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); /* First the hostentry labels */ - memcpy(&(nhp->label[nh->labels]), mls->stack, mls->len * sizeof(u32)); /* Then the bottom labels */ - } - else - { - log(L_WARN "Sum of label stack sizes %d + %d = %d exceedes allowed maximum (%d)", - nh->labels, mls->len, nhp->labels, MPLS_MAX_LABEL_STACK); - skip_nexthop++; - continue; - } - } - else if (nh->labels) + if (total_size == OFFSETOF(struct nexthop_adata, nh)) + { + log(L_WARN "No valid nexthop remaining, setting route unreachable"); + + struct nexthop_adata nha = { + .ad.length = NEXTHOP_DEST_SIZE, + .dest = RTD_UNREACHABLE, + }; + + ea_set_attr_data(to, &ea_gen_nexthop, 0, &nha.ad.data, nha.ad.length); + return; + } + + struct nexthop_adata *new = (struct nexthop_adata *) tmp_alloc_adata(total_size); + struct nexthop *dest = &new->nh; + + NEXTHOP_WALK(nh, nhad) + { + if (nh->labels + lnum > MPLS_MAX_LABEL_STACK) + continue; + + memcpy(dest, nh, NEXTHOP_SIZE(nh)); + if (lnum) { - nhp->labels = nh->labels; - nhp->labels_orig = 0; - memcpy(nhp->label, nh->label, nh->labels * sizeof(u32)); + memcpy(&(dest->label[dest->labels]), labels, lnum * sizeof labels[0]); + dest->labels += lnum; } if (ipa_nonzero(nh->gw)) - { - nhp->gw = nh->gw; /* Router nexthop */ - nhp->flags |= (nh->flags & RNF_ONLINK); - } + /* Router nexthop */ + dest->flags = (dest->flags & RNF_ONLINK); else if (!(nh->iface->flags & IF_MULTIACCESS) || (nh->iface->flags & IF_LOOPBACK)) - nhp->gw = IPA_NONE; /* PtP link - no need for nexthop */ + dest->gw = IPA_NONE; /* PtP link - no need for nexthop */ else if (ipa_nonzero(he->link)) - nhp->gw = he->link; /* Device nexthop with link-local address known */ + dest->gw = he->link; /* Device nexthop with link-local address known */ else - nhp->gw = he->addr; /* Device nexthop with link-local address unknown */ + dest->gw = he->addr; /* Device nexthop with link-local address unknown */ + + dest = NEXTHOP_NEXT(dest); } - if (skip_nexthop) - if (nhr) - nhr->next = NULL; - else - { - a->dest = RTD_UNREACHABLE; - log(L_WARN "No valid nexthop remaining, setting route unreachable"); - goto no_nexthop; - } + /* Fix final length */ + new->ad.length = (void *) dest - (void *) new->ad.data; + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &new->ad)); } -static inline int -rta_next_hop_outdated(rta *a) +static inline struct hostentry_adata * +rta_next_hop_outdated(ea_list *a) { - struct hostentry *he = a->hostentry; + /* First retrieve the hostentry */ + eattr *heea = ea_find(a, &ea_gen_hostentry); + if (!heea) + return NULL; - if (!he) - return 0; + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; - if (!he->src) - return a->dest != RTD_UNREACHABLE; + /* If no nexthop is present, we have to create one */ + eattr *a_nh_ea = ea_find(a, &ea_gen_nexthop); + if (!a_nh_ea) + return head; + + struct nexthop_adata *nhad = (struct nexthop_adata *) a_nh_ea->u.ptr; + + /* Shortcut for unresolvable hostentry */ + if (!head->he->src) + return NEXTHOP_IS_REACHABLE(nhad) ? head : NULL; - return (a->dest != he->dest) || (a->igp_metric != he->igp_metric) || - (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); + /* Comparing our nexthop with the hostentry nexthop */ + eattr *he_nh_ea = ea_find(head->he->src, &ea_gen_nexthop); + + return ( + (ea_get_int(a, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN) != head->he->igp_metric) || + (!head->he->nexthop_linkable) || + (!he_nh_ea != !a_nh_ea) || + (he_nh_ea && a_nh_ea && !adata_same(he_nh_ea->u.ptr, a_nh_ea->u.ptr))) + ? head : NULL; } -static inline rte * -rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) +static inline struct rte_storage * +rt_next_hop_update_rte(rtable *tab, net *n, rte *old) { - if (!rta_next_hop_outdated(old->attrs)) + struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); + if (!head) return NULL; - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, old->attrs, rta_size(old->attrs)); + rte e0 = *old; + rta_apply_hostentry(&e0.attrs, head); - mpls_label_stack mls = { .len = a->nh.labels_orig }; - memcpy(mls.stack, &a->nh.label[a->nh.labels - mls.len], mls.len * sizeof(u32)); + return rte_store(&e0, n, tab); +} - rta_apply_hostentry(a, old->attrs->hostentry, &mls); - a->aflags = 0; +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs, &ea_gen_hostentry); + if (!heea) + return; - rte *e = sl_alloc(rte_slab); - memcpy(e, old, sizeof(rte)); - e->attrs = rta_lookup(a); + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; - return e; + rta_apply_hostentry(&r->attrs, head); } - #ifdef CONFIG_BGP static inline int @@ -2788,23 +3302,23 @@ net_flow_has_dst_prefix(const net_addr *n) } static inline int -rta_as_path_is_empty(rta *a) +rta_as_path_is_empty(ea_list *a) { - eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(a, "bgp_path"); return !e || (as_path_getlen(e->u.ptr) == 0); } static inline u32 -rta_get_first_asn(rta *a) +rta_get_first_asn(ea_list *a) { - eattr *e = ea_find(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(a, "bgp_path"); u32 asn; return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0; } -int -rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior) +static inline enum flowspec_valid +rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, ea_list *a, int interior) { ASSERT(rt_is_ip(tab_ip)); ASSERT(rt_is_flow(tab_flow)); @@ -2812,11 +3326,11 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* RFC 8955 6. a) Flowspec has defined dst prefix */ if (!net_flow_has_dst_prefix(n)) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.1. Accept AS_PATH is empty (fr */ if (interior && rta_as_path_is_empty(a)) - return 1; + return FLOWSPEC_VALID; /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */ @@ -2830,33 +3344,36 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* Find best-match BGP unicast route for flowspec dst prefix */ net *nb = net_route(tab_ip, &dst); - rte *rb = nb ? nb->routes : NULL; + const rte *rb = nb ? &nb->routes->rte : NULL; /* Register prefix to trie for tracking further changes */ int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; trie_add_prefix(tab_flow->flowspec_trie, &dst, (nb ? nb->n.addr->pxlen : 0), max_pxlen); /* No best-match BGP route -> no flowspec */ - if (!rb || (rb->attrs->source != RTS_BGP)) - return 0; + if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) + return FLOWSPEC_INVALID; /* Find ORIGINATOR_ID values */ - u32 orig_a = ea_get_int(a->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0); - u32 orig_b = ea_get_int(rb->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID), 0); + u32 orig_a = ea_get_int(a, "bgp_originator_id", 0); + u32 orig_b = ea_get_int(rb->attrs, "bgp_originator_id", 0); /* Originator is either ORIGINATOR_ID (if present), or BGP neighbor address (if not) */ - if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal(a->from, rb->attrs->from))) - return 0; + if ((orig_a != orig_b) || (!orig_a && !orig_b && !ipa_equal( + ea_get_ip(a, &ea_gen_from, IPA_NONE), + ea_get_ip(rb->attrs, &ea_gen_from, IPA_NONE) + ))) + return FLOWSPEC_INVALID; /* Find ASN of the best-match route, for use in next checks */ u32 asn_b = rta_get_first_asn(rb->attrs); if (!asn_b) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */ if (!interior && (rta_get_first_asn(a) != asn_b)) - return 0; + return FLOWSPEC_INVALID; /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ TRIE_WALK(tab_ip->trie, subnet, &dst) @@ -2865,99 +3382,139 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i if (!nc) continue; - rte *rc = nc->routes; - if (rc->attrs->source != RTS_BGP) - return 0; + const rte *rc = &nc->routes->rte; + if (rt_get_source_attr(rc) != RTS_BGP) + return FLOWSPEC_INVALID; if (rta_get_first_asn(rc->attrs) != asn_b) - return 0; + return FLOWSPEC_INVALID; } TRIE_WALK_END; - return 1; + return FLOWSPEC_VALID; } #endif /* CONFIG_BGP */ -static rte * -rt_flowspec_update_rte(rtable *tab, rte *r) +static struct rte_storage * +rt_flowspec_update_rte(rtable *tab, net *n, rte *r) { #ifdef CONFIG_BGP - if ((r->attrs->source != RTS_BGP) || !r->u.bgp.base_table) + if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) return NULL; - const net_addr *n = r->net->n.addr; - struct bgp_proto *p = (void *) r->attrs->src->proto; - int valid = rt_flowspec_check(r->u.bgp.base_table, tab, n, r->attrs, p->is_interior); - int dest = valid ? RTD_NONE : RTD_UNREACHABLE; - - if (dest == r->attrs->dest) + struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); + if (!bc->base_table) return NULL; - rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, r->attrs, rta_size(r->attrs)); - a->dest = dest; - a->aflags = 0; + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + + enum flowspec_valid old = rt_get_flowspec_valid(r), + valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + + if (old == valid) + return NULL; - rte *new = sl_alloc(rte_slab); - memcpy(new, r, sizeof(rte)); - new->attrs = rta_lookup(a); + rte new = *r; + ea_set_attr_u32(&new.attrs, &ea_gen_flowspec_valid, 0, valid); - return new; + return rte_store(&new, n, tab); #else return NULL; #endif } +static inline void +rt_flowspec_resolve_rte(rte *r, struct channel *c) +{ +#ifdef CONFIG_BGP + enum flowspec_valid valid, old = rt_get_flowspec_valid(r); + struct bgp_channel *bc = (struct bgp_channel *) c; + + if ( (rt_get_source_attr(r) == RTS_BGP) + && (c->channel == &channel_bgp) + && (bc->base_table)) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + valid = rt_flowspec_check( + bc->base_table, + c->in_req.hook->table, + r->net, r->attrs, p->is_interior); + } + else + valid = FLOWSPEC_UNKNOWN; + + if (valid == old) + return; + + if (valid == FLOWSPEC_UNKNOWN) + ea_unset_attr(&r->attrs, 0, &ea_gen_flowspec_valid); + else + ea_set_attr_u32(&r->attrs, &ea_gen_flowspec_valid, 0, valid); +#endif +} static inline int rt_next_hop_update_net(rtable *tab, net *n) { - rte **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; + int is_flow = net_is_flow(n->n.addr); - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) - { - if (!net_is_flow(n->n.addr)) - new = rt_next_hop_update_rte(tab, e); - else - new = rt_flowspec_update_rte(tab, e); + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) + count++; - if (new) + if (!count) + return 0; + + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); + + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) { - *k = new; + struct rte_storage *new = is_flow + ? rt_flowspec_update_rte(tab, n, &e->rte) + : rt_next_hop_update_rte(tab, n, &e->rte); - rte_trace_in(D_ROUTES, new->sender, new, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); + if (!new) + continue; /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ - if (e->attrs->src->proto->rte_recalculate) - e->attrs->src->proto->rte_recalculate(tab, n, new, e, NULL); - - if (e != old_best) - rte_free_quick(e); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; - - e = new; - count++; + if (e->rte.src->owner->rte_recalculate) + e->rte.src->owner->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); + + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; + + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; + + /* Get a new ID for the route */ + new->rte.lastmod = current_time(); + new->rte.id = hmap_first_zero(&tab->id_map); + hmap_set(&tab->id_map, new->rte.id); } - } - if (!count) - return 0; + ASSERT_DIE(pos <= count); + count = pos; /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { - if (!new_best || rte_better(e, *new_best)) + if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; } @@ -2970,15 +3527,17 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->sender, new, "updated [best]"); - - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); - - if (free_old_best) - rte_free_quick(old_best); + /* Announce the changes */ + for (int i=0; i<count; i++) + { + _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + const char *best_indicator[2][2] = { + { "autoupdated", "autoupdated [-best]" }, + { "autoupdated [+best]", "autoupdated [best]" } + }; + rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce(tab, n, updates[i].new, updates[i].old, new, old_best); + } return count; } @@ -3042,6 +3601,8 @@ rt_new_table(struct symbol *s, uint addr_type) c->gc_period = (uint) -1; /* set in rt_postconfig() */ c->min_settle_time = 1 S; c->max_settle_time = 20 S; + c->cork_threshold.low = 128; + c->cork_threshold.high = 512; add_tail(&new_config->tables, &c->n); @@ -3087,6 +3648,36 @@ rt_unlock_table(rtable *r) } } +static void +rt_check_cork_low(rtable *tab) +{ + if (!tab->cork_active) + return; + + if (!tab->exporter.first || (tab->exporter.first->seq + tab->cork_threshold.low > tab->exporter.next_seq)) + { + tab->cork_active = 0; + rt_cork_release(); + + if (config->table_debug) + log(L_TRACE "%s: Uncorked", tab->name); + } +} + +static void +rt_check_cork_high(rtable *tab) +{ + if (!tab->cork_active && tab->exporter.first && (tab->exporter.first->seq + tab->cork_threshold.high <= tab->exporter.next_seq)) + { + tab->cork_active = 1; + rt_cork_acquire(); + + if (config->table_debug) + log(L_TRACE "%s: Corked", tab->name); + } +} + + static int rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old) { @@ -3100,6 +3691,14 @@ rt_reconfigure(rtable *tab, struct rtable_config *new, struct rtable_config *old tab->name = new->name; tab->config = new; + tab->cork_threshold = new->cork_threshold; + + if (new->cork_threshold.high != old->cork_threshold.high) + rt_check_cork_high(tab); + + if (new->cork_threshold.low != old->cork_threshold.low) + rt_check_cork_low(tab); + return 1; } @@ -3158,21 +3757,18 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } -static inline void -do_feed_channel(struct channel *c, net *n, rte *e) +static void +rt_feed_done(struct rt_export_hook *c) { - rte_update_lock(); - if (c->ra_mode == RA_ACCEPTED) - rt_notify_accepted(c, n, NULL, NULL, c->refeeding); - else if (c->ra_mode == RA_MERGED) - rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding); - else /* RA_BASIC */ - rt_notify_basic(c, n, e, e, c->refeeding); - rte_update_unlock(); + c->event->hook = rt_export_hook; + + rt_set_export_state(c, TES_READY); + + rt_send_export_event(c); } /** - * rt_feed_channel - advertise all routes to a channel + * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed * * This function performs one pass of advertisement of routes to a channel that @@ -3180,370 +3776,154 @@ do_feed_channel(struct channel *c, net *n, rte *e) * has something to do. (We avoid transferring all the routes in single pass in * order not to monopolize CPU time.) */ -int -rt_feed_channel(struct channel *c) +static void +rt_feed_by_fib(void *data) { + struct rt_export_hook *c = data; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == ES_FEEDING); + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - if (!c->feed_active) - { - FIB_ITERATE_INIT(fit, &c->table->fib); - c->feed_active = 1; - } + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - FIB_ITERATE_START(&c->table->fib, fit, net, n) + FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e = n->routes; if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - return 0; + rt_send_export_event(c); + return; } - if ((c->ra_mode == RA_OPTIMAL) || - (c->ra_mode == RA_ACCEPTED) || - (c->ra_mode == RA_MERGED)) - if (rte_is_valid(e)) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - do_feed_channel(c, n, e); - max_feed--; - } - - if (c->ra_mode == RA_ANY) - for(e = n->routes; e; e = e->next) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - if (!rte_is_valid(e)) - continue; + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; - do_feed_channel(c, n, e); - max_feed--; - } + if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) + max_feed -= rt_feed_net(c, n); } FIB_ITERATE_END; -done: - c->feed_active = 0; - return 1; + rt_feed_done(c); } -/** - * rt_feed_baby_abort - abort protocol feeding - * @c: channel - * - * This function is called by the protocol code when the protocol stops or - * ceases to exist during the feeding. - */ -void -rt_feed_channel_abort(struct channel *c) +static void +rt_feed_by_trie(void *data) { - if (c->feed_active) - { - /* Unlink the iterator */ - fit_get(&c->table->fib, &c->feed_fit); - c->feed_active = 0; - } -} + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + ASSERT_DIE(c->walk_state); + struct f_trie_walk_state *ws = c->walk_state; -/* - * Import table - */ + int max_feed = 256; -int -rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - struct rtable *tab = c->in_table; - rte *old, **pos; - net *net; + ASSERT(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); - if (new) + net_addr addr; + while (trie_walk_next(ws, &addr)) { - net = net_get(tab, n); - - if (!new->pref) - new->pref = c->preference; + net *n = net_find(tab, &addr); + if (!n) + continue; - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } - else - { - net = net_find(tab, n); + if ((max_feed -= rt_feed_net(c, n)) <= 0) + return; - if (!net) - goto drop_withdraw; + if (atomic_load_explicit(&c->export_state, memory_order_acquire) != TES_FEEDING) + return; } - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if (old->attrs->src == src) - { - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - - goto drop_update; - } - - /* Move iterator if needed */ - if (old == c->reload_next_rte) - c->reload_next_rte = old->next; - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; - - break; - } - - if (!new) - { - if (!old) - goto drop_withdraw; + rt_unlock_trie(tab, c->walk_lock); + c->walk_lock = NULL; - if (!net->routes) - fib_delete(&tab->fib, net); + mb_free(c->walk_state); + c->walk_state = NULL; - return 1; - } + rt_feed_done(c); +} - struct channel_limit *l = &c->rx_limit; - if (l->action && !old) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); +static void +rt_feed_equal(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - if (l->state == PLS_BLOCKED) - { - /* Required by rte_trace_in() */ - new->net = net; + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } + net *n = net_find(tab, c->req->addr); + if (n) + rt_feed_net(c, n); - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; + rt_feed_done(c); +} -drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; - rte_free(new); +static void +rt_feed_for(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - if (!net->routes) - fib_delete(&tab->fib, net); + ASSERT_DIE(atomic_load_explicit(&c->export_state, memory_order_relaxed) == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); - return 0; + net *n = net_route(tab, c->req->addr); + if (n) + rt_feed_net(c, n); -drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; - return 0; + rt_feed_done(c); } -int -rt_reload_channel(struct channel *c) +static uint +rt_feed_net(struct rt_export_hook *c, net *n) { - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); + uint count = 0; - if (!c->reload_active) + if (c->req->export_bulk) { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } - - do { - for (rte *e = c->reload_next_rte; e; e = e->next) - { - if (max_feed-- <= 0) - { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; - } - - rte_update2(c, e->net->n.addr, rte_do_cow(e), e->attrs->src); - } - - c->reload_next_rte = NULL; - - FIB_ITERATE_START(&tab->fib, fit, net, n) + count = rte_feed_count(n); + if (count) { - if (c->reload_next_rte = n->routes) - { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + c->req->export_bulk(c->req, n->n.addr, NULL, feed, count); } - FIB_ITERATE_END; } - while (c->reload_next_rte); - c->reload_active = 0; - return 1; -} - -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) + else if (n->routes) { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + count = 1; } -} - -void -rt_prune_sync(rtable *t, int all) -{ - struct fib_iterator fit; - FIB_ITERATE_INIT(&fit, &t->fib); + for (struct rt_pending_export *rpe = n->first; rpe; rpe = rpe_next(rpe, NULL)) + rpe_mark_seen(c, rpe); -again: - FIB_ITERATE_START(&t->fib, &fit, net, n) - { - rte *e, **ee = &n->routes; - - while (e = *ee) - { - if (all || (e->flags & (REF_STALE | REF_DISCARD))) - { - *ee = e->next; - rte_free_quick(e); - t->rt_count--; - } - else - ee = &e->next; - } - - if (all || !n->routes) - { - FIB_ITERATE_PUT(&fit); - fib_delete(&t->fib, n); - goto again; - } - } - FIB_ITERATE_END; + return count; } - /* - * Export table + * Import table */ -int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, int refeed) +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rtable *tab = c->out_table; - struct rte_src *src; - rte *old, **pos; - net *net; - - if (new) - { - net = net_get(tab, n); - src = new->attrs->src; - - rte_store_tmp_attrs(new, rte_update_pool, NULL); - - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } - else - { - net = net_find(tab, n); - src = old0->attrs->src; + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - if (!net) - goto drop_withdraw; - } - - /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if ((c->ra_mode != RA_ANY) || (old->attrs->src == src)) + for (uint i=0; i<count; i++) + if (feed[i]->sender == c->in_req.hook) { - if (new && rte_same(old, new)) - { - /* REF_STALE / REF_DISCARD not used in export table */ - /* - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - */ - - goto drop_update; - } - - /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); - tab->rt_count--; + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; - break; + /* And reload the route */ + rte_update(c, net, &new, new.src); } - - if (!new) - { - if (!old) - goto drop_withdraw; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop_update: - return refeed; - -drop_withdraw: - return 0; } @@ -3642,7 +4022,7 @@ hc_delete_hostentry(struct hostcache *hc, pool *p, struct hostentry *he) rem_node(&he->ln); hc_remove(hc, he); - sl_free(hc->slab, he); + sl_free(he); hc->hash_items--; if (hc->hash_items < hc->hash_min) @@ -3659,7 +4039,7 @@ rt_init_hostcache(rtable *tab) hc_alloc_table(hc, tab->rp, HC_DEF_ORDER); hc->slab = sl_new(tab->rp, sizeof(struct hostentry)); - hc->lp = lp_new(tab->rp, LP_GOOD_SIZE(1024)); + hc->lp = lp_new(tab->rp); hc->trie = f_new_trie(hc->lp, 0); tab->hostcache = hc; @@ -3711,56 +4091,31 @@ if_local_addr(ip_addr a, struct iface *i) } u32 -rt_get_igp_metric(rte *rt) +rt_get_igp_metric(const rte *rt) { - eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC); + eattr *ea = ea_find(rt->attrs, "igp_metric"); if (ea) return ea->u.data; - rta *a = rt->attrs; - -#ifdef CONFIG_OSPF - if ((a->source == RTS_OSPF) || - (a->source == RTS_OSPF_IA) || - (a->source == RTS_OSPF_EXT1)) - return rt->u.ospf.metric1; -#endif - -#ifdef CONFIG_RIP - if (a->source == RTS_RIP) - return rt->u.rip.metric; -#endif - -#ifdef CONFIG_BGP - if (a->source == RTS_BGP) - { - u64 metric = bgp_total_aigp_metric(rt); - return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); - } -#endif - -#ifdef CONFIG_BABEL - if (a->source == RTS_BABEL) - return rt->u.babel.metric; -#endif - - if (a->source == RTS_DEVICE) + if (rt_get_source_attr(rt) == RTS_DEVICE) return 0; + if (rt->src->owner->class->rte_igp_metric) + return rt->src->owner->class->rte_igp_metric(rt); + return IGP_METRIC_UNKNOWN; } static int rt_update_hostentry(rtable *tab, struct hostentry *he) { - rta *old_src = he->src; + ea_list *old_src = he->src; int direct = 0; int pxlen = 0; /* Reset the hostentry */ he->src = NULL; - he->dest = RTD_UNREACHABLE; he->nexthop_linkable = 0; he->igp_metric = 0; @@ -3769,11 +4124,14 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) net *n = net_route(tab, &he_addr); if (n) { - rte *e = n->routes; - rta *a = e->attrs; - pxlen = n->n.addr->pxlen; - - if (a->hostentry) + struct rte_storage *e = n->routes; + ea_list *a = e->rte.attrs; + u32 pref = rt_get_preference(&e->rte); + + for (struct rte_storage *ee = n->routes; ee; ee = ee->next) + if (rte_is_valid(&ee->rte) && + (rt_get_preference(&ee->rte) >= pref) && + ea_find(ee->rte.attrs, &ea_gen_hostentry)) { /* Recursive route should not depend on another recursive route */ log(L_WARN "Next hop address %I resolvable through recursive route for %N", @@ -3781,9 +4139,14 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) goto done; } - if (a->dest == RTD_UNICAST) - { - for (struct nexthop *nh = &(a->nh); nh; nh = nh->next) + pxlen = n->n.addr->pxlen; + + eattr *nhea = ea_find(a, &ea_gen_nexthop); + ASSERT_DIE(nhea); + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + + if (NEXTHOP_IS_REACHABLE(nhad)) + NEXTHOP_WALK(nh, nhad) if (ipa_zero(nh->gw)) { if (if_local_addr(he->addr, nh->iface)) @@ -3796,12 +4159,10 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) direct++; } - } he->src = rta_clone(a); - he->dest = a->dest; he->nexthop_linkable = !direct; - he->igp_metric = rt_get_igp_metric(e); + he->igp_metric = rt_get_igp_metric(&e->rte); } done: @@ -3839,7 +4200,7 @@ rt_update_hostcache(rtable *tab) tab->hcu_scheduled = 0; } -struct hostentry * +static struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) { struct hostentry *he; diff --git a/nest/rt.h b/nest/rt.h new file mode 100644 index 00000000..1a6b7a93 --- /dev/null +++ b/nest/rt.h @@ -0,0 +1,589 @@ +/* + * BIRD Internet Routing Daemon -- Routing Table + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2019--2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_NEST_RT_H_ +#define _BIRD_NEST_RT_H_ + +#include "lib/lists.h" +#include "lib/bitmap.h" +#include "lib/resource.h" +#include "lib/net.h" +#include "lib/type.h" +#include "lib/fib.h" +#include "lib/route.h" +#include "lib/event.h" +#include "lib/rcu.h" + +#include <stdatomic.h> + +struct ea_list; +struct protocol; +struct proto; +struct channel; +struct rte_src; +struct symbol; +struct timer; +struct filter; +struct f_trie; +struct f_trie_walk_state; +struct cli; + +struct rt_cork_threshold { + u64 low, high; +}; + +/* + * Master Routing Tables. Generally speaking, each of them contains a FIB + * with each entry pointing to a list of route entries representing routes + * to given network (with the selected one at the head). + * + * Each of the RTE's contains variable data (the preference and protocol-dependent + * metrics) and a pointer to a route attribute block common for many routes). + * + * It's guaranteed that there is at most one RTE for every (prefix,proto) pair. + */ + +struct rtable_config { + node n; + char *name; + struct rtable *table; + struct proto_config *krt_attached; /* Kernel syncer attached to this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + uint gc_threshold; /* Maximum number of operations before GC is run */ + uint gc_period; /* Approximate time between two consecutive GC runs */ + byte sorted; /* Routes of network are sorted according to rte_better() */ + byte trie_used; /* Rtable has attached trie */ + btime min_settle_time; /* Minimum settle time for notifications */ + btime max_settle_time; /* Maximum settle time for notifications */ + btime export_settle_time; /* Delay before exports are announced */ + struct rt_cork_threshold cork_threshold; /* Cork threshold values */ +}; + +struct rt_export_hook; +struct rt_export_request; + +struct rt_exporter { + list hooks; /* Registered route export hooks */ + uint addr_type; /* Type of address data exported (NET_*) */ + + struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(struct rt_export_hook *); + void (*used)(struct rt_exporter *); + + list pending; /* List of packed struct rt_pending_export */ + struct timer *export_timer; + + struct rt_pending_export *first; /* First export to announce */ + u64 next_seq; /* The next export will have this ID */ +}; + +typedef struct rtable { + resource r; + node n; /* Node in list of all tables */ + pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ + struct fib fib; + struct f_trie *trie; /* Trie of prefixes defined in fib */ + char *name; /* Name of this table */ + uint addr_type; /* Type of address data stored in table (NET_*) */ + int use_count; /* Number of protocols using this table */ + u32 rt_count; /* Number of routes in the table */ + + list imports; /* Registered route importers */ + struct rt_exporter exporter; /* Exporter API structure */ + + struct hmap id_map; + struct hostcache *hostcache; + struct rtable_config *config; /* Configuration of this table */ + struct config *deleted; /* Table doesn't exist in current configuration, + * delete as soon as use_count becomes 0 and remove + * obstacle from this routing table. + */ + struct event *rt_event; /* Routing table event */ + struct event *uncork_event; /* Called when uncork happens */ + struct timer *prune_timer; /* Timer for periodic pruning / GC */ + btime last_rt_change; /* Last time when route changed */ + btime base_settle_time; /* Start time of rtable settling interval */ + btime gc_time; /* Time of last GC */ + uint gc_counter; /* Number of operations since last GC */ + byte prune_state; /* Table prune state, 1 -> scheduled, 2-> running */ + byte prune_trie; /* Prune prefix trie during next table prune */ + byte hcu_scheduled; /* Hostcache update is scheduled */ + byte hcu_corked; /* Hostcache update is corked with this state */ + byte nhu_state; /* Next Hop Update state */ + byte nhu_corked; /* Next Hop Update is corked with this state */ + byte export_used; /* Pending Export pruning is scheduled */ + byte cork_active; /* Cork has been activated */ + struct rt_cork_threshold cork_threshold; /* Threshold for table cork */ + struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ + struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ + struct f_trie *trie_new; /* New prefix trie defined during pruning */ + struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ + u32 trie_lock_count; /* Prefix trie locked by walks */ + u32 trie_old_lock_count; /* Old prefix trie locked by walks */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ + + list subscribers; /* Subscribers for notifications */ + struct timer *settle_timer; /* Settle time for notifications */ + list flowspec_links; /* List of flowspec links, src for NET_IPx and dst for NET_FLOWx */ + struct f_trie *flowspec_trie; /* Trie for evaluation of flowspec notifications */ +} rtable; + +struct rt_subscription { + node n; + rtable *tab; + event *event; + event_list *list; +}; + +struct rt_flowspec_link { + node n; + rtable *src; + rtable *dst; + u32 uc; +}; + +extern struct rt_cork { + _Atomic uint active; + event_list queue; + event run; +} rt_cork; + +static inline void rt_cork_acquire(void) +{ + atomic_fetch_add_explicit(&rt_cork.active, 1, memory_order_acq_rel); +} + +static inline void rt_cork_release(void) +{ + if (atomic_fetch_sub_explicit(&rt_cork.active, 1, memory_order_acq_rel) == 1) + { + synchronize_rcu(); + ev_schedule_work(&rt_cork.run); + } +} + +static inline int rt_cork_check(event *e) +{ + rcu_read_lock(); + + int corked = (atomic_load_explicit(&rt_cork.active, memory_order_acquire) > 0); + if (corked) + ev_send(&rt_cork.queue, e); + + rcu_read_unlock(); + + return corked; +} + + +#define NHU_CLEAN 0 +#define NHU_SCHEDULED 1 +#define NHU_RUNNING 2 +#define NHU_DIRTY 3 + +typedef struct network { + struct rte_storage *routes; /* Available routes for this network */ + struct rt_pending_export *first, *last; + struct fib_node n; /* FIB flags reserved for kernel syncer */ +} net; + +struct hostcache { + slab *slab; /* Slab holding all hostentries */ + struct hostentry **hash_table; /* Hash table for hostentries */ + unsigned hash_order, hash_shift; + unsigned hash_max, hash_min; + unsigned hash_items; + linpool *lp; /* Linpool for trie */ + struct f_trie *trie; /* Trie of prefixes that might affect hostentries */ + list hostentries; /* List of all hostentries */ + byte update_hostcache; +}; + +struct hostentry { + node ln; + ip_addr addr; /* IP address of host, part of key */ + ip_addr link; /* (link-local) IP address of host, used as gw + if host is directly attached */ + struct rtable *tab; /* Dependent table, part of key */ + struct hostentry *next; /* Next in hash chain */ + unsigned hash_key; /* Hash key */ + unsigned uc; /* Use count */ + ea_list *src; /* Source attributes */ + byte nexthop_linkable; /* Nexthop list is completely non-device */ + u32 igp_metric; /* Chosen route IGP metric */ +}; + +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTE_COPY(r) ((r) ? (r)->rte : (rte) {}) +#define RTE_COPY_VALID(r) (((r) && (rte_is_valid(&(r)->rte))) ? (r)->rte : (rte) {}) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) +#define RTE_VALID_OR_NULL(r) (((r) && (rte_is_valid(&(r)->rte))) ? &((r)->rte) : NULL) + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ + int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + u64 flush_seq; /* Table export seq when the channel announced flushing */ + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + u8 stale_set; /* Set this stale_cycle to imported routes */ + u8 stale_valid; /* Routes with this stale_cycle and bigger are considered valid */ + u8 stale_pruned; /* Last prune finished when this value was set at stale_valid */ + u8 stale_pruning; /* Last prune started when this value was set at stale_valid */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ +}; + +struct rt_pending_export { + struct rt_pending_export * _Atomic next; /* Next export for the same destination */ + struct rte_storage *new, *new_best, *old, *old_best; + u64 seq; /* Sequential ID (table-local) of the pending export */ +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + const net_addr *addr; /* Network prefilter address */ + u8 trace_routes; + u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */ + + event_list *list; /* Where to schedule export events */ + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. + * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. + * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); +}; + +struct rt_export_hook { + node n; + struct rt_exporter *table; /* The connected table */ + + pool *pool; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + union { + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + struct { + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + }; + u32 hash_iter; /* Iterator over hash */ + }; + + struct bmap seq_map; /* Keep track which exports were already procesed */ + + struct rt_pending_export * _Atomic last_export;/* Last export processed */ + struct rt_pending_export *rpe_next; /* Next pending export to process */ + + btime last_state_change; /* Time of last state transition */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ + _Atomic u8 export_state; /* Route export state (TES_*, see below) */ + u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ + + struct event *event; /* Event running all the export operations */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +#define TIS_DOWN 0 +#define TIS_UP 1 +#define TIS_STOP 2 +#define TIS_FLUSHING 3 +#define TIS_WAITING 4 +#define TIS_CLEARED 5 +#define TIS_MAX 6 + +#define TES_DOWN 0 +#define TES_FEEDING 2 +#define TES_READY 3 +#define TES_STOP 4 +#define TES_MAX 5 + +/* Value of addr_mode */ +#define TE_ADDR_NONE 0 /* No address matching */ +#define TE_ADDR_EQUAL 1 /* Exact query - show route <addr> */ +#define TE_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ +#define TE_ADDR_IN 3 /* Interval query - show route in <addr> */ + + +#define TFT_FIB 1 +#define TFT_TRIE 2 +#define TFT_HASH 3 + +void rt_request_import(rtable *tab, struct rt_import_request *req); +void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); + +void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req); + +void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); +void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); + +const char *rt_import_state_name(u8 state); +const char *rt_export_state_name(u8 state); + +static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } +static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } + +void rt_set_export_state(struct rt_export_hook *hook, u8 state); + +void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + +/* Get next rpe. If src is given, it must match. */ +struct rt_pending_export *rpe_next(struct rt_pending_export *rpe, struct rte_src *src); + +/* Mark the pending export processed */ +void rpe_mark_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + +/* Get pending export seen status */ +int rpe_get_seen(struct rt_export_hook *hook, struct rt_pending_export *rpe); + +/* Types of route announcement, also used as flags */ +#define RA_UNDEF 0 /* Undefined RA type */ +#define RA_OPTIMAL 1 /* Announcement of optimal route change */ +#define RA_ACCEPTED 2 /* Announcement of first accepted route */ +#define RA_ANY 3 /* Announcement of any route change */ +#define RA_MERGED 4 /* Announcement of optimal route merged with next ones */ + +/* Return value of preexport() callback */ +#define RIC_ACCEPT 1 /* Accepted by protocol */ +#define RIC_PROCESS 0 /* Process it through import filter */ +#define RIC_REJECT -1 /* Rejected by protocol */ +#define RIC_DROP -2 /* Silently dropped by protocol */ + +#define rte_update channel_rte_import +/** + * rte_update - enter a new update to a routing table + * @c: channel doing the update + * @net: network address + * @rte: a &rte representing the new route + * @src: old route source identifier + * + * This function imports a new route to the appropriate table (via the channel). + * Table keys are @net (obligatory) and @rte->attrs->src. + * Both the @net and @rte pointers can be local. + * + * The route attributes (@rte->attrs) are obligatory. They can be also allocated + * locally. Anyway, if you use an already-cached attribute object, you shall + * call rta_clone() on that object yourself. (This semantics may change in future.) + * + * If the route attributes are local, you may set @rte->attrs->src to NULL, then + * the protocol's default route source will be supplied. + * + * When rte_update() gets a route, it automatically validates it. This includes + * checking for validity of the given network and next hop addresses and also + * checking for host-scope or link-scope routes. Then the import filters are + * processed and if accepted, the route is passed to route table recalculation. + * + * The accepted routes are then inserted into the table, replacing the old route + * for the same @net identified by @src. Then the route is announced + * to all the channels connected to the table using the standard export mechanism. + * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same + * as @src. + * + * All memory used for temporary allocations is taken from a special linpool + * @rte_update_pool and freed when rte_update() finishes. + */ +void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); + +extern list routing_tables; +struct config; + +void rt_init(void); +void rt_preconfig(struct config *); +void rt_postconfig(struct config *); +void rt_commit(struct config *new, struct config *old); +void rt_lock_table(rtable *); +void rt_unlock_table(rtable *); +struct f_trie * rt_lock_trie(rtable *tab); +void rt_unlock_trie(rtable *tab, struct f_trie *trie); +void rt_subscribe(rtable *tab, struct rt_subscription *s); +void rt_unsubscribe(struct rt_subscription *s); +void rt_flowspec_link(rtable *src, rtable *dst); +void rt_flowspec_unlink(rtable *src, rtable *dst); +rtable *rt_setup(pool *, struct rtable_config *); +static inline void rt_shutdown(rtable *r) { rfree(r->rp); } + +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_find_valid(rtable *tab, const net_addr *addr) +{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +net *net_get(rtable *tab, const net_addr *addr); +net *net_route(rtable *tab, const net_addr *n); +int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); +rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); +void rt_refresh_begin(struct rt_import_request *); +void rt_refresh_end(struct rt_import_request *); +void rt_modify_stale(rtable *t, struct rt_import_request *); +void rt_schedule_prune(rtable *t); +void rte_dump(struct rte_storage *); +void rte_free(struct rte_storage *); +struct rte_storage *rte_store(const rte *, net *net, rtable *); +void rt_dump(rtable *); +void rt_dump_all(void); +void rt_dump_hooks(rtable *); +void rt_dump_hooks_all(void); +int rt_reload_channel(struct channel *c); +void rt_reload_channel_abort(struct channel *c); +void rt_refeed_channel(struct channel *c); +void rt_prune_sync(rtable *t, int all); +struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); + +static inline int rt_is_ip(rtable *tab) +{ return (tab->addr_type == NET_IP4) || (tab->addr_type == NET_IP6); } + +static inline int rt_is_vpn(rtable *tab) +{ return (tab->addr_type == NET_VPN4) || (tab->addr_type == NET_VPN6); } + +static inline int rt_is_roa(rtable *tab) +{ return (tab->addr_type == NET_ROA4) || (tab->addr_type == NET_ROA6); } + +static inline int rt_is_flow(rtable *tab) +{ return (tab->addr_type == NET_FLOW4) || (tab->addr_type == NET_FLOW6); } + + +/* Default limit for ECMP next hops, defined in sysdep code */ +extern const int rt_default_ecmp; + +struct rt_show_data_rtable { + node n; + const char *name; + struct rt_exporter *table; + struct channel *export_channel; + struct channel *prefilter; + struct krt_proto *kernel; +}; + +struct rt_show_data { + struct cli *cli; /* Pointer back to the CLI */ + net_addr *addr; + list tables; + struct rt_show_data_rtable *tab; /* Iterator over table list */ + struct rt_show_data_rtable *last_table; /* Last table in output */ + struct rt_export_request req; /* Export request in use */ + int verbose, tables_defined_by; + const struct filter *filter; + struct proto *show_protocol; + struct proto *export_protocol; + struct channel *export_channel; + struct config *running_on_config; + struct rt_export_hook *kernel_export_hook; + int export_mode, addr_mode, primary_only, filtered, stats; + + int net_counter, rt_counter, show_counter, table_counter; + int net_counter_last, rt_counter_last, show_counter_last; + int show_counter_last_flush; +}; + +void rt_show(struct rt_show_data *); +struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t); + +/* Value of table definition mode in struct rt_show_data */ +#define RSD_TDB_DEFAULT 0 /* no table specified */ +#define RSD_TDB_INDIRECT 0 /* show route ... protocol P ... */ +#define RSD_TDB_ALL RSD_TDB_SET /* show route ... table all ... */ +#define RSD_TDB_DIRECT RSD_TDB_SET | RSD_TDB_NMN /* show route ... table X table Y ... */ + +#define RSD_TDB_SET 0x1 /* internal: show empty tables */ +#define RSD_TDB_NMN 0x2 /* internal: need matching net */ + +/* Value of export_mode in struct rt_show_data */ +#define RSEM_NONE 0 /* Export mode not used */ +#define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ +#define RSEM_EXPORT 2 /* Routes accepted by export filter */ +#define RSEM_NOEXPORT 3 /* Routes rejected by export filter */ +#define RSEM_EXPORTED 4 /* Routes marked in export map */ + +/* Host entry: Resolve hook for recursive nexthops */ +extern struct ea_class ea_gen_hostentry; +struct hostentry_adata { + adata ad; + struct hostentry *he; + u32 labels[0]; +}; + +void +ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); + +void ea_show_hostentry(const struct adata *ad, byte *buf, uint size); +void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad); + +/* + * Default protocol preferences + */ + +#define DEF_PREF_DIRECT 240 /* Directly connected */ +#define DEF_PREF_STATIC 200 /* Static route */ +#define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ +#define DEF_PREF_BABEL 130 /* Babel */ +#define DEF_PREF_RIP 120 /* RIP */ +#define DEF_PREF_BGP 100 /* BGP */ +#define DEF_PREF_RPKI 100 /* RPKI */ +#define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ +#define DEF_PREF_UNKNOWN 0 /* Routes with no preference set */ + +/* + * Route Origin Authorization + */ + +#define ROA_UNKNOWN 0 +#define ROA_VALID 1 +#define ROA_INVALID 2 + +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); + +#endif diff --git a/proto/babel/Makefile b/proto/babel/Makefile index 06b58e95..ae6aeaf2 100644 --- a/proto/babel/Makefile +++ b/proto/babel/Makefile @@ -2,5 +2,6 @@ src := babel.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,babel_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/babel/babel.c b/proto/babel/babel.c index 0d0304f9..4d024e3a 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -37,6 +37,7 @@ #include <stdlib.h> #include "babel.h" +#include "lib/macro.h" #define LOG_PKT_AUTH(msg, args...) \ log_rl(&p->log_pkt_tbf, L_AUTH "%s: " msg, p->p.name, args) @@ -58,6 +59,8 @@ static void babel_update_cost(struct babel_neighbor *n); static inline void babel_kick_timer(struct babel_proto *p); static inline void babel_iface_kick_timer(struct babel_iface *ifa); +static struct ea_class ea_babel_metric, ea_babel_router_id, ea_babel_seqno; + /* * Functions to maintain data structures */ @@ -119,7 +122,7 @@ babel_get_source(struct babel_proto *p, struct babel_entry *e, u64 router_id) } static void -babel_expire_sources(struct babel_proto *p, struct babel_entry *e) +babel_expire_sources(struct babel_proto *p UNUSED, struct babel_entry *e) { struct babel_source *n, *nx; btime now_ = current_time(); @@ -129,7 +132,7 @@ babel_expire_sources(struct babel_proto *p, struct babel_entry *e) if (n->expires && n->expires <= now_) { rem_node(NODE n); - sl_free(p->source_slab, n); + sl_free(n); } } } @@ -174,7 +177,7 @@ babel_retract_route(struct babel_proto *p, struct babel_route *r) } static void -babel_flush_route(struct babel_proto *p, struct babel_route *r) +babel_flush_route(struct babel_proto *p UNUSED, struct babel_route *r) { DBG("Babel: Flush route %N router_id %lR neigh %I\n", r->e->n.addr, r->router_id, r->neigh->addr); @@ -185,7 +188,7 @@ babel_flush_route(struct babel_proto *p, struct babel_route *r) if (r->e->selected == r) r->e->selected = NULL; - sl_free(p->route_slab, r); + sl_free(r); } static void @@ -338,13 +341,13 @@ found: } static void -babel_remove_seqno_request(struct babel_proto *p, struct babel_seqno_request *sr) +babel_remove_seqno_request(struct babel_proto *p UNUSED, struct babel_seqno_request *sr) { if (sr->nbr) rem_node(&sr->nbr_node); rem_node(NODE sr); - sl_free(p->seqno_slab, sr); + sl_free(sr); } static int @@ -639,14 +642,14 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) if (r) { - rta a0 = { - .src = p->p.main_source, - .source = RTS_BABEL, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .from = r->neigh->addr, - .nh.gw = r->next_hop, - .nh.iface = r->neigh->ifa->iface, + struct nexthop_adata nhad = { + .nh = { + .gw = r->next_hop, + .iface = r->neigh->ifa->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, }; /* @@ -655,42 +658,54 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) * have routing work. */ if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0)) - a0.nh.flags = RNF_ONLINK; + nhad.nh.flags = RNF_ONLINK; + + struct { + ea_list l; + eattr a[7]; + } eattrs = { + .l.count = ARRAY_SIZE(eattrs.a), + .a = { + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, c->preference), + EA_LITERAL_STORE_ADATA(&ea_gen_from, 0, &r->neigh->addr, sizeof(r->neigh->addr)), + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_BABEL), + EA_LITERAL_STORE_ADATA(&ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length), + EA_LITERAL_EMBEDDED(&ea_babel_metric, 0, r->metric), + EA_LITERAL_STORE_ADATA(&ea_babel_router_id, 0, &r->router_id, sizeof(r->router_id)), + EA_LITERAL_EMBEDDED(&ea_babel_seqno, 0, r->seqno), + } + }; - rta *a = rta_lookup(&a0); - rte *rte = rte_get_temp(a); - rte->u.babel.seqno = r->seqno; - rte->u.babel.metric = r->metric; - rte->u.babel.router_id = r->router_id; - rte->pflags = EA_ID_FLAG(EA_BABEL_METRIC) | EA_ID_FLAG(EA_BABEL_ROUTER_ID); + rte e0 = { + .attrs = &eattrs.l, + .src = p->p.main_source, + }; e->unreachable = 0; - rte_update2(c, e->n.addr, rte, p->p.main_source); + rte_update(c, e->n.addr, &e0, p->p.main_source); } else if (e->valid && (e->router_id != p->router_id)) { /* Unreachable */ - rta a0 = { + ea_list *ea = NULL; + + ea_set_attr_u32(&ea, &ea_gen_preference, 0, 1); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BABEL); + ea_set_dest(&ea, 0, RTD_UNREACHABLE); + + rte e0 = { + .attrs = ea, .src = p->p.main_source, - .source = RTS_BABEL, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNREACHABLE, }; - rta *a = rta_lookup(&a0); - rte *rte = rte_get_temp(a); - memset(&rte->u.babel, 0, sizeof(rte->u.babel)); - rte->pflags = 0; - rte->pref = 1; - e->unreachable = 1; - rte_update2(c, e->n.addr, rte, p->p.main_source); + rte_update(c, e->n.addr, &e0, p->p.main_source); } else { /* Retraction */ e->unreachable = 0; - rte_update2(c, e->n.addr, NULL, p->p.main_source); + rte_update(c, e->n.addr, NULL, p->p.main_source); } } @@ -700,7 +715,7 @@ babel_announce_retraction(struct babel_proto *p, struct babel_entry *e) { struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel; e->unreachable = 0; - rte_update2(c, e->n.addr, NULL, p->p.main_source); + rte_update(c, e->n.addr, NULL, p->p.main_source); } @@ -2009,30 +2024,42 @@ babel_dump(struct proto *P) static void babel_get_route_info(rte *rte, byte *buf) { - buf += bsprintf(buf, " (%d/%d) [%lR]", rte->pref, rte->u.babel.metric, rte->u.babel.router_id); + u64 rid = 0; + eattr *e = ea_find(rte->attrs, &ea_babel_router_id); + if (e) + memcpy(&rid, e->u.ptr->data, sizeof(u64)); + + buf += bsprintf(buf, " (%d/%d) [%lR]", + rt_get_preference(rte), + ea_get_int(rte->attrs, &ea_babel_metric, BABEL_INFINITY), rid); } -static int -babel_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +babel_router_id_format(const eattr *a, byte *buf, uint len) { - switch (a->id) - { - case EA_BABEL_METRIC: - bsprintf(buf, "metric: %d", a->u.data); - return GA_FULL; + u64 rid = 0; + memcpy(&rid, a->u.ptr->data, sizeof(u64)); + bsnprintf(buf, len, "%lR", rid); +} - case EA_BABEL_ROUTER_ID: - { - u64 rid = 0; - memcpy(&rid, a->u.ptr->data, sizeof(u64)); - bsprintf(buf, "router_id: %lR", rid); - return GA_FULL; - } +static struct ea_class ea_babel_metric = { + .name = "babel_metric", + .type = T_INT, +}; + +static struct ea_class ea_babel_router_id = { + .name = "babel_router_id", + .type = T_OPAQUE, + .readonly = 1, + .format = babel_router_id_format, +}; + +static struct ea_class ea_babel_seqno = { + .name = "babel_seqno", + .type = T_INT, + .readonly = 1, +}; - default: - return GA_UNKNOWN; - } -} void babel_show_interfaces(struct proto *P, const char *iff) @@ -2230,45 +2257,27 @@ babel_kick_timer(struct babel_proto *p) static int -babel_preexport(struct channel *C, struct rte **new, struct linpool *pool UNUSED) +babel_preexport(struct channel *C, struct rte *new) { - struct rta *a = (*new)->attrs; + if (new->src->owner != &C->proto->sources) + return 0; /* Reject our own unreachable routes */ - if ((a->dest == RTD_UNREACHABLE) && (a->src->proto == C->proto)) + eattr *ea = ea_find(new->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = (void *) ea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad)) return -1; return 0; } -static void -babel_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - struct adata *id = lp_alloc_adata(pool, sizeof(u64)); - memcpy(id->data, &rt->u.babel.router_id, sizeof(u64)); - - rte_init_tmp_attrs(rt, pool, 2); - rte_make_tmp_attr(rt, EA_BABEL_METRIC, EAF_TYPE_INT, rt->u.babel.metric); - rte_make_tmp_attr(rt, EA_BABEL_ROUTER_ID, EAF_TYPE_OPAQUE, (uintptr_t) id); -} - -static void -babel_store_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rt->u.babel.metric = rte_store_tmp_attr(rt, EA_BABEL_METRIC); - - /* EA_BABEL_ROUTER_ID is read-only, we do not really save the value */ - rte_store_tmp_attr(rt, EA_BABEL_ROUTER_ID); -} - /* * babel_rt_notify - core tells us about new route (possibly our own), * so store it into our data structures. */ static void -babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, - struct rte *new, struct rte *old UNUSED) +babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, + struct rte *new, const struct rte *old UNUSED) { struct babel_proto *p = (void *) P; struct babel_entry *e; @@ -2276,19 +2285,31 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, if (new) { /* Update */ - uint internal = (new->attrs->src->proto == P); - uint rt_seqno = internal ? new->u.babel.seqno : p->update_seqno; - uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0); - u64 rt_router_id = internal ? new->u.babel.router_id : p->router_id; + uint rt_seqno; + uint rt_metric = ea_get_int(new->attrs, &ea_babel_metric, 0); + u64 rt_router_id = 0; + + if (new->src->owner == &P->sources) + { + rt_seqno = ea_get_int(new->attrs, &ea_babel_seqno, 0); + eattr *e = ea_find(new->attrs, &ea_babel_router_id); + if (e) + memcpy(&rt_router_id, e->u.ptr->data, sizeof(u64)); + } + else + { + rt_seqno = p->update_seqno; + rt_router_id = p->router_id; + } if (rt_metric > BABEL_INFINITY) { log(L_WARN "%s: Invalid babel_metric value %u for route %N", - p->p.name, rt_metric, net->n.addr); + p->p.name, rt_metric, net); rt_metric = BABEL_INFINITY; } - e = babel_get_entry(p, net->n.addr); + e = babel_get_entry(p, net); /* Activate triggered updates */ if ((e->valid != BABEL_ENTRY_VALID) || @@ -2306,7 +2327,7 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, else { /* Withdraw */ - e = babel_find_entry(p, net->n.addr); + e = babel_find_entry(p, net); if (!e || e->valid != BABEL_ENTRY_VALID) return; @@ -2322,15 +2343,16 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, static int babel_rte_better(struct rte *new, struct rte *old) { - return new->u.babel.metric < old->u.babel.metric; + uint new_metric = ea_get_int(new->attrs, &ea_babel_metric, BABEL_INFINITY); + uint old_metric = ea_get_int(old->attrs, &ea_babel_metric, BABEL_INFINITY); + + return new_metric < old_metric; } -static int -babel_rte_same(struct rte *new, struct rte *old) +static u32 +babel_rte_igp_metric(const rte *rt) { - return ((new->u.babel.seqno == old->u.babel.seqno) && - (new->u.babel.metric == old->u.babel.metric) && - (new->u.babel.router_id == old->u.babel.router_id)); + return ea_get_int(rt->attrs, &ea_babel_metric, BABEL_INFINITY); } @@ -2351,6 +2373,12 @@ babel_postconfig(struct proto_config *CF) cf->ip6_channel = ip6 ?: ip6_sadr; } +static struct rte_owner_class babel_rte_owner_class = { + .get_route_info = babel_get_route_info, + .rte_better = babel_rte_better, + .rte_igp_metric = babel_rte_igp_metric, +}; + static struct proto * babel_init(struct proto_config *CF) { @@ -2364,10 +2392,8 @@ babel_init(struct proto_config *CF) P->if_notify = babel_if_notify; P->rt_notify = babel_rt_notify; P->preexport = babel_preexport; - P->make_tmp_attrs = babel_make_tmp_attrs; - P->store_tmp_attrs = babel_store_tmp_attrs; - P->rte_better = babel_rte_better; - P->rte_same = babel_rte_same; + + P->sources.class = &babel_rte_owner_class; return P; } @@ -2465,11 +2491,9 @@ babel_reconfigure(struct proto *P, struct proto_config *CF) return 1; } - struct protocol proto_babel = { .name = "Babel", .template = "babel%d", - .class = PROTOCOL_BABEL, .preference = DEF_PREF_BABEL, .channel_mask = NB_IP | NB_IP6_SADR, .proto_size = sizeof(struct babel_proto), @@ -2480,6 +2504,16 @@ struct protocol proto_babel = { .start = babel_start, .shutdown = babel_shutdown, .reconfigure = babel_reconfigure, - .get_route_info = babel_get_route_info, - .get_attr = babel_get_attr }; + +void +babel_build(void) +{ + proto_build(&proto_babel); + + EA_REGISTER_ALL( + &ea_babel_metric, + &ea_babel_router_id, + &ea_babel_seqno + ); +} diff --git a/proto/babel/babel.h b/proto/babel/babel.h index 84feb085..a980d1da 100644 --- a/proto/babel/babel.h +++ b/proto/babel/babel.h @@ -16,7 +16,7 @@ #include "nest/bird.h" #include "nest/cli.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/locks.h" #include "nest/password.h" @@ -26,9 +26,6 @@ #include "lib/string.h" #include "lib/timer.h" -#define EA_BABEL_METRIC EA_CODE(PROTOCOL_BABEL, 0) -#define EA_BABEL_ROUTER_ID EA_CODE(PROTOCOL_BABEL, 1) - #define BABEL_MAGIC 42 #define BABEL_VERSION 2 #define BABEL_PORT 6696 diff --git a/proto/babel/config.Y b/proto/babel/config.Y index 05210fa4..a4350eed 100644 --- a/proto/babel/config.Y +++ b/proto/babel/config.Y @@ -24,7 +24,7 @@ CF_DECLS CF_KEYWORDS(BABEL, INTERFACE, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, TYPE, WIRED, WIRELESS, RX, TX, BUFFER, PRIORITY, LENGTH, CHECK, LINK, - NEXT, HOP, IPV4, IPV6, BABEL_METRIC, SHOW, INTERFACES, NEIGHBORS, + NEXT, HOP, IPV4, IPV6, SHOW, INTERFACES, NEIGHBORS, ENTRIES, RANDOMIZE, ROUTER, ID, AUTHENTICATION, NONE, MAC, PERMISSIVE) CF_GRAMMAR @@ -163,8 +163,6 @@ babel_iface_opt_list: babel_iface: babel_iface_start iface_patt_list_nopx babel_iface_opt_list babel_iface_finish; -dynamic_attr: BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_BABEL_METRIC); } ; - CF_CLI_HELP(SHOW BABEL, ..., [[Show information about Babel protocol]]); CF_CLI(SHOW BABEL INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about Babel interfaces]]) diff --git a/proto/babel/packets.c b/proto/babel/packets.c index 2647a79f..d4acc170 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -1318,7 +1318,6 @@ babel_send_to(struct babel_iface *ifa, ip_addr dest) static uint babel_write_queue(struct babel_iface *ifa, list *queue) { - struct babel_proto *p = ifa->proto; struct babel_write_state state = { .next_hop_ip6 = ifa->addr }; if (EMPTY_LIST(*queue)) @@ -1346,7 +1345,7 @@ babel_write_queue(struct babel_iface *ifa, list *queue) pos += len; rem_node(NODE msg); - sl_free(p->msg_slab, msg); + sl_free(msg); } pos += babel_auth_add_tlvs(ifa, (struct babel_tlv *) pos, end - pos); @@ -1507,13 +1506,13 @@ babel_process_packet(struct babel_iface *ifa, else if (res == PARSE_IGNORE) { DBG("Babel: Ignoring TLV of type %d\n", tlv->type); - sl_free(p->msg_slab, msg); + sl_free(msg); } else /* PARSE_ERROR */ { LOG_PKT("Bad TLV from %I via %s type %d pos %d - parse error", saddr, ifa->iface->name, tlv->type, (int) ((byte *)tlv - (byte *)pkt)); - sl_free(p->msg_slab, msg); + sl_free(msg); break; } } @@ -1525,7 +1524,7 @@ babel_process_packet(struct babel_iface *ifa, if (tlv_data[msg->msg.type].handle_tlv) tlv_data[msg->msg.type].handle_tlv(&msg->msg, ifa); rem_node(NODE msg); - sl_free(p->msg_slab, msg); + sl_free(msg); } } diff --git a/proto/bfd/Makefile b/proto/bfd/Makefile index 402122fc..11d639d7 100644 --- a/proto/bfd/Makefile +++ b/proto/bfd/Makefile @@ -1,6 +1,7 @@ -src := bfd.c io.c packets.c +src := bfd.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,bfd_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index cc941695..e7d27f74 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -113,8 +113,16 @@ #define HASH_IP_EQ(a1,n1,a2,n2) ipa_equal(a1, a2) && n1 == n2 #define HASH_IP_FN(a,n) ipa_hash(a) ^ u32_hash(n) -static list bfd_proto_list; -static list bfd_wait_list; +DEFINE_DOMAIN(rtable); +#define BFD_LOCK LOCK_DOMAIN(rtable, bfd_global.lock) +#define BFD_UNLOCK UNLOCK_DOMAIN(rtable, bfd_global.lock) + +static struct { + DOMAIN(rtable) lock; + list wait_list; + list pickup_list; + list proto_list; +} bfd_global; const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; @@ -188,7 +196,7 @@ bfd_session_update_tx_interval(struct bfd_session *s) return; /* Set timer relative to last tx_timer event */ - tm_set(s->tx_timer, s->last_tx + tx_int_l); + tm_set_in(s->tx_timer, s->last_tx + tx_int_l, s->ifa->bfd->p.loop); } static void @@ -202,7 +210,7 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick) if (!s->last_rx) return; - tm_set(s->hold_timer, s->last_rx + timeout); + tm_set_in(s->hold_timer, s->last_rx + timeout, s->ifa->bfd->p.loop); } static void @@ -226,7 +234,7 @@ bfd_session_control_tx_timer(struct bfd_session *s, int reset) if (reset || !tm_active(s->tx_timer)) { s->last_tx = 0; - tm_start(s->tx_timer, 0); + tm_start_in(s->tx_timer, 0, s->ifa->bfd->p.loop); } return; @@ -419,7 +427,7 @@ bfd_get_free_id(struct bfd_proto *p) static struct bfd_session * bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface, struct bfd_options *opts) { - birdloop_enter(p->loop); + ASSERT_DIE(birdloop_inside(p->p.loop)); struct bfd_iface *ifa = bfd_get_iface(p, local, iface); @@ -454,8 +462,6 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * TRACE(D_EVENTS, "Session to %I added", s->addr); - birdloop_leave(p->loop); - return s; } @@ -463,38 +469,34 @@ bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface * static void bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa) { - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); s->opened = 1; bfd_session_control_tx_timer(s); - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); } static void bfd_close_session(struct bfd_proto *p, struct bfd_session *s) { - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); s->opened = 0; bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN); bfd_session_control_tx_timer(s); - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); } */ static void -bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) +bfd_remove_session_locked(struct bfd_proto *p, struct bfd_session *s) { - ip_addr ip = s->addr; - /* Caller should ensure that request list is empty */ - birdloop_enter(p->loop); - /* Remove session from notify list if scheduled for notification */ /* No need for bfd_lock_sessions(), we are already protected by birdloop_enter() */ if (NODE_VALID(&s->n)) @@ -508,11 +510,17 @@ bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) HASH_REMOVE(p->session_hash_id, HASH_ID, s); HASH_REMOVE(p->session_hash_ip, HASH_IP, s); - sl_free(p->session_slab, s); + TRACE(D_EVENTS, "Session to %I removed", s->addr); - TRACE(D_EVENTS, "Session to %I removed", ip); + sl_free(s); +} - birdloop_leave(p->loop); +static void +bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) +{ + birdloop_enter(p->p.loop); + bfd_remove_session_locked(p, s); + birdloop_leave(p->p.loop); } static void @@ -521,7 +529,7 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) if (EMPTY_LIST(s->request_list)) return; - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); struct bfd_request *req = SKIP_BACK(struct bfd_request, n, HEAD(s->request_list)); s->cf = bfd_merge_options(s->ifa->cf, &req->opts); @@ -534,7 +542,7 @@ bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) bfd_session_control_tx_timer(s, 0); - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); TRACE(D_EVENTS, "Session to %I reconfigured", s->addr); } @@ -627,9 +635,9 @@ bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_con (new->passive != old->passive); /* This should be probably changed to not access ifa->cf from the BFD thread */ - birdloop_enter(p->loop); + birdloop_enter(p->p.loop); ifa->cf = new; - birdloop_leave(p->loop); + birdloop_leave(p->p.loop); } @@ -690,41 +698,68 @@ bfd_add_request(struct bfd_proto *p, struct bfd_request *req) } static void -bfd_submit_request(struct bfd_request *req) +bfd_pickup_requests(void *_data UNUSED) { node *n; + WALK_LIST(n, bfd_global.proto_list) + { + struct bfd_proto *p = SKIP_BACK(struct bfd_proto, bfd_node, n); + birdloop_enter(p->p.loop); + BFD_LOCK; - WALK_LIST(n, bfd_proto_list) - if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req)) - return; + node *rn, *rnxt; + WALK_LIST_DELSAFE(rn, rnxt, bfd_global.pickup_list) + bfd_add_request(p, SKIP_BACK(struct bfd_request, n, rn)); - rem_node(&req->n); - add_tail(&bfd_wait_list, &req->n); - req->session = NULL; - bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0); + BFD_UNLOCK; + birdloop_leave(p->p.loop); + } + + BFD_LOCK; + node *rn, *rnxt; + WALK_LIST_DELSAFE(rn, rnxt, bfd_global.pickup_list) + { + rem_node(rn); + add_tail(&bfd_global.wait_list, rn); + bfd_request_notify(SKIP_BACK(struct bfd_request, n, rn), BFD_STATE_ADMIN_DOWN, 0); + } + BFD_UNLOCK; } +static event bfd_pickup_event = { .hook = bfd_pickup_requests }; + static void bfd_take_requests(struct bfd_proto *p) { node *n, *nn; - - WALK_LIST_DELSAFE(n, nn, bfd_wait_list) + BFD_LOCK; + WALK_LIST_DELSAFE(n, nn, bfd_global.wait_list) bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n)); + BFD_UNLOCK; } static void bfd_drop_requests(struct bfd_proto *p) { node *n; - - HASH_WALK(p->session_hash_id, next_id, s) + BFD_LOCK; + HASH_WALK_DELSAFE(p->session_hash_id, next_id, s) { - /* We assume that p is not in bfd_proto_list */ WALK_LIST_FIRST(n, s->request_list) - bfd_submit_request(SKIP_BACK(struct bfd_request, n, n)); + { + struct bfd_request *req = SKIP_BACK(struct bfd_request, n, n); + rem_node(&req->n); + add_tail(&bfd_global.pickup_list, &req->n); + req->session = NULL; + bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0); + } + + ev_send(&global_event_list, &bfd_pickup_event); + + bfd_remove_session_locked(p, s); } HASH_WALK_END; + BFD_UNLOCK; } static struct resclass bfd_request_class; @@ -737,9 +772,6 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, { struct bfd_request *req = ralloc(p, &bfd_request_class); - /* Hack: self-link req->n, we will call rem_node() on it */ - req->n.prev = req->n.next = &req->n; - req->addr = addr; req->local = local; req->iface = iface; @@ -748,11 +780,16 @@ bfd_request_session(pool *p, ip_addr addr, ip_addr local, if (opts) req->opts = *opts; - bfd_submit_request(req); - req->hook = hook; req->data = data; + req->session = NULL; + + BFD_LOCK; + add_tail(&bfd_global.pickup_list, &req->n); + ev_send(&global_event_list, &bfd_pickup_event); + BFD_UNLOCK; + return req; } @@ -1007,13 +1044,6 @@ bfd_notify_init(struct bfd_proto *p) * BFD protocol glue */ -void -bfd_init_all(void) -{ - init_list(&bfd_proto_list); - init_list(&bfd_wait_list); -} - static struct proto * bfd_init(struct proto_config *c) { @@ -1030,10 +1060,10 @@ bfd_start(struct proto *P) struct bfd_proto *p = (struct bfd_proto *) P; struct bfd_config *cf = (struct bfd_config *) (P->cf); - p->loop = birdloop_new(); - p->tpool = rp_new(NULL, "BFD thread root"); pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE); + p->tpool = rp_new(P->pool, "BFD loop pool"); + p->session_slab = sl_new(P->pool, sizeof(struct bfd_session)); HASH_INIT(p->session_hash_id, P->pool, 8); HASH_INIT(p->session_hash_ip, P->pool, 8); @@ -1043,9 +1073,7 @@ bfd_start(struct proto *P) init_list(&p->notify_list); bfd_notify_init(p); - add_tail(&bfd_proto_list, &p->bfd_node); - - birdloop_enter(p->loop); + add_tail(&bfd_global.proto_list, &p->bfd_node); if (!cf->strict_bind) { @@ -1062,42 +1090,33 @@ bfd_start(struct proto *P) p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); } - birdloop_leave(p->loop); - bfd_take_requests(p); struct bfd_neighbor *n; WALK_LIST(n, cf->neigh_list) bfd_start_neighbor(p, n); - birdloop_start(p->loop); - return PS_UP; } - static int bfd_shutdown(struct proto *P) { struct bfd_proto *p = (struct bfd_proto *) P; - struct bfd_config *cf = (struct bfd_config *) (P->cf); + struct bfd_config *cf = (struct bfd_config *) (p->p.cf); rem_node(&p->bfd_node); - birdloop_stop(p->loop); - - struct bfd_neighbor *n; - WALK_LIST(n, cf->neigh_list) - bfd_stop_neighbor(p, n); + struct bfd_neighbor *bn; + WALK_LIST(bn, cf->neigh_list) + bfd_stop_neighbor(p, bn); bfd_drop_requests(p); - /* FIXME: This is hack */ - birdloop_enter(p->loop); - rfree(p->tpool); - birdloop_leave(p->loop); - - birdloop_free(p->loop); + if (p->rx4_1) sk_stop(p->rx4_1); + if (p->rx4_m) sk_stop(p->rx4_m); + if (p->rx6_1) sk_stop(p->rx6_1); + if (p->rx6_m) sk_stop(p->rx6_m); return PS_DOWN; } @@ -1118,7 +1137,7 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) (new->strict_bind != old->strict_bind)) return 0; - birdloop_mask_wakeups(p->loop); + birdloop_mask_wakeups(p->p.loop); WALK_LIST(ifa, p->iface_list) bfd_reconfigure_iface(p, ifa, new); @@ -1132,7 +1151,7 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) bfd_reconfigure_neighbors(p, new); - birdloop_unmask_wakeups(p->loop); + birdloop_unmask_wakeups(p->p.loop); return 1; } @@ -1190,7 +1209,6 @@ bfd_show_sessions(struct proto *P) struct protocol proto_bfd = { .name = "BFD", .template = "bfd%d", - .class = PROTOCOL_BFD, .proto_size = sizeof(struct bfd_proto), .config_size = sizeof(struct bfd_config), .init = bfd_init, @@ -1199,3 +1217,14 @@ struct protocol proto_bfd = { .reconfigure = bfd_reconfigure, .copy_config = bfd_copy_config, }; + +void +bfd_build(void) +{ + proto_build(&proto_bfd); + + bfd_global.lock = DOMAIN_NEW(rtable, "BFD Global"); + init_list(&bfd_global.wait_list); + init_list(&bfd_global.pickup_list); + init_list(&bfd_global.proto_list); +} diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 7caf9f66..b9afaf92 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -13,16 +13,16 @@ #include "nest/cli.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/password.h" #include "conf/conf.h" #include "lib/hash.h" +#include "lib/io-loop.h" #include "lib/resource.h" #include "lib/socket.h" #include "lib/string.h" #include "nest/bfd.h" -#include "io.h" #define BFD_CONTROL_PORT 3784 @@ -88,9 +88,11 @@ struct bfd_neighbor struct bfd_proto { struct proto p; - struct birdloop *loop; - pool *tpool; + pthread_spinlock_t lock; + + pool *tpool; + node bfd_node; slab *session_slab; diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index 70461872..0d6e33fa 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -37,6 +37,7 @@ proto: bfd_proto ; bfd_proto_start: proto_start BFD { this_proto = proto_config_new(&proto_bfd, $1); + this_proto->loop_order = DOMAIN_ORDER(proto); init_list(&BFD_CFG->patt_list); init_list(&BFD_CFG->neigh_list); BFD_CFG->accept_ipv4 = BFD_CFG->accept_ipv6 = 1; diff --git a/proto/bfd/io.h b/proto/bfd/io.h deleted file mode 100644 index ec706e9a..00000000 --- a/proto/bfd/io.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * BIRD -- I/O and event loop - * - * Can be freely distributed and used under the terms of the GNU GPL. - */ - -#ifndef _BIRD_BFD_IO_H_ -#define _BIRD_BFD_IO_H_ - -#include "nest/bird.h" -#include "lib/lists.h" -#include "lib/resource.h" -#include "lib/event.h" -#include "lib/timer.h" -#include "lib/socket.h" - - -void ev2_schedule(event *e); - -void sk_start(sock *s); -void sk_stop(sock *s); - -struct birdloop *birdloop_new(void); -void birdloop_start(struct birdloop *loop); -void birdloop_stop(struct birdloop *loop); -void birdloop_free(struct birdloop *loop); - -void birdloop_enter(struct birdloop *loop); -void birdloop_leave(struct birdloop *loop); -void birdloop_mask_wakeups(struct birdloop *loop); -void birdloop_unmask_wakeups(struct birdloop *loop); - - -#endif /* _BIRD_BFD_IO_H_ */ diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 5f10734c..6f0b4eaf 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -412,7 +412,7 @@ bfd_err_hook(sock *sk, int err) sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) { - sock *sk = sk_new(p->tpool); + sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; sk->subtype = af; sk->sport = !multihop ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; @@ -475,7 +475,7 @@ bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa) sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) { - sock *sk = sk_new(p->tpool); + sock *sk = sk_new(p->p.pool); sk->type = SK_UDP; sk->saddr = local; sk->dport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; diff --git a/proto/bgp/Makefile b/proto/bgp/Makefile index 00aaef5e..2a4cc99c 100644 --- a/proto/bgp/Makefile +++ b/proto/bgp/Makefile @@ -2,5 +2,6 @@ src := attrs.c bgp.c packets.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,bgp_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 0d2116b7..2543ee73 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -15,12 +15,13 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "conf/conf.h" #include "lib/resource.h" #include "lib/string.h" #include "lib/unaligned.h" +#include "lib/macro.h" #include "bgp.h" @@ -64,37 +65,72 @@ * format - Optional hook that converts eattr to textual representation. */ - -struct bgp_attr_desc { - const char *name; - uint type; - uint flags; - void (*export)(struct bgp_export_state *s, eattr *a); - int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); - void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); - void (*format)(const eattr *ea, byte *buf, uint size); +union bgp_attr_desc { + struct ea_class class; + struct { + EA_CLASS_INSIDE; + uint flags; + void (*export)(struct bgp_export_state *s, eattr *a); + int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size); + void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to); + }; }; -static const struct bgp_attr_desc bgp_attr_table[]; +static union bgp_attr_desc bgp_attr_table[]; +static inline const union bgp_attr_desc *bgp_find_attr_desc(eattr *a) +{ + const struct ea_class *class = ea_class_find(a->id); -static inline int bgp_attr_known(uint code); + if ((class < &bgp_attr_table[0].class) || (class >= &bgp_attr_table[BGP_ATTR_MAX].class)) + return NULL; -eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val) + return (const union bgp_attr_desc *) class; +} + +#define BGP_EA_ID(code) (bgp_attr_table[code].id) +#define EA_BGP_ID(code) (((union bgp_attr_desc *) ea_class_find(code)) - bgp_attr_table) + +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + + ea_set_attr(to, EA_LITERAL_EMBEDDED( + &desc->class, + flags & ~BAF_EXT_LEN, + val + )); +} + +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad) { - ASSERT(bgp_attr_known(code)); + const union bgp_attr_desc *desc = &bgp_attr_table[code]; - return ea_set_attr( - attrs, - pool, - EA_CODE(PROTOCOL_BGP, code), - flags & ~BAF_EXT_LEN, - bgp_attr_table[code].type, - val - ); + ea_set_attr(to, EA_LITERAL_DIRECT_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + ad + )); } +void +bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + ea_set_attr(to, EA_LITERAL_STORE_ADATA( + &desc->class, + flags & ~BAF_EXT_LEN, + data, + len + )); +} + +void +bgp_unset_attr(ea_list **to, uint code) +{ + const union bgp_attr_desc *desc = &bgp_attr_table[code]; + ea_unset_attr(to, 0, &desc->class); +} #define REPORT(msg, args...) \ ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); }) @@ -106,7 +142,7 @@ bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintp ({ REPORT(msg, ## args); s->err_withdraw = 1; return; }) #define UNSET(a) \ - ({ a->type = EAF_TYPE_UNDEF; return; }) + ({ a->undef = 1; return; }) #define REJECT(msg, args...) \ ({ log(L_ERR "%s: " msg, s->proto->p.name, ## args); s->err_reject = 1; return; }) @@ -151,7 +187,7 @@ bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+1)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 1); buf[3] = a->u.data; return 3+1; @@ -163,7 +199,7 @@ bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) if (size < (3+4)) return -1; - bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4); + bgp_put_attr_hdr3(buf, EA_BGP_ID(a->id), a->flags, 4); put_u32(buf+3, a->u.data); return 3+4; @@ -177,7 +213,7 @@ bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size if (size < (4+len)) return -1; - uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len); + uint hdr = bgp_put_attr_hdr(buf, EA_BGP_ID(a->id), a->flags, len); put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4); return hdr + len; @@ -198,7 +234,7 @@ bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint static int bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size) { - return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); + return bgp_put_attr(buf, size, EA_BGP_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length); } @@ -336,9 +372,9 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric) } int -bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad) +bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad) { - eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP)); + eattr *a = ea_find(e->attrs, BGP_EA_ID(BA_AIGP)); if (!a) return 0; @@ -347,7 +383,7 @@ bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad) return 0; u64 aigp = get_u64(b + 3); - u64 step = e->attrs->igp_metric; + u64 step = rt_get_igp_metric(e); if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN)) step = BGP_AIGP_MAX; @@ -366,7 +402,7 @@ bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad) static inline int bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) { - if (e->attrs->source == RTS_BGP) + if (rt_get_source_attr(e) == RTS_BGP) return 0; *metric = rt_get_igp_metric(e); @@ -374,6 +410,13 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) return *metric < IGP_METRIC_UNKNOWN; } +u32 +bgp_rte_igp_metric(const rte *rt) +{ + u64 metric = bgp_total_aigp_metric(rt); + return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); +} + /* * Attribute hooks @@ -395,7 +438,7 @@ bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte if (data[0] > 2) WITHDRAW(BAD_VALUE, "ORIGIN", data[0]); - bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]); + bgp_set_attr_u32(to, BA_ORIGIN, flags, data[0]); } static void @@ -463,7 +506,7 @@ bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte !bgp_as_path_first_as_equal(data, len, p->remote_as)) WITHDRAW("Malformed AS_PATH attribute - %s", "First AS differs from neigbor AS"); - bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len); + bgp_set_attr_data(to, BA_AS_PATH, flags, data, len); } @@ -535,7 +578,7 @@ bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val); + bgp_set_attr_u32(to, BA_MULTI_EXIT_DISC, flags, val); } @@ -556,7 +599,7 @@ bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, b WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val); + bgp_set_attr_u32(to, BA_LOCAL_PREF, flags, val); } @@ -566,7 +609,7 @@ bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, if (len != 0) DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len); - bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0); + bgp_set_attr_data(to, BA_ATOMIC_AGGR, flags, NULL, 0); } static int @@ -600,7 +643,7 @@ bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, b len = aggregator_16to32(data, src); } - bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AGGREGATOR, flags, data, len); } static void @@ -629,7 +672,7 @@ bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, by struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_COMMUNITY, flags, ad); } @@ -650,7 +693,7 @@ bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val); + bgp_set_attr_u32(to, BA_ORIGINATOR_ID, flags, val); } @@ -675,7 +718,7 @@ bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad); + bgp_set_attr_ptr(to, BA_CLUSTER_LIST, flags, ad); } static void @@ -794,7 +837,7 @@ bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_EXT_COMMUNITY, flags, ad); } @@ -807,7 +850,7 @@ bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flag if (len != 8) DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len); - bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len); + bgp_set_attr_data(to, BA_AS4_AGGREGATOR, flags, data, len); } static void @@ -837,7 +880,7 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt a = as_path_strip_confed(s->pool, a); } - bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a); + bgp_set_attr_ptr(to, BA_AS4_PATH, flags, a); } @@ -861,7 +904,7 @@ bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *d if (!bgp_aigp_valid(data, len, err, sizeof(err))) DISCARD("Malformed AIGP attribute - %s", err); - bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len); + bgp_set_attr_data(to, BA_AIGP, flags, data, len); } static void @@ -893,7 +936,7 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla struct adata *ad = lp_alloc_adata(s->pool, len); get_u32s(data, (u32 *) ad->data, len / 4); - bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); + bgp_set_attr_ptr(to, BA_LARGE_COMMUNITY, flags, ad); } @@ -904,14 +947,14 @@ bgp_decode_otc(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *da WITHDRAW(BAD_LENGTH, "OTC", len); u32 val = get_u32(data); - bgp_set_attr_u32(to, s->pool, BA_ONLY_TO_CUSTOMER, flags, val); + bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, flags, val); } static void bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) { - net_addr *n = s->route->net->n.addr; + const net_addr *n = s->route->net; u32 *labels = (u32 *) a->u.ptr->data; uint lnum = a->u.ptr->length / 4; @@ -978,10 +1021,29 @@ bgp_format_mpls_label_stack(const eattr *a, byte *buf, uint size) } static inline void -bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_export_unknown(struct bgp_export_state *s UNUSED, eattr *a) { + if (!(a->flags & BAF_TRANSITIVE)) + UNSET(a); + + a->flags |= BAF_PARTIAL; +} + +static inline void +bgp_decode_unknown(struct bgp_parse_state *s UNUSED, uint code, uint flags, byte *data, uint len, ea_list **to) +{ + if (!(flags & BAF_OPTIONAL)) + WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); + /* Cannot use bgp_set_attr_data() as it works on known attributes only */ - ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len); + ea_set_attr_data(to, &bgp_attr_table[code].class, flags, data, len); +} + +static inline void +bgp_format_unknown(const eattr *a, byte *buf, uint size) +{ + if (a->flags & BAF_TRANSITIVE) + bsnprintf(buf, size, "(transitive)"); } @@ -989,10 +1051,10 @@ bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, * Attribute table */ -static const struct bgp_attr_desc bgp_attr_table[] = { +static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_ORIGIN] = { - .name = "origin", - .type = EAF_TYPE_INT, + .name = "bgp_origin", + .type = T_ENUM_BGP_ORIGIN, .flags = BAF_TRANSITIVE, .export = bgp_export_origin, .encode = bgp_encode_u8, @@ -1000,69 +1062,69 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_origin, }, [BA_AS_PATH] = { - .name = "as_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_path", + .type = T_PATH, .flags = BAF_TRANSITIVE, .encode = bgp_encode_as_path, .decode = bgp_decode_as_path, }, [BA_NEXT_HOP] = { - .name = "next_hop", - .type = EAF_TYPE_IP_ADDRESS, + .name = "bgp_next_hop", + .type = T_IP, .flags = BAF_TRANSITIVE, .encode = bgp_encode_next_hop, .decode = bgp_decode_next_hop, .format = bgp_format_next_hop, }, [BA_MULTI_EXIT_DISC] = { - .name = "med", - .type = EAF_TYPE_INT, + .name = "bgp_med", + .type = T_INT, .flags = BAF_OPTIONAL, .encode = bgp_encode_u32, .decode = bgp_decode_med, }, [BA_LOCAL_PREF] = { - .name = "local_pref", - .type = EAF_TYPE_INT, + .name = "bgp_local_pref", + .type = T_INT, .flags = BAF_TRANSITIVE, .export = bgp_export_local_pref, .encode = bgp_encode_u32, .decode = bgp_decode_local_pref, }, [BA_ATOMIC_AGGR] = { - .name = "atomic_aggr", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_atomic_aggr", + .type = T_OPAQUE, .flags = BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_atomic_aggr, }, [BA_AGGREGATOR] = { - .name = "aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aggregator", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_aggregator, .decode = bgp_decode_aggregator, .format = bgp_format_aggregator, }, [BA_COMMUNITY] = { - .name = "community", - .type = EAF_TYPE_INT_SET, + .name = "bgp_community", + .type = T_CLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_community, .encode = bgp_encode_u32s, .decode = bgp_decode_community, }, [BA_ORIGINATOR_ID] = { - .name = "originator_id", - .type = EAF_TYPE_ROUTER_ID, + .name = "bgp_originator_id", + .type = T_QUAD, .flags = BAF_OPTIONAL, .export = bgp_export_originator_id, .encode = bgp_encode_u32, .decode = bgp_decode_originator_id, }, [BA_CLUSTER_LIST] = { - .name = "cluster_list", - .type = EAF_TYPE_INT_SET, + .name = "bgp_cluster_list", + .type = T_CLIST, .flags = BAF_OPTIONAL, .export = bgp_export_cluster_list, .encode = bgp_encode_u32s, @@ -1070,43 +1132,47 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_cluster_list, }, [BA_MP_REACH_NLRI] = { - .name = "mp_reach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_reach_nlri", + .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_reach_nlri, }, [BA_MP_UNREACH_NLRI] = { - .name = "mp_unreach_nlri", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_mp_unreach_nlri", + .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_unreach_nlri, }, [BA_EXT_COMMUNITY] = { - .name = "ext_community", - .type = EAF_TYPE_EC_SET, + .name = "bgp_ext_community", + .type = T_ECLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_ext_community, .encode = bgp_encode_u32s, .decode = bgp_decode_ext_community, }, [BA_AS4_PATH] = { - .name = "as4_path", - .type = EAF_TYPE_AS_PATH, + .name = "bgp_as4_path", + .type = T_PATH, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_path, }, [BA_AS4_AGGREGATOR] = { - .name = "as4_aggregator", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_as4_aggregator", + .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_aggregator, .format = bgp_format_aggregator, }, [BA_AIGP] = { - .name = "aigp", - .type = EAF_TYPE_OPAQUE, + .name = "bgp_aigp", + .type = T_OPAQUE, .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS, .export = bgp_export_aigp, .encode = bgp_encode_raw, @@ -1114,8 +1180,8 @@ static const struct bgp_attr_desc bgp_attr_table[] = { .format = bgp_format_aigp, }, [BA_LARGE_COMMUNITY] = { - .name = "large_community", - .type = EAF_TYPE_LC_SET, + .name = "bgp_large_community", + .type = T_LCLIST, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .export = bgp_export_large_community, .encode = bgp_encode_u32s, @@ -1123,14 +1189,15 @@ static const struct bgp_attr_desc bgp_attr_table[] = { }, [BA_ONLY_TO_CUSTOMER] = { .name = "otc", - .type = EAF_TYPE_INT, + .type = T_INT, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_u32, .decode = bgp_decode_otc, }, [BA_MPLS_LABEL_STACK] = { - .name = "mpls_label_stack", - .type = EAF_TYPE_INT_SET, + .name = "bgp_mpls_label_stack", + .type = T_CLIST, + .readonly = 1, .export = bgp_export_mpls_label_stack, .encode = bgp_encode_mpls_label_stack, .decode = bgp_decode_mpls_label_stack, @@ -1138,12 +1205,32 @@ static const struct bgp_attr_desc bgp_attr_table[] = { }, }; -static inline int -bgp_attr_known(uint code) +eattr * +bgp_find_attr(ea_list *attrs, uint code) { - return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name; + return ea_find(attrs, BGP_EA_ID(code)); } +void +bgp_register_attrs(void) +{ + for (uint i=0; i<ARRAY_SIZE(bgp_attr_table); i++) + { + if (!bgp_attr_table[i].name) + bgp_attr_table[i] = (union bgp_attr_desc) { + .name = mb_sprintf(&root_pool, "bgp_unknown_0x%02x", i), + .type = T_OPAQUE, + .flags = BAF_OPTIONAL, + .readonly = 1, + .export = bgp_export_unknown, + .encode = bgp_encode_raw, + .decode = bgp_decode_unknown, + .format = bgp_format_unknown, + }; + + ea_register_init(&bgp_attr_table[i].class); + } +} /* * Attribute export @@ -1152,38 +1239,24 @@ bgp_attr_known(uint code) static inline void bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) { - if (EA_PROTO(a->id) != PROTOCOL_BGP) + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + if (!desc) return; - uint code = EA_ID(a->id); + /* The flags might have been zero if the attr was added locally */ + a->flags = (a->flags & BAF_PARTIAL) | desc->flags; - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; - - /* The flags might have been zero if the attr was added by filters */ - a->flags = (a->flags & BAF_PARTIAL) | desc->flags; - - /* Set partial bit if new opt-trans attribute is attached to non-local route */ - if ((s->src != NULL) && (a->type & EAF_ORIGINATED) && - (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) - a->flags |= BAF_PARTIAL; - - /* Call specific hook */ - CALL(desc->export, s, a); + /* Set partial bit if new opt-trans attribute is attached to non-local route */ + if ((s->src != NULL) && (a->originated) && + (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE)) + a->flags |= BAF_PARTIAL; - /* Attribute might become undefined in hook */ - if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF) - return; - } - else - { - /* Don't re-export unknown non-transitive attributes */ - if (!(a->flags & BAF_TRANSITIVE)) - return; + /* Call specific hook */ + CALL(desc->export, s, a); - a->flags |= BAF_PARTIAL; - } + /* Attribute might become undefined in hook */ + if (a->undef) + return; /* Append updated attribute */ to->attrs[to->count++] = *a; @@ -1203,12 +1276,11 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. */ static inline ea_list * -bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) +bgp_export_attrs(struct bgp_export_state *s, ea_list *a) { /* Merge the attribute list */ - ea_list *new = lp_alloc(s->pool, ea_scan(attrs)); - ea_merge(attrs, new); - ea_sort(new); + ea_list *new = ea_normalize(a, 0); + ASSERT_DIE(new); uint i, count; count = new->count; @@ -1232,14 +1304,9 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs) static inline int bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size) { - ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP); - - uint code = EA_ID(a->id); - - if (bgp_attr_known(code)) - return bgp_attr_table[code].encode(s, a, buf, size); - else - return bgp_encode_raw(s, a, buf, size); + const union bgp_attr_desc *desc = bgp_find_attr_desc(a); + ASSERT_DIE(desc); + return desc->encode(s, a, buf, size); } /** @@ -1304,7 +1371,7 @@ bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs) } static inline void -bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) +bgp_decode_attr(struct bgp_parse_state *s, byte code, byte flags, byte *data, uint len, ea_list **to) { /* Handle duplicate attributes; RFC 7606 3 (g) */ if (BIT32_TEST(s->attrs_seen, code)) @@ -1316,24 +1383,15 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui } BIT32_SET(s->attrs_seen, code); - if (bgp_attr_known(code)) - { - const struct bgp_attr_desc *desc = &bgp_attr_table[code]; + ASSERT_DIE(bgp_attr_table[code].id); + const union bgp_attr_desc *desc = &bgp_attr_table[code]; - /* Handle conflicting flags; RFC 7606 3 (c) */ - if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && - !(desc->flags & BAF_DECODE_FLAGS)) - WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags); + /* Handle conflicting flags; RFC 7606 3 (c) */ + if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) && + !(desc->flags & BAF_DECODE_FLAGS)) + WITHDRAW("Malformed %s attribute - conflicting flags (%02x, expected %02x)", desc->name, flags, desc->flags); - desc->decode(s, code, flags, data, len, to); - } - else /* Unknown attribute */ - { - if (!(flags & BAF_OPTIONAL)) - WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags); - - bgp_decode_unknown(s, code, flags, data, len, to); - } + desc->decode(s, code, flags, data, len, to); } /** @@ -1351,7 +1409,8 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) { struct bgp_proto *p = s->proto; ea_list *attrs = NULL; - uint code, flags, alen; + uint alen; + byte code, flags; byte *pos = data; /* Parse the attributes */ @@ -1432,7 +1491,7 @@ bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len) /* If there is no local preference, define one */ if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); return attrs; @@ -1456,20 +1515,20 @@ loop: } void -bgp_finish_attrs(struct bgp_parse_state *s, rta *a) +bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to) { /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */ if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp) { REPORT("Discarding AIGP attribute received on non-AIGP session"); - bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP); + bgp_unset_attr(to, BA_AIGP); } /* Handle OTC ingress procedure, RFC 9234 */ if (bgp_channel_is_role_applicable(s->channel)) { struct bgp_proto *p = s->proto; - eattr *e = bgp_find_attr(a->eattrs, BA_ONLY_TO_CUSTOMER); + eattr *e = bgp_find_attr(*to, BA_ONLY_TO_CUSTOMER); /* Reject routes from downstream if they are leaked */ if (e && (p->cf->local_role == BGP_ROLE_PROVIDER || @@ -1485,7 +1544,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) if (!e && (p->cf->local_role == BGP_ROLE_CUSTOMER || p->cf->local_role == BGP_ROLE_PEER || p->cf->local_role == BGP_ROLE_RS_CLIENT)) - bgp_set_attr_u32(&a->eattrs, s->pool, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as); + bgp_set_attr_u32(to, BA_ONLY_TO_CUSTOMER, 0, p->cf->remote_as); } } @@ -1505,8 +1564,8 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) -void -bgp_init_bucket_table(struct bgp_channel *c) +static void +bgp_init_bucket_table(struct bgp_pending_tx *c) { HASH_INIT(c->bucket_hash, c->pool, 8); @@ -1514,24 +1573,8 @@ bgp_init_bucket_table(struct bgp_channel *c) c->withdraw_bucket = NULL; } -void -bgp_free_bucket_table(struct bgp_channel *c) -{ - HASH_FREE(c->bucket_hash); - - struct bgp_bucket *b; - WALK_LIST_FIRST(b, c->bucket_queue) - { - rem_node(&b->send_node); - mb_free(b); - } - - mb_free(c->withdraw_bucket); - c->withdraw_bucket = NULL; -} - static struct bgp_bucket * -bgp_get_bucket(struct bgp_channel *c, ea_list *new) +bgp_get_bucket(struct bgp_pending_tx *c, ea_list *new) { /* Hash and lookup */ u32 hash = ea_hash(new); @@ -1540,55 +1583,27 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) if (b) return b; - uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr); - uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN); - uint size = sizeof(struct bgp_bucket) + ea_size_aligned; - uint i; - byte *dest; - - /* Gather total size of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &new->attrs[i]; - - if (!(a->type & EAF_EMBEDDED)) - size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN); - } + /* Scan the list for total size */ + uint ea_size = BIRD_CPU_ALIGN(ea_list_size(new)); + uint size = sizeof(struct bgp_bucket) + ea_size; - /* Create the bucket */ + /* Allocate the bucket */ b = mb_alloc(c->pool, size); *b = (struct bgp_bucket) { }; init_list(&b->prefixes); b->hash = hash; - /* Copy list of extended attributes */ - memcpy(b->eattrs, new, ea_size); - dest = ((byte *) b->eattrs) + ea_size_aligned; + /* Copy the ea_list */ + ea_list_copy(b->eattrs, new, ea_size); - /* Copy values of non-inline attributes */ - for (i = 0; i < new->count; i++) - { - eattr *a = &b->eattrs->attrs[i]; - - if (!(a->type & EAF_EMBEDDED)) - { - const struct adata *oa = a->u.ptr; - struct adata *na = (struct adata *) dest; - memcpy(na, oa, sizeof(struct adata) + oa->length); - a->u.ptr = na; - dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN); - } - } - - /* Insert the bucket to send queue and bucket hash */ - add_tail(&c->bucket_queue, &b->send_node); + /* Insert the bucket to bucket hash */ HASH_INSERT2(c->bucket_hash, RBH, c->pool, b); return b; } static struct bgp_bucket * -bgp_get_withdraw_bucket(struct bgp_channel *c) +bgp_get_withdraw_bucket(struct bgp_pending_tx *c) { if (!c->withdraw_bucket) { @@ -1599,25 +1614,45 @@ bgp_get_withdraw_bucket(struct bgp_channel *c) return c->withdraw_bucket; } -void -bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) +static void +bgp_free_bucket(struct bgp_pending_tx *c, struct bgp_bucket *b) { - rem_node(&b->send_node); HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); mb_free(b); } +int +bgp_done_bucket(struct bgp_channel *bc, struct bgp_bucket *b) +{ + struct bgp_pending_tx *c = bc->ptx; + + /* Won't free the withdraw bucket */ + if (b == c->withdraw_bucket) + return 0; + + if (EMPTY_LIST(b->prefixes)) + rem_node(&b->send_node); + + if (b->px_uc || !EMPTY_LIST(b->prefixes)) + return 0; + + bgp_free_bucket(c, b); + return 1; +} + void -bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) +bgp_defer_bucket(struct bgp_channel *bc, struct bgp_bucket *b) { + struct bgp_pending_tx *c = bc->ptx; rem_node(&b->send_node); add_tail(&c->bucket_queue, &b->send_node); } void -bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) +bgp_withdraw_bucket(struct bgp_channel *bc, struct bgp_bucket *b) { - struct bgp_proto *p = (void *) c->c.proto; + struct bgp_proto *p = (void *) bc->c.proto; + struct bgp_pending_tx *c = bc->ptx; struct bgp_bucket *wb = bgp_get_withdraw_bucket(c); log(L_ERR "%s: Attribute list too long", p->p.name); @@ -1626,8 +1661,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) struct bgp_prefix *px = HEAD(b->prefixes); log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net); - rem_node(&px->buck_node); - add_tail(&wb->prefixes, &px->buck_node); + rem_node(&px->buck_node_xx); + add_tail(&wb->prefixes, &px->buck_node_xx); } } @@ -1638,7 +1673,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_KEY(px) px->net, px->path_id, px->hash #define PXH_NEXT(px) px->next -#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2) +#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash @@ -1647,36 +1682,27 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) -void -bgp_init_prefix_table(struct bgp_channel *c) +static void +bgp_init_prefix_table(struct bgp_channel *bc) { + struct bgp_pending_tx *c = bc->ptx; HASH_INIT(c->prefix_hash, c->pool, 8); - uint alen = net_addr_length[c->c.net_type]; + uint alen = net_addr_length[bc->c.net_type]; c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL; } -void -bgp_free_prefix_table(struct bgp_channel *c) -{ - HASH_FREE(c->prefix_hash); - - rfree(c->prefix_slab); - c->prefix_slab = NULL; -} - static struct bgp_prefix * -bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id) +bgp_get_prefix(struct bgp_pending_tx *c, const net_addr *net, struct rte_src *src, int add_path_tx) { + u32 path_id = src->global_id; + u32 path_id_hash = add_path_tx ? path_id : 0; /* We must use a different hash function than the rtable */ - u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id)); - struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash)); + struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash); if (px) - { - rem_node(&px->buck_node); return px; - } if (c->prefix_slab) px = sl_alloc(c->prefix_slab); @@ -1687,37 +1713,297 @@ bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id) px->hash = hash; px->path_id = path_id; net_copy(px->net, net); + rt_lock_source(src); HASH_INSERT2(c->prefix_hash, PXH, c->pool, px); return px; } -void -bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) +static void bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px); + +static inline int +bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b) +{ +#define IS_WITHDRAW_BUCKET(b) ((b) == c->ptx->withdraw_bucket) +#define BPX_TRACE(what) do { \ + if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \ + c->c.proto->name, c->c.name, what, \ + px->net, px->path_id, IS_WITHDRAW_BUCKET(b) ? "withdraw" : "update"); } while (0) + px->lastmod = current_time(); + + /* Already queued for the same bucket */ + if (px->cur == b) + { + BPX_TRACE("already queued"); + return 0; + } + + /* Unqueue from the old bucket */ + if (px->cur) + { + rem_node(&px->buck_node_xx); + bgp_done_bucket(c, px->cur); + } + + /* The new bucket is the same as we sent before */ + if ((px->last == b) || c->c.out_table && !px->last && IS_WITHDRAW_BUCKET(b)) + { + if (px->cur) + BPX_TRACE("reverted"); + else + BPX_TRACE("already sent"); + + /* Well, we haven't sent anything yet */ + if (!px->last) + bgp_free_prefix(c->ptx, px); + + px->cur = NULL; + return 0; + } + + /* Enqueue the bucket if it has been empty */ + if (!IS_WITHDRAW_BUCKET(b) && EMPTY_LIST(b->prefixes)) + add_tail(&c->ptx->bucket_queue, &b->send_node); + + /* Enqueue to the new bucket and indicate the change */ + add_tail(&b->prefixes, &px->buck_node_xx); + px->cur = b; + + BPX_TRACE("queued"); + return 1; + +#undef BPX_TRACE +} + +static void +bgp_free_prefix(struct bgp_pending_tx *c, struct bgp_prefix *px) { - rem_node(&px->buck_node); HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); + rt_unlock_source(rt_find_source_global(px->path_id)); + if (c->prefix_slab) - sl_free(c->prefix_slab, px); + sl_free(px); else mb_free(px); } +void +bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck) +{ + /* Cleanup: We're called from bucket senders. */ + ASSERT_DIE(px->cur == buck); + rem_node(&px->buck_node_xx); + + /* We may want to store the updates */ + if (c->c.out_table) + { + /* Nothing to be sent right now */ + px->cur = NULL; + + /* Unref the previous sent version */ + if (px->last) + px->last->px_uc--; + + /* Ref the current sent version */ + if (!IS_WITHDRAW_BUCKET(buck)) + { + px->last = buck; + px->last->px_uc++; + return; + } + + /* Prefixes belonging to the withdraw bucket are freed always */ + } + + bgp_free_prefix(c->ptx, px); +} + +static void +bgp_pending_tx_rfree(resource *r) +{ + struct bgp_pending_tx *ptx = SKIP_BACK(struct bgp_pending_tx, r, r); + + HASH_WALK(ptx->prefix_hash, next, n) + rt_unlock_source(rt_find_source_global(n->path_id)); + HASH_WALK_END; +} + +static void bgp_pending_tx_dump(resource *r UNUSED) { debug("\n"); } + +static struct resclass bgp_pending_tx_class = { + .name = "BGP Pending TX", + .size = sizeof(struct bgp_pending_tx), + .free = bgp_pending_tx_rfree, + .dump = bgp_pending_tx_dump, +}; + +void +bgp_init_pending_tx(struct bgp_channel *c) +{ + ASSERT_DIE(!c->ptx); + + pool *p = rp_new(c->pool, "BGP Pending TX"); + c->ptx = ralloc(p, &bgp_pending_tx_class); + c->ptx->pool = p; + + bgp_init_bucket_table(c->ptx); + bgp_init_prefix_table(c); +} + +void +bgp_free_pending_tx(struct bgp_channel *c) +{ + ASSERT_DIE(c->ptx); + ASSERT_DIE(c->ptx->pool); + + rfree(c->ptx->pool); + c->ptx = NULL; +} + + +/* + * Prefix hash table exporter + */ + +static void +bgp_out_table_feed(void *data) +{ + struct rt_export_hook *hook = data; + struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table); + struct bgp_pending_tx *c = bc->ptx; + + int max = 512; + + const net_addr *neq = (hook->req->addr_mode == TE_ADDR_EQUAL) ? hook->req->addr : NULL; + const net_addr *cand = NULL; + + do { + HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter) + { + switch (hook->req->addr_mode) + { + case TE_ADDR_IN: + if (!net_in_netX(n->net, hook->req->addr)) + continue; + /* fall through */ + case TE_ADDR_NONE: + /* Splitting only for multi-net exports */ + if (--max <= 0) + HASH_WALK_ITER_PUT; + break; + + case TE_ADDR_FOR: + if (!neq) + { + if (net_in_netX(hook->req->addr, n->net) && (!cand || (n->net->length > cand->length))) + cand = n->net; + continue; + } + /* fall through */ + case TE_ADDR_EQUAL: + if (!net_equal(n->net, neq)) + continue; + break; + } + + struct bgp_bucket *buck = n->cur ?: n->last; + ea_list *ea = NULL; + if (buck == c->withdraw_bucket) + ea_set_dest(&ea, 0, RTD_UNREACHABLE); + else + { + ea = buck->eattrs; + eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP); + ASSERT_DIE(eanh); + const ip_addr *nh = (const void *) eanh->u.ptr->data; + + struct nexthop_adata nhad = { + .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), }, + .nh = { .gw = nh[0], }, + }; + + ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad))); + } + + struct rte_storage es = { + .rte = { + .attrs = ea, + .net = n->net, + .src = rt_find_source_global(n->path_id), + .sender = NULL, + .lastmod = n->lastmod, + .flags = n->cur ? REF_PENDING : 0, + }, + }; + + struct rt_pending_export rpe = { + .new = &es, .new_best = &es, + }; + + if (hook->req->export_bulk) + { + rte *feed = &es.rte; + hook->req->export_bulk(hook->req, n->net, &rpe, &feed, 1); + } + else if (hook->req->export_one) + hook->req->export_one(hook->req, n->net, &rpe); + else + bug("No export method in export request"); + } + HASH_WALK_ITER_END; + + neq = cand; + cand = NULL; + } while (neq); + + if (hook->hash_iter) + ev_schedule_work(hook->event); + else + rt_set_export_state(hook, TES_READY); +} + +static struct rt_export_hook * +bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED) +{ + struct bgp_channel *bc = SKIP_BACK(struct bgp_channel, prefix_exporter, re); + pool *p = rp_new(bc->c.proto->pool, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + hook->event = ev_new_init(p, bgp_out_table_feed, hook); + hook->feed_type = TFT_HASH; + + return hook; +} + +void +bgp_setup_out_table(struct bgp_channel *c) +{ + ASSERT_DIE(c->c.out_table == NULL); + + c->prefix_exporter = (struct rt_exporter) { + .addr_type = c->c.table->addr_type, + .start = bgp_out_table_export_start, + }; + + init_list(&c->prefix_exporter.hooks); + init_list(&c->prefix_exporter.pending); + + c->c.out_table = &c->prefix_exporter; +} + /* * BGP protocol glue */ int -bgp_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) +bgp_preexport(struct channel *C, rte *e) { - rte *e = *new; - struct proto *SRC = e->attrs->src->proto; - struct bgp_channel *c = (struct bgp_channel *) C; struct bgp_proto *p = (struct bgp_proto *) C->proto; - struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; + struct bgp_proto *src = bgp_rte_proto(e); + struct bgp_channel *c = (struct bgp_channel *) C; /* Reject our routes */ if (src == p) @@ -1728,8 +2014,20 @@ bgp_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) return 0; /* Reject flowspec that failed validation */ - if ((e->attrs->dest == RTD_UNREACHABLE) && net_is_flow(e->net->n.addr)) - return -1; + if (net_is_flow(e->net)) + switch (rt_get_flowspec_valid(e)) + { + case FLOWSPEC_VALID: + break; + case FLOWSPEC_INVALID: + return -1; + case FLOWSPEC_UNKNOWN: + ASSUME((rt_get_source_attr(e) != RTS_BGP) || + !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table); + break; + case FLOWSPEC__MAX: + bug("This never happens."); + } /* IBGP route reflection, RFC 4456 */ if (p->is_internal && src->is_internal && (p->local_as == src->local_as)) @@ -1740,14 +2038,14 @@ bgp_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) /* Generally, this should be handled when path is received, but we check it also here as rr_cluster_id may be undefined or different in src. */ - if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs)) + if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs)) return -1; } /* Handle well-known communities, RFC 1997 */ struct eattr *a; if (p->cf->interpret_communities && - (a = bgp_find_attr(e->attrs->eattrs, BA_COMMUNITY))) + (a = bgp_find_attr(e->attrs, BA_COMMUNITY))) { const struct adata *d = a->u.ptr; @@ -1771,7 +2069,7 @@ bgp_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) /* Do not export routes marked with OTC to upstream, RFC 9234 */ if (bgp_channel_is_role_applicable(c)) { - a = bgp_find_attr(e->attrs->eattrs, BA_ONLY_TO_CUSTOMER); + a = bgp_find_attr(e->attrs, BA_ONLY_TO_CUSTOMER); if (a && (p->cf->local_role==BGP_ROLE_CUSTOMER || p->cf->local_role==BGP_ROLE_PEER || p->cf->local_role==BGP_ROLE_RS_CLIENT)) @@ -1784,8 +2082,7 @@ bgp_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) static ea_list * bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool) { - struct proto *SRC = e->attrs->src->proto; - struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; + struct bgp_proto *src = bgp_rte_proto(e); struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls }; ea_list *attrs = attrs0; eattr *a; @@ -1793,7 +2090,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* ORIGIN attribute - mandatory, attach if missing */ if (! bgp_find_attr(attrs0, BA_ORIGIN)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); + bgp_set_attr_u32(&attrs, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP); /* AS_PATH attribute - mandatory */ a = bgp_find_attr(attrs0, BA_AS_PATH); @@ -1808,24 +2105,24 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* IBGP or route server -> just ensure there is one */ if (!a) - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, &null_adata); } else if (p->is_interior) { /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */ ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); } else /* Regular EBGP (no RS, no confederation) */ { /* Regular EBGP -> prepend ASN as regular sequence */ ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as); - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, ad); /* MULTI_EXIT_DESC attribute - accept only if set in export filter */ a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC); - if (a && !(a->type & EAF_FRESH)) - bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC); + if (a && !(a->fresh)) + bgp_unset_attr(&attrs, BA_MULTI_EXIT_DISC); } /* NEXT_HOP attribute - delegated to AF-specific hook */ @@ -1834,7 +2131,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at /* LOCAL_PREF attribute - required for IBGP, attach if missing */ if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF)) - bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref); + bgp_set_attr_u32(&attrs, BA_LOCAL_PREF, 0, p->cf->default_local_pref); /* AIGP attribute - accumulate local metric or originate new one */ u64 metric; @@ -1843,7 +2140,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad)))) { ad = bgp_aigp_set_metric(pool, ad, metric); - bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad); + bgp_set_attr_ptr(&attrs, BA_AIGP, 0, ad); } /* IBGP route reflection, RFC 4456 */ @@ -1851,7 +2148,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at { /* ORIGINATOR_ID attribute - attach if not already set */ if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID)) - bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id); + bgp_set_attr_u32(&attrs, BA_ORIGINATOR_ID, 0, src->remote_id); /* CLUSTER_LIST attribute - prepend cluster ID */ a = bgp_find_attr(attrs0, BA_CLUSTER_LIST); @@ -1866,7 +2163,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at ad = int_set_prepend(pool, ad, p->rr_cluster_id); /* Should be at least one prepended cluster ID */ - bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad); + bgp_set_attr_ptr(&attrs, BA_CLUSTER_LIST, 0, ad); } /* AS4_* transition attributes, RFC 6793 4.2.2 */ @@ -1875,15 +2172,15 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at a = bgp_find_attr(attrs, BA_AS_PATH); if (a && as_path_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr)); } a = bgp_find_attr(attrs, BA_AGGREGATOR); if (a && aggregator_contains_as4(a->u.ptr)) { - bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); - bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr); + bgp_set_attr_ptr(&attrs, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr)); + bgp_set_attr_ptr(&attrs, BA_AS4_AGGREGATOR, 0, a->u.ptr); } } @@ -1894,7 +2191,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at if (!a && (p->cf->local_role == BGP_ROLE_PROVIDER || p->cf->local_role == BGP_ROLE_PEER || p->cf->local_role == BGP_ROLE_RS_SERVER)) - bgp_set_attr_u32(&attrs, pool, BA_ONLY_TO_CUSTOMER, 0, p->public_as); + bgp_set_attr_u32(&attrs, BA_ONLY_TO_CUSTOMER, 0, p->public_as); } /* @@ -1908,79 +2205,84 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at } void -bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old) +bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old) { struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; struct bgp_bucket *buck; - struct bgp_prefix *px; - u32 path; + struct rte_src *path; if (new) { - struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2); + struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs, tmp_linpool); /* Error during attribute processing */ if (!attrs) - log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n->n.addr); + log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n); /* If attributes are invalid, we fail back to withdraw */ - buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); - path = new->attrs->src->global_id; - - lp_flush(bgp_linpool2); + buck = attrs ? bgp_get_bucket(c->ptx, attrs) : bgp_get_withdraw_bucket(c->ptx); + path = new->src; } else { - buck = bgp_get_withdraw_bucket(c); - path = old->attrs->src->global_id; + buck = bgp_get_withdraw_bucket(c->ptx); + path = old->src; } - px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0); - add_tail(&buck->prefixes, &px->buck_node); - - bgp_schedule_packet(p->conn, c, PKT_UPDATE); + if (bgp_update_prefix(c, bgp_get_prefix(c->ptx, n, path, c->add_path_tx), buck)) + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } static inline u32 bgp_get_neighbor(rte *r) { - eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + eattr *e = ea_find(r->attrs, BGP_EA_ID(BA_AS_PATH)); u32 as; if (e && as_path_get_first_regular(e->u.ptr, &as)) return as; /* If AS_PATH is not defined, we treat rte as locally originated */ - struct bgp_proto *p = (void *) r->attrs->src->proto; + struct bgp_proto *p = bgp_rte_proto(r); return p->cf->confederation ?: p->local_as; } static inline int rte_stale(rte *r) { - if (r->u.bgp.stale < 0) + if (r->pflags & BGP_REF_STALE) + return 1; + + if (r->pflags & BGP_REF_NOT_STALE) + return 0; + + /* If staleness is unknown, compute and cache it */ + eattr *a = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY)); + if (a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE)) { - /* If staleness is unknown, compute and cache it */ - eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); - r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE); + r->pflags |= BGP_REF_STALE; + return 1; + } + else + { + r->pflags |= BGP_REF_NOT_STALE; + return 0; } - - return r->u.bgp.stale; } int bgp_rte_better(rte *new, rte *old) { - struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto; - struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto; + struct bgp_proto *new_bgp = bgp_rte_proto(new); + struct bgp_proto *old_bgp = bgp_rte_proto(old); eattr *x, *y; u32 n, o; /* Skip suppressed routes (see bgp_rte_recalculate()) */ - n = new->u.bgp.suppressed; - o = old->u.bgp.suppressed; + n = new->pflags & BGP_REF_SUPPRESSED; + o = old->pflags & BGP_REF_SUPPRESSED; if (n > o) return 0; if (n < o) @@ -2003,8 +2305,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* Start with local preferences */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(new->attrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(old->attrs, BGP_EA_ID(BA_LOCAL_PREF)); n = x ? x->u.data : new_bgp->cf->default_local_pref; o = y ? y->u.data : old_bgp->cf->default_local_pref; if (n > o) @@ -2023,8 +2325,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(new->attrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(old->attrs, BGP_EA_ID(BA_AS_PATH)); n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; if (n < o) @@ -2034,8 +2336,8 @@ bgp_rte_better(rte *new, rte *old) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGIN)); n = x ? x->u.data : ORIGIN_INCOMPLETE; o = y ? y->u.data : ORIGIN_INCOMPLETE; if (n < o) @@ -2057,8 +2359,8 @@ bgp_rte_better(rte *new, rte *old) if (new_bgp->cf->med_metric || old_bgp->cf->med_metric || (bgp_get_neighbor(new) == bgp_get_neighbor(old))) { - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(new->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(old->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); n = x ? x->u.data : new_bgp->cf->default_med; o = y ? y->u.data : old_bgp->cf->default_med; if (n < o) @@ -2074,8 +2376,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0; - o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0; + n = new_bgp->cf->igp_metric ? rt_get_igp_metric(new) : 0; + o = old_bgp->cf->igp_metric ? rt_get_igp_metric(old) : 0; if (n < o) return 1; if (n > o) @@ -2083,8 +2385,8 @@ bgp_rte_better(rte *new, rte *old) /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */ /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); + x = ea_find(new->attrs, BGP_EA_ID(BA_ORIGINATOR_ID)); + y = ea_find(old->attrs, BGP_EA_ID(BA_ORIGINATOR_ID)); n = x ? x->u.data : new_bgp->remote_id; o = y ? y->u.data : old_bgp->remote_id; @@ -2101,8 +2403,8 @@ bgp_rte_better(rte *new, rte *old) return 0; /* RFC 4456 9. b) Compare cluster list lengths */ - x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); - y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); + x = ea_find(new->attrs, BGP_EA_ID(BA_CLUSTER_LIST)); + y = ea_find(old->attrs, BGP_EA_ID(BA_CLUSTER_LIST)); n = x ? int_set_get_size(x->u.ptr) : 0; o = y ? int_set_get_size(y->u.ptr) : 0; if (n < o) @@ -2118,13 +2420,13 @@ bgp_rte_better(rte *new, rte *old) int bgp_rte_mergable(rte *pri, rte *sec) { - struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto; - struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto; + struct bgp_proto *pri_bgp = bgp_rte_proto(pri); + struct bgp_proto *sec_bgp = bgp_rte_proto(sec); eattr *x, *y; u32 p, s; /* Skip suppressed routes (see bgp_rte_recalculate()) */ - if (pri->u.bgp.suppressed != sec->u.bgp.suppressed) + if ((pri->pflags ^ sec->pflags) & BGP_REF_SUPPRESSED) return 0; /* RFC 4271 9.1.2.1. Route resolvability test */ @@ -2136,8 +2438,8 @@ bgp_rte_mergable(rte *pri, rte *sec) return 0; /* Start with local preferences */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_LOCAL_PREF)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_LOCAL_PREF)); p = x ? x->u.data : pri_bgp->cf->default_local_pref; s = y ? y->u.data : sec_bgp->cf->default_local_pref; if (p != s) @@ -2146,8 +2448,8 @@ bgp_rte_mergable(rte *pri, rte *sec) /* RFC 4271 9.1.2.2. a) Use AS path lengths */ if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_AS_PATH)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_AS_PATH)); p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN; s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN; @@ -2159,8 +2461,8 @@ bgp_rte_mergable(rte *pri, rte *sec) } /* RFC 4271 9.1.2.2. b) Use origins */ - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_ORIGIN)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_ORIGIN)); p = x ? x->u.data : ORIGIN_INCOMPLETE; s = y ? y->u.data : ORIGIN_INCOMPLETE; if (p != s) @@ -2170,8 +2472,8 @@ bgp_rte_mergable(rte *pri, rte *sec) if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric || (bgp_get_neighbor(pri) == bgp_get_neighbor(sec))) { - x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); - y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); + x = ea_find(pri->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); + y = ea_find(sec->attrs, BGP_EA_ID(BA_MULTI_EXIT_DISC)); p = x ? x->u.data : pri_bgp->cf->default_med; s = y ? y->u.data : sec_bgp->cf->default_med; if (p != s) @@ -2183,8 +2485,8 @@ bgp_rte_mergable(rte *pri, rte *sec) return 0; /* RFC 4271 9.1.2.2. e) Compare IGP metrics */ - p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0; - s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0; + p = pri_bgp->cf->igp_metric ? rt_get_igp_metric(pri) : 0; + s = sec_bgp->cf->igp_metric ? rt_get_igp_metric(sec) : 0; if (p != s) return 0; @@ -2197,24 +2499,23 @@ bgp_rte_mergable(rte *pri, rte *sec) static inline int same_group(rte *r, u32 lpref, u32 lasn) { - return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn); + return (rt_get_preference(r) == lpref) && (bgp_get_neighbor(r) == lasn); } static inline int -use_deterministic_med(rte *r) +use_deterministic_med(struct rte_storage *r) { - struct proto *P = r->attrs->src->proto; - return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med; + struct bgp_proto *p = bgp_rte_proto(&r->rte); + return p && p->cf->deterministic_med; } int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) { - rte *r, *s; rte *key = new ? new : old; - u32 lpref = key->pref; + u32 lpref = rt_get_preference(key); u32 lasn = bgp_get_neighbor(key); - int old_suppressed = old ? old->u.bgp.suppressed : 0; + int old_suppressed = old ? !!(old->pflags & BGP_REF_SUPPRESSED) : 0; /* * Proper RFC 4271 path selection is a bit complicated, it cannot be @@ -2266,11 +2567,11 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) */ if (new) - new->u.bgp.suppressed = 1; + new->pflags |= BGP_REF_SUPPRESSED; if (old) { - old->u.bgp.suppressed = 1; + old->pflags |= BGP_REF_SUPPRESSED; /* The fast case - replace not best with worse (or remove not best) */ if (old_suppressed && !(new && bgp_rte_better(new, old))) @@ -2278,13 +2579,13 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) } /* The default case - find a new best-in-group route */ - r = new; /* new may not be in the list */ - for (s=net->routes; rte_is_valid(s); s=s->next) - if (use_deterministic_med(s) && same_group(s, lpref, lasn)) + rte *r = new; /* new may not be in the list */ + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) + if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) { - s->u.bgp.suppressed = 1; - if (!r || bgp_rte_better(s, r)) - r = s; + s->rte.pflags |= BGP_REF_SUPPRESSED; + if (!r || bgp_rte_better(&s->rte, r)) + r = &s->rte; } /* Simple case - the last route in group disappears */ @@ -2293,16 +2594,16 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) /* Found if new is mergable with best-in-group */ if (new && (new != r) && bgp_rte_mergable(r, new)) - new->u.bgp.suppressed = 0; + new->pflags &= ~BGP_REF_SUPPRESSED; /* Found all existing routes mergable with best-in-group */ - for (s=net->routes; rte_is_valid(s); s=s->next) - if (use_deterministic_med(s) && same_group(s, lpref, lasn)) - if ((s != r) && bgp_rte_mergable(r, s)) - s->u.bgp.suppressed = 0; + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) + if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) + if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte)) + s->rte.pflags &= ~BGP_REF_SUPPRESSED; /* Found best-in-group */ - r->u.bgp.suppressed = 0; + r->pflags &= ~BGP_REF_SUPPRESSED; /* * There are generally two reasons why we have to force @@ -2334,25 +2635,57 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return !old_suppressed; } -struct rte * -bgp_rte_modify_stale(struct rte *r, struct linpool *pool) +void +bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); - const struct adata *ad = a ? a->u.ptr : NULL; - uint flags = a ? a->flags : BAF_PARTIAL; + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct rt_import_hook *irh = c->c.in_req.hook; - if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) - return NULL; + /* Find our routes among others */ + for (uint i=0; i<count; i++) + { + rte *r = feed[i]; + + if ( + !rte_is_valid(r) || /* Not a valid route */ + (r->sender != irh) || /* Not our route */ + (r->stale_cycle == irh->stale_set)) /* A new route, do not mark as stale */ + continue; + + eattr *ea = ea_find(r->attrs, BGP_EA_ID(BA_COMMUNITY)); + const struct adata *ad = ea ? ea->u.ptr : NULL; + uint flags = ea ? ea->flags : BAF_PARTIAL; + + /* LLGR not allowed, withdraw the route */ + if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) + { + rte_import(&c->c.in_req, n, NULL, r->src); + continue; + } - if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) - return r; + /* Route already marked as LLGR, do nothing */ + if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) + continue; - r = rte_cow_rta(r, pool); - bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags, - int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); - r->u.bgp.stale = 1; + /* Store the tmp_linpool state to aggresively save memory */ + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); - return r; + /* Mark the route as LLGR */ + rte e0 = *r; + bgp_set_attr_ptr(&e0.attrs, BA_COMMUNITY, flags, int_set_add(tmp_linpool, ad, BGP_COMM_LLGR_STALE)); + e0.pflags &= ~BGP_REF_NOT_STALE; + e0.pflags |= BGP_REF_STALE; + + /* We need to update the route but keep it stale. */ + ASSERT_DIE(irh->stale_set == irh->stale_valid + 1); + irh->stale_set--; + rte_import(&c->c.in_req, n, &e0, r->src); + irh->stale_set++; + + /* Restore the memory state */ + lp_restore(tmp_linpool, &tmpp); + } } @@ -2368,8 +2701,8 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR); /* First, unset AS4_* attributes */ - if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH); - if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR); + if (p4) bgp_unset_attr(attrs, BA_AS4_PATH); + if (a4) bgp_unset_attr(attrs, BA_AS4_AGGREGATOR); /* Handle AGGREGATOR attribute */ if (a2 && a4) @@ -2402,60 +2735,37 @@ bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool) } } -int -bgp_get_attr(const eattr *a, byte *buf, int buflen) -{ - uint i = EA_ID(a->id); - const struct bgp_attr_desc *d; - int len; - - if (bgp_attr_known(i)) - { - d = &bgp_attr_table[i]; - len = bsprintf(buf, "%s", d->name); - buf += len; - if (d->format) - { - *buf++ = ':'; - *buf++ = ' '; - d->format(a, buf, buflen - len - 2); - return GA_FULL; - } - return GA_NAME; - } - - bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : ""); - return GA_NAME; -} - void bgp_get_route_info(rte *e, byte *buf) { - eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); - eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); + eattr *p = ea_find(e->attrs, BGP_EA_ID(BA_AS_PATH)); + eattr *o = ea_find(e->attrs, BGP_EA_ID(BA_ORIGIN)); u32 origas; - buf += bsprintf(buf, " (%d", e->pref); + buf += bsprintf(buf, " (%d", rt_get_preference(e)); - if (e->u.bgp.suppressed) - buf += bsprintf(buf, "-"); + if (!net_is_flow(e->net)) + { + if (e->pflags & BGP_REF_SUPPRESSED) + buf += bsprintf(buf, "-"); - if (rte_stale(e)) - buf += bsprintf(buf, "s"); + if (rte_stale(e)) + buf += bsprintf(buf, "s"); - u64 metric = bgp_total_aigp_metric(e); - if (metric < BGP_AIGP_MAX) - { - buf += bsprintf(buf, "/%lu", metric); - } - else if (e->attrs->igp_metric) - { - if (!rte_resolvable(e)) - buf += bsprintf(buf, "/-"); - else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN) - buf += bsprintf(buf, "/?"); - else - buf += bsprintf(buf, "/%d", e->attrs->igp_metric); + u64 metric = bgp_total_aigp_metric(e); + if (metric < BGP_AIGP_MAX) + { + buf += bsprintf(buf, "/%lu", metric); + } + else if (metric = rt_get_igp_metric(e)) + { + if (!rte_resolvable(e)) + buf += bsprintf(buf, "/-"); + else if (metric >= IGP_METRIC_UNKNOWN) + buf += bsprintf(buf, "/?"); + else + buf += bsprintf(buf, "/%d", metric); + } } buf += bsprintf(buf, ") ["); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 0f06746f..68c788ea 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -115,7 +115,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -127,9 +127,7 @@ #include "bgp.h" -struct linpool *bgp_linpool; /* Global temporary pool */ -struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */ -static list bgp_sockets; /* Global list of listening sockets */ +static list STATIC_LIST_INIT(bgp_sockets); /* Global list of listening sockets */ static void bgp_connect(struct bgp_proto *p); @@ -142,6 +140,9 @@ static void bgp_update_bfd(struct bgp_proto *p, const struct bfd_options *bfd); static int bgp_incoming_connection(sock *sk, uint dummy UNUSED); static void bgp_listen_sock_err(sock *sk UNUSED, int err); +static void bgp_graceful_restart_feed(struct bgp_channel *c); + + /** * bgp_open - open a BGP instance * @p: BGP instance @@ -162,10 +163,6 @@ bgp_open(struct bgp_proto *p) uint flags = p->cf->free_bind ? SKF_FREEBIND : 0; uint flag_mask = SKF_FREEBIND; - /* FIXME: Add some global init? */ - if (!bgp_linpool) - init_list(&bgp_sockets); - /* We assume that cf->iface is defined iff cf->local_ip is link-local */ WALK_LIST(bs, bgp_sockets) @@ -205,12 +202,6 @@ bgp_open(struct bgp_proto *p) add_tail(&bgp_sockets, &bs->n); - if (!bgp_linpool) - { - bgp_linpool = lp_new_default(proto_pool); - bgp_linpool2 = lp_new_default(proto_pool); - } - return 0; err: @@ -239,15 +230,6 @@ bgp_close(struct bgp_proto *p) rfree(bs->sk); rem_node(&bs->n); mb_free(bs); - - if (!EMPTY_LIST(bgp_sockets)) - return; - - rfree(bgp_linpool); - bgp_linpool = NULL; - - rfree(bgp_linpool2); - bgp_linpool2 = NULL; } static inline int @@ -395,6 +377,7 @@ bgp_close_conn(struct bgp_conn *conn) conn->keepalive_timer = NULL; rfree(conn->hold_timer); conn->hold_timer = NULL; + rfree(conn->tx_ev); conn->tx_ev = NULL; rfree(conn->sk); @@ -533,8 +516,15 @@ void bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len) { proto_notify_state(&p->p, PS_STOP); + p->uncork_ev->data = NULL; bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len); bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len); + + struct bgp_channel *c; + WALK_LIST(c, p->p.channels) + if (c->ptx) + bgp_free_pending_tx(c); + ev_schedule(p->event); } @@ -782,32 +772,30 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(&c->c.in_req); break; case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c); - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_end(&c->c.in_req); + rt_refresh_begin(&c->c.in_req); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c); - rt_modify_stale(c->c.table, &c->c); + rt_refresh_begin(&c->c.in_req); + bgp_graceful_restart_feed(c); break; } } else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_begin(&c->c.in_req); + rt_refresh_end(&c->c.in_req); } /* Reset bucket and prefix tables */ - bgp_free_bucket_table(c); - bgp_free_prefix_table(c); - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); + bgp_free_pending_tx(c); + bgp_init_pending_tx(c); c->packets_to_send = 0; } @@ -818,6 +806,53 @@ bgp_handle_graceful_restart(struct bgp_proto *p) tm_start(p->gr_timer, p->conn->remote_caps->gr_time S); } +static void +bgp_graceful_restart_feed_done(struct rt_export_request *req) +{ + req->hook = NULL; +} + +static void +bgp_graceful_restart_feed_dump_req(struct rt_export_request *req) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + debug(" BGP-GR %s.%s export request %p\n", c->c.proto->name, c->c.name, req); +} + +static void +bgp_graceful_restart_feed_log_state_change(struct rt_export_request *req, u8 state) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, stale_feed, req); + struct bgp_proto *p = (void *) c->c.proto; + BGP_TRACE(D_EVENTS, "Long-lived graceful restart export state changed to %s", rt_export_state_name(state)); + + if (state == TES_READY) + rt_stop_export(req, bgp_graceful_restart_feed_done); +} + +static void +bgp_graceful_restart_drop_export(struct rt_export_request *req UNUSED, const net_addr *n UNUSED, struct rt_pending_export *rpe UNUSED) +{ /* Nothing to do */ } + +static void +bgp_graceful_restart_feed(struct bgp_channel *c) +{ + c->stale_feed = (struct rt_export_request) { + .name = "BGP-GR", + .list = &global_work_list, + .trace_routes = c->c.debug | c->c.proto->debug, + .dump_req = bgp_graceful_restart_feed_dump_req, + .log_state_change = bgp_graceful_restart_feed_log_state_change, + .export_bulk = bgp_rte_modify_stale, + .export_one = bgp_graceful_restart_drop_export, + }; + + rt_request_export(&c->c.table->exporter, &c->stale_feed); +} + + + + /** * bgp_graceful_restart_done - finish active BGP graceful restart * @c: BGP channel @@ -841,7 +876,7 @@ bgp_graceful_restart_done(struct bgp_channel *c) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(&c->c.in_req); } /** @@ -883,7 +918,7 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c); + bgp_graceful_restart_feed(c); } } else @@ -921,10 +956,7 @@ bgp_refresh_begin(struct bgp_channel *c) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c); - - if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c); + rt_refresh_begin(&c->c.in_req); } /** @@ -945,10 +977,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c); - - if (c->c.in_table) - rt_prune_sync(c->c.in_table, 0); + rt_refresh_end(&c->c.in_req); } @@ -1415,9 +1444,9 @@ bgp_reload_routes(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ASSERT(p->conn && (p->route_refresh || c->c.in_table)); + ASSERT(p->conn && (p->route_refresh || (C->in_keep & RIK_PREFILTER))); - if (c->c.in_table) + if (C->in_keep & RIK_PREFILTER) channel_schedule_reload(C); else bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); @@ -1554,6 +1583,8 @@ bgp_start(struct proto *P) p->last_rx_update = 0; p->event = ev_new_init(p->p.pool, bgp_decision, p); + p->uncork_ev = ev_new_init(p->p.pool, bgp_uncork, p); + p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0); p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0); @@ -1684,6 +1715,13 @@ done: return p->p.proto_state; } +struct rte_owner_class bgp_rte_owner_class = { + .get_route_info = bgp_get_route_info, + .rte_better = bgp_rte_better, + .rte_mergable = bgp_rte_mergable, + .rte_igp_metric = bgp_rte_igp_metric, +}; + static struct proto * bgp_init(struct proto_config *CF) { @@ -1697,10 +1735,9 @@ bgp_init(struct proto_config *CF) P->reload_routes = bgp_reload_routes; P->feed_begin = bgp_feed_begin; P->feed_end = bgp_feed_end; - P->rte_better = bgp_rte_better; - P->rte_mergable = bgp_rte_mergable; - P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; - P->rte_modify = bgp_rte_modify_stale; + + P->sources.class = &bgp_rte_owner_class; + P->sources.rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL; p->cf = cf; p->is_internal = (cf->local_as == cf->remote_as); @@ -1767,14 +1804,14 @@ bgp_channel_start(struct channel *C) } c->pool = p->p.pool; // XXXX - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); if (c->cf->import_table) channel_setup_in_table(C); if (c->cf->export_table) - channel_setup_out_table(C); + bgp_setup_out_table(c); + + bgp_init_pending_tx(c); c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0); @@ -2183,7 +2220,7 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->cost != old->cost)) { /* import_changed itself does not force ROUTE_REFRESH when import_table is active */ - if (c->c.in_table && (c->c.channel_state == CS_UP)) + if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP)) bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); *import_changed = 1; @@ -2621,7 +2658,6 @@ struct channel_class channel_bgp = { struct protocol proto_bgp = { .name = "BGP", .template = "bgp%d", - .class = PROTOCOL_BGP, .preference = DEF_PREF_BGP, .channel_mask = NB_IP | NB_VPN | NB_FLOW, .proto_size = sizeof(struct bgp_proto), @@ -2633,7 +2669,11 @@ struct protocol proto_bgp = { .reconfigure = bgp_reconfigure, .copy_config = bgp_copy_config, .get_status = bgp_get_status, - .get_attr = bgp_get_attr, - .get_route_info = bgp_get_route_info, .show_proto_info = bgp_show_proto_info }; + +void bgp_build(void) +{ + proto_build(&proto_bgp); + bgp_register_attrs(); +} diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index fea87304..0cd327a2 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -14,13 +14,12 @@ #include <stdint.h> #include <setjmp.h> #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" //#include "lib/lists.h" #include "lib/hash.h" #include "lib/socket.h" -struct linpool; struct eattr; @@ -68,10 +67,10 @@ struct bgp_af_desc { u8 no_igp; const char *name; uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size); - void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); + void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a); void (*update_next_hop)(struct bgp_export_state *s, eattr *nh, ea_list **to); uint (*encode_next_hop)(struct bgp_write_state *s, eattr *nh, byte *buf, uint size); - void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); + void (*decode_next_hop)(struct bgp_parse_state *s, byte *pos, uint len, ea_list **to); }; @@ -159,7 +158,7 @@ struct bgp_channel_config { u8 aigp_originate; /* AIGP is originated automatically */ u32 cost; /* IGP cost for direct next hops */ u8 import_table; /* Use c.in_table as Adj-RIB-In */ - u8 export_table; /* Use c.out_table as Adj-RIB-Out */ + u8 export_table; /* Keep Adj-RIB-Out and export it */ struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ @@ -212,6 +211,10 @@ struct bgp_channel_config { #define BGP_BFD_GRACEFUL 2 /* BFD down triggers graceful restart */ +/* rte->pflags */ +#define BGP_REF_SUPPRESSED 0x1 /* Used for deterministic MED comparison */ +#define BGP_REF_STALE 0x2 /* Route is LLGR_STATE */ +#define BGP_REF_NOT_STALE 0x4 /* Route is NOT LLGR_STATE */ struct bgp_af_caps { u32 afi; @@ -326,6 +329,7 @@ struct bgp_proto { struct bgp_socket *sock; /* Shared listening socket */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ struct birdsock *postponed_sk; /* Postponed incoming socket for dynamic BGP */ + event *uncork_ev; /* Uncork event in case of congestion */ struct bgp_stats stats; /* BGP statistics */ btime last_established; /* Last time of enter/leave of established state */ btime last_rx_update; /* Last time of RX update */ @@ -357,12 +361,8 @@ struct bgp_channel { /* Rest are zeroed when down */ pool *pool; - HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ - struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ - list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ - - HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ - slab *prefix_slab; /* Slab holding prefix nodes */ + struct bgp_pending_tx *ptx; /* Routes waiting to be sent */ + struct rt_exporter prefix_exporter; /* Table-like exporter for ptx */ ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ ip_addr link_addr; /* Link-local version of next_hop_addr */ @@ -376,6 +376,7 @@ struct bgp_channel { timer *stale_timer; /* Long-lived stale timer for LLGR */ u32 stale_time; /* Stored LLGR stale time from last session */ + struct rt_export_request stale_feed; /* Feeder request for stale route modification */ u8 add_path_rx; /* Session expects receive of ADD-PATH extended NLRI */ u8 add_path_tx; /* Session expects transmit of ADD-PATH extended NLRI */ @@ -385,8 +386,11 @@ struct bgp_channel { }; struct bgp_prefix { - node buck_node; /* Node in per-bucket list */ + node buck_node_xx; /* Node in per-bucket list */ struct bgp_prefix *next; /* Node in prefix hash table */ + struct bgp_bucket *last; /* Last bucket sent with this prefix */ + struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */ + btime lastmod; /* Last modification of this prefix */ u32 hash; u32 path_id; net_addr net[0]; @@ -395,11 +399,24 @@ struct bgp_prefix { struct bgp_bucket { node send_node; /* Node in send queue */ struct bgp_bucket *next; /* Node in bucket hash table */ - list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */ + list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */ u32 hash; /* Hash over extended attributes */ + u32 px_uc; /* How many prefixes are linking this bucket */ ea_list eattrs[0]; /* Per-bucket extended attributes */ }; +struct bgp_pending_tx { + resource r; + pool *pool; + + HASH(struct bgp_bucket) bucket_hash; /* Hash table of route buckets */ + struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */ + list bucket_queue; /* Queue of buckets to send (struct bgp_bucket) */ + + HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ + slab *prefix_slab; /* Slab holding prefix nodes */ +}; + struct bgp_export_state { struct bgp_proto *proto; struct bgp_channel *channel; @@ -463,14 +480,12 @@ struct bgp_parse_state { uint err_subcode; jmp_buf err_jmpbuf; - struct hostentry *hostentry; - struct rtable *base_table; adata *mpls_labels; /* Cached state for bgp_rte_update() */ u32 last_id; struct rte_src *last_src; - rta *cached_rta; + ea_list *cached_ea; }; #define BGP_PORT 179 @@ -511,9 +526,6 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode) longjmp(s->err_jmpbuf, 1); } -extern struct linpool *bgp_linpool; -extern struct linpool *bgp_linpool2; - void bgp_start_timer(timer *t, uint value); void bgp_check_config(struct bgp_config *c); @@ -535,11 +547,17 @@ struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); static inline int -rte_resolvable(rte *rt) +rte_resolvable(const rte *rt) { - return rt->attrs->dest != RTD_UNREACHABLE; + eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop); + if (!nhea) + return 0; + + struct nexthop_adata *nhad = (void *) nhea->u.ptr; + return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } +extern struct rte_owner_class bgp_rte_owner_class; #ifdef LOCAL_DEBUG #define BGP_FORCE_DEBUG 1 @@ -555,74 +573,61 @@ rte_resolvable(rte *rt) /* attrs.c */ -static inline eattr * -bgp_find_attr(ea_list *attrs, uint code) -{ - return ea_find(attrs, EA_CODE(PROTOCOL_BGP, code)); -} - eattr * -bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val); - -static inline void -bgp_set_attr_u32(ea_list **to, struct linpool *pool, uint code, uint flags, u32 val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } - -static inline void -bgp_set_attr_ptr(ea_list **to, struct linpool *pool, uint code, uint flags, const struct adata *val) -{ bgp_set_attr(to, pool, code, flags, (uintptr_t) val); } - -static inline void -bgp_set_attr_data(ea_list **to, struct linpool *pool, uint code, uint flags, void *data, uint len) -{ - struct adata *a = lp_alloc_adata(pool, len); - bmemcpy(a->data, data, len); - bgp_set_attr(to, pool, code, flags, (uintptr_t) a); -} +bgp_find_attr(ea_list *attrs, uint code); -static inline void -bgp_unset_attr(ea_list **to, struct linpool *pool, uint code) -{ eattr *e = bgp_set_attr(to, pool, code, 0, 0); e->type = EAF_TYPE_UNDEF; } +void bgp_set_attr_u32(ea_list **to, uint code, uint flags, u32 val); +void bgp_set_attr_ptr(ea_list **to, uint code, uint flags, const struct adata *ad); +void bgp_set_attr_data(ea_list **to, uint code, uint flags, void *data, uint len); +void bgp_unset_attr(ea_list **to, uint code); int bgp_encode_mp_reach_mrt(struct bgp_write_state *s, eattr *a, byte *buf, uint size); int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end); ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len); -void bgp_finish_attrs(struct bgp_parse_state *s, rta *a); +void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to); + +void bgp_setup_out_table(struct bgp_channel *c); + +void bgp_init_pending_tx(struct bgp_channel *c); +void bgp_free_pending_tx(struct bgp_channel *c); -void bgp_init_bucket_table(struct bgp_channel *c); -void bgp_free_bucket_table(struct bgp_channel *c); -void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b); -void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b); void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b); +int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b); -void bgp_init_prefix_table(struct bgp_channel *c); -void bgp_free_prefix_table(struct bgp_channel *c); -void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); +void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); -struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); -void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old); -int bgp_preexport(struct channel *, struct rte **, struct linpool *); -int bgp_get_attr(const struct eattr *e, byte *buf, int buflen); -void bgp_get_route_info(struct rte *, byte *buf); -int bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad); +void bgp_rte_modify_stale(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe UNUSED, rte **feed, uint count); +u32 bgp_rte_igp_metric(const rte *); +void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); +int bgp_preexport(struct channel *, struct rte *); +void bgp_get_route_info(struct rte *, byte *); +int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad); + +static inline struct bgp_proto *bgp_rte_proto(struct rte *rte) +{ + return (rte->src->owner->class == &bgp_rte_owner_class) ? + SKIP_BACK(struct bgp_proto, p.sources, rte->src->owner) : NULL; +} #define BGP_AIGP_METRIC 1 #define BGP_AIGP_MAX U64(0xffffffffffffffff) static inline u64 -bgp_total_aigp_metric(rte *r) +bgp_total_aigp_metric(const rte *e) { u64 metric = BGP_AIGP_MAX; const struct adata *ad; - bgp_total_aigp_metric_(r, &metric, &ad); + bgp_total_aigp_metric_(e, &metric, &ad); return metric; } +void bgp_register_attrs(void); + /* packets.c */ @@ -634,6 +639,7 @@ void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) void bgp_kick_tx(void *vconn); void bgp_tx(struct birdsock *sk); int bgp_rx(struct birdsock *sk, uint size); +void bgp_uncork(void *vp); const char * bgp_error_dsc(unsigned code, unsigned subcode); void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len); @@ -659,27 +665,32 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to); #define BAF_DECODE_FLAGS 0x0100 /* Private flag - attribute flags are handled by the decode hook */ -#define BA_ORIGIN 0x01 /* RFC 4271 */ /* WM */ -#define BA_AS_PATH 0x02 /* WM */ -#define BA_NEXT_HOP 0x03 /* WM */ -#define BA_MULTI_EXIT_DISC 0x04 /* ON */ -#define BA_LOCAL_PREF 0x05 /* WD */ -#define BA_ATOMIC_AGGR 0x06 /* WD */ -#define BA_AGGREGATOR 0x07 /* OT */ -#define BA_COMMUNITY 0x08 /* RFC 1997 */ /* OT */ -#define BA_ORIGINATOR_ID 0x09 /* RFC 4456 */ /* ON */ -#define BA_CLUSTER_LIST 0x0a /* RFC 4456 */ /* ON */ -#define BA_MP_REACH_NLRI 0x0e /* RFC 4760 */ -#define BA_MP_UNREACH_NLRI 0x0f /* RFC 4760 */ -#define BA_EXT_COMMUNITY 0x10 /* RFC 4360 */ -#define BA_AS4_PATH 0x11 /* RFC 6793 */ -#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ -#define BA_AIGP 0x1a /* RFC 7311 */ -#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ +enum bgp_attr_id { + BA_ORIGIN = 0x01, /* RFC 4271 */ /* WM */ + BA_AS_PATH = 0x02, /* WM */ + BA_NEXT_HOP = 0x03, /* WM */ + BA_MULTI_EXIT_DISC = 0x04, /* ON */ + BA_LOCAL_PREF = 0x05, /* WD */ + BA_ATOMIC_AGGR = 0x06, /* WD */ + BA_AGGREGATOR = 0x07, /* OT */ + BA_COMMUNITY = 0x08, /* RFC 1997 */ /* OT */ + BA_ORIGINATOR_ID = 0x09, /* RFC 4456 */ /* ON */ + BA_CLUSTER_LIST = 0x0a, /* RFC 4456 */ /* ON */ + BA_MP_REACH_NLRI = 0x0e, /* RFC 4760 */ + BA_MP_UNREACH_NLRI = 0x0f, /* RFC 4760 */ + BA_EXT_COMMUNITY = 0x10, /* RFC 4360 */ + BA_AS4_PATH = 0x11, /* RFC 6793 */ + BA_AS4_AGGREGATOR = 0x12, /* RFC 6793 */ + BA_AIGP = 0x1a, /* RFC 7311 */ + BA_LARGE_COMMUNITY = 0x20, /* RFC 8092 */ #define BA_ONLY_TO_CUSTOMER 0x23 /* RFC 9234 */ /* Bird's private internal BGP attributes */ -#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */ + BA_MPLS_LABEL_STACK = 0x100, /* MPLS label stack transfer attribute */ + +/* Maximum */ + BGP_ATTR_MAX, +}; /* BGP connection states */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index cb410a5e..9f0d2306 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -19,18 +19,17 @@ CF_DECLS CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE, MULTIHOP, STARTUP, VIA, NEXT, HOP, SELF, DEFAULT, PATH, METRIC, ERROR, - START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, BGP_PATH, - BGP_LOCAL_PREF, BGP_MED, BGP_ORIGIN, BGP_NEXT_HOP, BGP_ATOMIC_AGGR, - BGP_AGGREGATOR, BGP_COMMUNITY, BGP_EXT_COMMUNITY, BGP_LARGE_COMMUNITY, + START, DELAY, FORGET, WAIT, ENABLE, DISABLE, AFTER, + BGP_LOCAL_PREF, BGP_MED, SOURCE, ADDRESS, PASSWORD, RR, RS, CLIENT, CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE, PREFER, OLDER, MISSING, LLADDR, - DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, - BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, + DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, + IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY, STRICT, BIND, CONFEDERATION, MEMBER, MULTICAST, FLOW4, FLOW6, LONG, LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS, - DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE, + DYNAMIC, RANGE, NAME, DIGITS, AIGP, ORIGINATE, COST, ENFORCE, FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER, RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC) @@ -45,6 +44,8 @@ CF_KEYWORDS(CEASE, PREFIX, LIMIT, HIT, ADMINISTRATIVE, SHUTDOWN, RESET, PEER, CF_GRAMMAR +toksym: BGP_MED | BGP_LOCAL_PREF | SOURCE ; + proto: bgp_proto '}' ; bgp_proto_start: proto_start BGP { @@ -328,38 +329,6 @@ bgp_channel_end: bgp_proto_channel: bgp_channel_start bgp_channel_opt_list bgp_channel_end; - -dynamic_attr: BGP_ORIGIN - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_BGP_ORIGIN, EA_CODE(PROTOCOL_BGP, BA_ORIGIN)); } ; -dynamic_attr: BGP_PATH - { $$ = f_new_dynamic_attr(EAF_TYPE_AS_PATH, T_PATH, EA_CODE(PROTOCOL_BGP, BA_AS_PATH)); } ; -dynamic_attr: BGP_NEXT_HOP - { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)); } ; -dynamic_attr: BGP_MED - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC)); } ; -dynamic_attr: BGP_LOCAL_PREF - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF)); } ; -dynamic_attr: BGP_ATOMIC_AGGR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_ATOMIC_AGGR)); } ; -dynamic_attr: BGP_AGGREGATOR - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AGGREGATOR)); } ; -dynamic_attr: BGP_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY)); } ; -dynamic_attr: BGP_ORIGINATOR_ID - { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID)); } ; -dynamic_attr: BGP_CLUSTER_LIST - { $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ; -dynamic_attr: BGP_EXT_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ; -dynamic_attr: BGP_AIGP - { $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ; -dynamic_attr: BGP_LARGE_COMMUNITY - { $$ = f_new_dynamic_attr(EAF_TYPE_LC_SET, T_LCLIST, EA_CODE(PROTOCOL_BGP, BA_LARGE_COMMUNITY)); } ; -dynamic_attr: BGP_OTC - { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_CODE(PROTOCOL_BGP, BA_ONLY_TO_CUSTOMER)); } ; - - - CF_ENUM(T_ENUM_BGP_ORIGIN, ORIGIN_, IGP, EGP, INCOMPLETE) CF_CODE diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 54423a1a..d4d2d0b0 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -15,8 +15,8 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" -#include "nest/attrs.h" +#include "nest/rt.h" +#include "lib/attrs.h" #include "proto/mrt/mrt.h" #include "conf/conf.h" #include "lib/unaligned.h" @@ -986,7 +986,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len) #define MISMATCHED_AF " - mismatched address family (%I for %s)" static void -bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) +bgp_apply_next_hop(struct bgp_parse_state *s, ea_list **to, ip_addr gw, ip_addr ll) { struct bgp_proto *p = s->proto; struct bgp_channel *c = s->channel; @@ -1009,10 +1009,18 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) if (nbr->scope == SCOPE_HOST) WITHDRAW(BAD_NEXT_HOP " - address %I is local", nbr->addr); - a->dest = RTD_UNICAST; - a->nh.gw = nbr->addr; - a->nh.iface = nbr->iface; - a->igp_metric = c->cf->cost; + ea_set_attr_u32(to, &ea_gen_igp_metric, 0, c->cf->cost); + + struct nexthop_adata nhad = { + .nh = { + .gw = nbr->addr, + .iface = nbr->iface, + }, + .ad = { + .length = sizeof nhad - sizeof nhad.ad, + }, + }; + ea_set_attr_data(to, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); } else /* GW_RECURSIVE */ { @@ -1020,62 +1028,51 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll) WITHDRAW(BAD_NEXT_HOP " - zero address"); rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6; - s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table); - - if (!s->mpls) - rta_apply_hostentry(a, s->hostentry, NULL); - - /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */ + if (s->mpls) + { + u32 labels[BGP_MPLS_MAX]; + ea_set_hostentry(to, c->c.table, tab, gw, ll, BGP_MPLS_MAX, labels); + } + else + ea_set_hostentry(to, c->c.table, tab, gw, ll, 0, NULL); } } static void -bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum) +bgp_apply_mpls_labels(struct bgp_parse_state *s, ea_list **to, u32 lnum, u32 labels[lnum]) { if (lnum > MPLS_MAX_LABEL_STACK) { REPORT("Too many MPLS labels ($u)", lnum); - a->dest = RTD_UNREACHABLE; - a->hostentry = NULL; - a->nh = (struct nexthop) { }; + ea_set_dest(to, 0, RTD_UNREACHABLE); return; } /* Handle implicit NULL as empty MPLS stack */ if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL)) - lnum = 0; + lnum = s->mpls_labels->length = 0; if (s->channel->cf->gw_mode == GW_DIRECT) { - a->nh.labels = lnum; - memcpy(a->nh.label, labels, 4*lnum); + eattr *e = ea_find(*to, &ea_gen_nexthop); + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nh; + + memcpy(&nh.nhad, e->u.ptr, sizeof(struct adata) + e->u.ptr->length); + nh.nhad.nh.labels = lnum; + memcpy(nh.labels, labels, lnum * sizeof(u32)); + nh.nhad.ad.length = sizeof nh.nhad + lnum * sizeof(u32); } else /* GW_RECURSIVE */ { - mpls_label_stack ms; - - ms.len = lnum; - memcpy(ms.stack, labels, 4*lnum); - rta_apply_hostentry(a, s->hostentry, &ms); - } -} - -static void -bgp_apply_flow_validation(struct bgp_parse_state *s, const net_addr *n, rta *a) -{ - struct bgp_channel *c = s->channel; - int valid = rt_flowspec_check(c->base_table, c->c.table, n, a, s->proto->is_interior); - a->dest = valid ? RTD_NONE : RTD_UNREACHABLE; - - /* Set rte.bgp.base_table later from this state variable */ - s->base_table = c->base_table; - - /* Invalidate cached rta if dest changes */ - if (s->cached_rta && (s->cached_rta->dest != a->dest)) - { - rta_free(s->cached_rta); - s->cached_rta = NULL; + eattr *e = ea_find(*to, &ea_gen_hostentry); + ASSERT_DIE(e); + struct hostentry_adata *head = (void *) e->u.ptr; + memcpy(&head->labels, labels, lnum * sizeof(u32)); + head->ad.length = (void *)(&head->labels[lnum]) - (void *) head->ad.data; } } @@ -1108,7 +1105,7 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return 1; /* Keep it when explicitly set in export filter */ - if (a->type & EAF_FRESH) + if (a->fresh) return 1; /* Check for non-matching AF */ @@ -1125,31 +1122,41 @@ bgp_use_next_hop(struct bgp_export_state *s, eattr *a) return p->neigh && (p->neigh->iface == ifa); } -static inline int +static inline struct nexthop * bgp_use_gateway(struct bgp_export_state *s) { struct bgp_proto *p = s->proto; struct bgp_channel *c = s->channel; - rta *ra = s->route->attrs; + ea_list *ra = s->route->attrs; /* Handle next hop self option - also applies to gateway */ if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self)) - return 0; + return NULL; + + eattr *nhea = ea_find(ra, &ea_gen_nexthop); + if (!nhea) + return NULL; /* We need one valid global gateway */ - if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw)) - return 0; + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (!NEXTHOP_IS_REACHABLE(nhad) || + !NEXTHOP_ONE(nhad) || ipa_zero(nhad->nh.gw) || + ipa_is_link_local(nhad->nh.gw)) + return NULL; /* Check for non-matching AF */ - if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) - return 0; + if ((ipa_is_ip4(nhad->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop) + return NULL; /* Use it when exported to internal peers */ if (p->is_interior) - return 1; + return &nhad->nh; /* Use it when forwarded to single-hop BGP peer on on the same iface */ - return p->neigh && (p->neigh->iface == ra->nh.iface); + if (p->neigh && (p->neigh->iface == nhad->nh.iface)) + return &nhad->nh; + + return NULL; } static void @@ -1157,31 +1164,31 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to) { if (!a || !bgp_use_next_hop(s, a)) { - if (bgp_use_gateway(s)) + struct nexthop *nhloc; + if (nhloc = bgp_use_gateway(s)) { - rta *ra = s->route->attrs; - ip_addr nh[1] = { ra->nh.gw }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); + ip_addr nh[1] = { nhloc->gw }; + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, 16); if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null; - uint lnum = ra->nh.labels ? ra->nh.labels : 1; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); + u32 *labels = nhloc->labels ? nhloc->label : &implicit_null; + uint lnum = nhloc->labels ? nhloc->labels : 1; + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4); } } else { ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; - bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); + bgp_set_attr_data(to, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); s->local_next_hop = 1; /* TODO: Use local MPLS assigned label */ if (s->mpls) { u32 implicit_null = BGP_MPLS_NULL; - bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); + bgp_set_attr_data(to, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4); } } } @@ -1244,7 +1251,7 @@ bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size } static void -bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) +bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, ea_list **to) { struct bgp_channel *c = s->channel; struct adata *ad = lp_alloc_adata(s->pool, 32); @@ -1285,8 +1292,8 @@ bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a) // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); - bgp_apply_next_hop(s, a, nh[0], nh[1]); + bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, to, nh[0], nh[1]); } static uint @@ -1324,7 +1331,7 @@ bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint siz } static void -bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) +bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, ea_list **to) { struct bgp_channel *c = s->channel; struct adata *ad = lp_alloc_adata(s->pool, 32); @@ -1366,8 +1373,8 @@ bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a) // XXXX validate next hop - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad); - bgp_apply_next_hop(s, a, nh[0], nh[1]); + bgp_set_attr_ptr(to, BA_NEXT_HOP, 0, ad); + bgp_apply_next_hop(s, to, nh[0], nh[1]); } @@ -1379,7 +1386,7 @@ bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte } static void -bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED) +bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED) { /* * Although we expect no next hop and RFC 7606 7.11 states that attribute @@ -1391,11 +1398,11 @@ bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, ui } static void -bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) +bgp_update_next_hop_none(struct bgp_export_state *s UNUSED, eattr *a, ea_list **to) { /* NEXT_HOP shall not pass */ if (a) - bgp_unset_attr(to, s->pool, BA_NEXT_HOP); + bgp_unset_attr(to, BA_NEXT_HOP); } @@ -1404,15 +1411,17 @@ bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to) */ static void -bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a0) +bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_list *a0) { if (path_id != s->last_id) { + rt_unlock_source(s->last_src); + s->last_src = rt_get_source(&s->proto->p, path_id); s->last_id = path_id; - rta_free(s->cached_rta); - s->cached_rta = NULL; + ea_free(s->cached_ea); + s->cached_ea = NULL; } if (!a0) @@ -1422,29 +1431,20 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a REPORT("Invalid route %N withdrawn", n); /* Route withdraw */ - rte_update3(&s->channel->c, n, NULL, s->last_src); + rte_update(&s->channel->c, n, NULL, s->last_src); return; } /* Prepare cached route attributes */ - if (s->cached_rta == NULL) - { - a0->src = s->last_src; - - /* Workaround for rta_lookup() breaking eattrs */ - ea_list *ea = a0->eattrs; - s->cached_rta = rta_lookup(a0); - a0->eattrs = ea; - } + if (s->cached_ea == NULL) + s->cached_ea = ea_lookup(a0, 0); - rta *a = rta_clone(s->cached_rta); - rte *e = rte_get_temp(a); + rte e0 = { + .attrs = s->cached_ea, + .src = s->last_src, + }; - e->pflags = 0; - e->u.bgp.suppressed = 0; - e->u.bgp.stale = -1; - e->u.bgp.base_table = s->base_table; - rte_update3(&s->channel->c, n, e, s->last_src); + rte_update(&s->channel->c, n, &e0, s->last_src); } static void @@ -1467,9 +1467,10 @@ bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte } static void -bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a) +bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, ea_list **to) { - u32 labels[BGP_MPLS_MAX], label; + u32 labels[BGP_MPLS_MAX]; + u32 label; uint lnum = 0; do { @@ -1488,26 +1489,15 @@ bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *p } while (!(label & BGP_MPLS_BOS)); - if (!a) + if (!*to) return; - /* Attach MPLS attribute unless we already have one */ - if (!s->mpls_labels) - { - s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX); - bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels); - } - - /* Overwrite data in the attribute */ - s->mpls_labels->length = 4*lnum; - memcpy(s->mpls_labels->data, labels, 4*lnum); - /* Update next hop entry in rta */ - bgp_apply_mpls_labels(s, a, labels, lnum); + bgp_apply_mpls_labels(s, to, lnum, labels); /* Attributes were changed, invalidate cached entry */ - rta_free(s->cached_rta); - s->cached_rta = NULL; + rta_free(s->cached_ea); + s->cached_ea = NULL; return; } @@ -1543,14 +1533,14 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1576,7 +1566,7 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); if (l > IP4_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); @@ -1628,14 +1618,14 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1661,7 +1651,7 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); if (l > IP6_MAX_PREFIX_LENGTH) bgp_parse_error(s, 10); @@ -1716,14 +1706,14 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1749,7 +1739,7 @@ bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); /* Decode route distinguisher */ if (l < 64) @@ -1813,14 +1803,14 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1846,7 +1836,7 @@ bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) /* Decode MPLS labels */ if (s->mpls) - bgp_decode_mpls_labels(s, &pos, &len, &l, a); + bgp_decode_mpls_labels(s, &pos, &len, &l, &a); /* Decode route distinguisher */ if (l < 64) @@ -1900,14 +1890,14 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -1958,10 +1948,6 @@ bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) net_fill_flow4(n, px, pxlen, pos, flen); ADVANCE(pos, len, flen); - /* Apply validation procedure per RFC 8955 (6) */ - if (a && s->channel->cf->validate) - bgp_apply_flow_validation(s, n, a); - bgp_rte_update(s, n, path_id, a); } } @@ -1992,14 +1978,14 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; } static void -bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) +bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, ea_list *a) { while (len) { @@ -2050,10 +2036,6 @@ bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) net_fill_flow6(n, px, pxlen, pos, flen); ADVANCE(pos, len, flen); - /* Apply validation procedure per RFC 8955 (6) */ - if (a && s->channel->cf->validate) - bgp_apply_flow_validation(s, n, a); - bgp_rte_update(s, n, path_id, a); } } @@ -2230,6 +2212,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu * var IPv4 Network Layer Reachability Information */ + ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck); + int lr, la; la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH); @@ -2251,6 +2235,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu static byte * bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { + ASSERT_DIE(s->channel->ptx->withdraw_bucket != buck); + /* * 2 B IPv4 Withdrawn Routes Length (zero) * --- IPv4 Withdrawn Routes NLRI (unused) @@ -2374,11 +2360,14 @@ bgp_create_update(struct bgp_channel *c, byte *buf) again: ; + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize write state */ struct bgp_write_state s = { .proto = p, .channel = c, - .pool = bgp_linpool, + .pool = tmp_linpool, .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop, .as4_session = p->as4_session, .add_path = c->add_path_tx, @@ -2386,7 +2375,7 @@ again: ; }; /* Try unreachable bucket */ - if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) + if ((buck = c->ptx->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) { res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ? bgp_create_ip_unreach(&s, buck, buf, end): @@ -2396,14 +2385,14 @@ again: ; } /* Try reachable buckets */ - if (!EMPTY_LIST(c->bucket_queue)) + if (!EMPTY_LIST(c->ptx->bucket_queue)) { - buck = HEAD(c->bucket_queue); + buck = HEAD(c->ptx->bucket_queue); /* Cleanup empty buckets */ - if (EMPTY_LIST(buck->prefixes)) + if (bgp_done_bucket(c, buck)) { - bgp_free_bucket(c, buck); + lp_restore(tmp_linpool, &tmpp); goto again; } @@ -2411,13 +2400,13 @@ again: ; bgp_create_ip_reach(&s, buck, buf, end): bgp_create_mp_reach(&s, buck, buf, end); - if (EMPTY_LIST(buck->prefixes)) - bgp_free_bucket(c, buck); - else - bgp_defer_bucket(c, buck); + bgp_done_bucket(c, buck); if (!res) + { + lp_restore(tmp_linpool, &tmpp); goto again; + } goto done; } @@ -2428,7 +2417,7 @@ again: ; done: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE"); p->stats.tx_updates++; - lp_flush(s.pool); + lp_restore(tmp_linpool, &tmpp); return res; } @@ -2495,7 +2484,6 @@ static inline void bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len) { struct bgp_channel *c = bgp_get_channel(s->proto, afi); - rta *a = NULL; if (!c) DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi)); @@ -2506,8 +2494,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis s->last_id = 0; s->last_src = s->proto->p.main_source; - - s->base_table = NULL; + rt_lock_source(s->last_src); /* * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not @@ -2518,25 +2505,24 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis if (ea) { - a = allocz(RTA_MAX_SIZE); - - a->source = RTS_BGP; - a->scope = SCOPE_UNIVERSE; - a->from = s->proto->remote_ip; - a->eattrs = ea; + ea_set_attr_data(&ea, &ea_gen_from, 0, &s->proto->remote_ip, sizeof(ip_addr)); + ea_set_attr_u32(&ea, &ea_gen_preference, 0, c->c.preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_BGP); - c->desc->decode_next_hop(s, nh, nh_len, a); - bgp_finish_attrs(s, a); + c->desc->decode_next_hop(s, nh, nh_len, &ea); + bgp_finish_attrs(s, &ea); /* Handle withdraw during next hop decoding */ if (s->err_withdraw) - a = NULL; + ea = NULL; } - c->desc->decode_nlri(s, nlri, len, a); + c->desc->decode_nlri(s, nlri, len, ea); - rta_free(s->cached_rta); - s->cached_rta = NULL; + rta_free(s->cached_ea); + s->cached_ea = NULL; + + rt_unlock_source(s->last_src); } static void @@ -2558,10 +2544,13 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) bgp_start_timer(conn->hold_timer, conn->hold_time); + struct lp_state tmpp; + lp_save(tmp_linpool, &tmpp); + /* Initialize parse state */ struct bgp_parse_state s = { .proto = p, - .pool = bgp_linpool, + .pool = tmp_linpool, .as4_session = p->as4_session, }; @@ -2638,8 +2627,8 @@ bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len) ea, s.mp_next_hop_data, s.mp_next_hop_len); done: - rta_free(s.cached_rta); - lp_flush(s.pool); + rta_free(s.cached_ea); + lp_restore(tmp_linpool, &tmpp); return; } @@ -2962,7 +2951,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) { ASSERT(conn->sk); - DBG("BGP: Scheduling packet type %d\n", type); + struct bgp_proto *p = conn->bgp; + if (c) + BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name); + else + BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type); if (c) { @@ -3231,6 +3224,21 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len) } } +void +bgp_uncork(void *vp) +{ + struct bgp_proto *p = vp; + + if (p && p->conn && (p->conn->state == BS_ESTABLISHED) && !p->conn->sk->rx_hook) + { + struct birdsock *sk = p->conn->sk; + ASSERT_DIE(sk->rpos > sk->rbuf); + sk->rx_hook = bgp_rx; + bgp_rx(sk, sk->rpos - sk->rbuf); + BGP_TRACE(D_PACKETS, "Uncorked"); + } +} + /** * bgp_rx - handle received data * @sk: socket @@ -3245,6 +3253,7 @@ int bgp_rx(sock *sk, uint size) { struct bgp_conn *conn = sk->data; + struct bgp_proto *p = conn->bgp; byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; uint i, len; @@ -3254,6 +3263,12 @@ bgp_rx(sock *sk, uint size) { if ((conn->state == BS_CLOSE) || (conn->sk != sk)) return 0; + if ((conn->state == BS_ESTABLISHED) && rt_cork_check(conn->bgp->uncork_ev)) + { + sk->rx_hook = NULL; + BGP_TRACE(D_PACKETS, "Corked"); + return 0; + } for(i=0; i<16; i++) if (pkt_start[i] != 0xff) { diff --git a/proto/mrt/Makefile b/proto/mrt/Makefile index 925fb102..000e1c1c 100644 --- a/proto/mrt/Makefile +++ b/proto/mrt/Makefile @@ -2,5 +2,6 @@ src := mrt.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,mrt_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index 8d97c860..f4c09ab1 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b) } #define MRT_DEFINE_TYPE(S, T) \ - static inline void mrt_put_##S##_(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ - static inline void mrt_put_##S(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ @@ -423,7 +423,7 @@ mrt_rib_table_header(struct mrt_table_dump_state *s, net_addr *n) static void mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) { - struct ea_list *eattrs = r->attrs->eattrs; + struct ea_list *eattrs = r->attrs; buffer *b = &s->buf; if (!eattrs) @@ -431,7 +431,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) /* Attribute list must be normalized for bgp_encode_attrs() */ if (!rta_is_cached(r->attrs)) - ea_normalize(eattrs); + eattrs = ea_normalize(eattrs, 0); mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); byte *pos = b->pos; @@ -460,7 +460,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) return; fail: - mrt_log(s, "Attribute list too long for %N", r->net->n.addr); + mrt_log(s, "Attribute list too long for %N", r->net); } #endif @@ -472,9 +472,9 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) #ifdef CONFIG_BGP /* Find peer index */ - if (r->attrs->src->proto->proto == &proto_bgp) + struct bgp_proto *p = bgp_rte_proto(r); + if (p) { - struct bgp_proto *p = (void *) r->attrs->src->proto; struct mrt_peer_entry *n = HASH_FIND(s->peer_hash, PEER, p->remote_id, p->remote_as, p->remote_ip); @@ -488,7 +488,7 @@ mrt_rib_table_entry(struct mrt_table_dump_state *s, rte *r) /* Path Identifier */ if (s->add_path) - mrt_put_u32(b, r->attrs->src->private_id); + mrt_put_u32(b, r->src->private_id); /* Route Attributes */ mrt_put_u16(b, 0); @@ -512,26 +512,21 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype); mrt_rib_table_header(s, n->n.addr); - rte *rt, *rt0; - for (rt0 = n->routes; rt = rt0; rt0 = rt0->next) + for (struct rte_storage *rt, *rt0 = n->routes; rt = rt0; rt0 = rt0->next) { - if (rte_is_filtered(rt)) + if (rte_is_filtered(&rt->rte)) continue; /* Skip routes that should be reported in the other phase */ - if (!s->always_add_path && (!rt->attrs->src->private_id != !s->add_path)) + if (!s->always_add_path && (!rt->rte.src->private_id != !s->add_path)) { s->want_add_path = 1; continue; } - rte_make_tmp_attrs(&rt, s->linpool, NULL); - - if (f_run(s->filter, &rt, s->linpool, 0) <= F_ACCEPT) - mrt_rib_table_entry(s, rt); - - if (rt != rt0) - rte_free(rt); + rte e = rt->rte; + if (f_run(s->filter, &e, 0) <= F_ACCEPT) + mrt_rib_table_entry(s, &e); lp_flush(s->linpool); } @@ -562,8 +557,8 @@ mrt_table_dump_init(pool *pp) struct mrt_table_dump_state *s = mb_allocz(pool, sizeof(struct mrt_table_dump_state)); s->pool = pool; - s->linpool = lp_new(pool, 4080); - s->peer_lp = lp_new(pool, 4080); + s->linpool = lp_new(pool); + s->peer_lp = lp_new(pool); mrt_buffer_init(&s->buf, pool, 2 * MRT_ATTR_BUFFER_SIZE); /* We lock the current config as we may reference it indirectly by filter */ @@ -708,14 +703,17 @@ mrt_dump_cont(struct cli *c) cli_printf(c, 0, ""); mrt_table_dump_free(c->rover); - c->cont = c->cleanup = c->rover = NULL; + c->cont = NULL; + c->cleanup = NULL; + c->rover = NULL; } -static void +static int mrt_dump_cleanup(struct cli *c) { mrt_table_dump_free(c->rover); c->rover = NULL; + return 0; } void @@ -909,7 +907,6 @@ mrt_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSE struct protocol proto_mrt = { .name = "MRT", .template = "mrt%d", - .class = PROTOCOL_MRT, .proto_size = sizeof(struct mrt_proto), .config_size = sizeof(struct mrt_config), .init = mrt_init, @@ -918,3 +915,9 @@ struct protocol proto_mrt = { .reconfigure = mrt_reconfigure, .copy_config = mrt_copy_config, }; + +void +mrt_build(void) +{ + proto_build(&proto_mrt); +} diff --git a/proto/mrt/mrt.h b/proto/mrt/mrt.h index 4ff94c12..3b83aa39 100644 --- a/proto/mrt/mrt.h +++ b/proto/mrt/mrt.h @@ -13,7 +13,7 @@ #include "nest/bird.h" #include "nest/protocol.h" #include "lib/lists.h" -#include "nest/route.h" +#include "nest/rt.h" #include "lib/event.h" #include "lib/hash.h" diff --git a/proto/ospf/Makefile b/proto/ospf/Makefile index 39e74f71..85664543 100644 --- a/proto/ospf/Makefile +++ b/proto/ospf/Makefile @@ -2,5 +2,6 @@ src := dbdes.c hello.c iface.c lsack.c lsalib.c lsreq.c lsupd.c neighbor.c ospf. obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,ospf_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 4b7d5a36..bc3df8db 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -190,7 +190,7 @@ ospf_check_auth(void) CF_DECLS -CF_KEYWORDS(OSPF, V2, V3, OSPF_METRIC1, OSPF_METRIC2, OSPF_TAG, OSPF_ROUTER_ID) +CF_KEYWORDS(OSPF, V2, V3) CF_KEYWORDS(AREA, NEIGHBORS, RFC1583COMPAT, STUB, TICK, COST, COST2, RETRANSMIT) CF_KEYWORDS(HELLO, TRANSMIT, PRIORITY, DEAD, TYPE, BROADCAST, BCAST, DEFAULT) CF_KEYWORDS(NONBROADCAST, NBMA, POINTOPOINT, PTP, POINTOMULTIPOINT, PTMP) @@ -505,11 +505,6 @@ ospf_iface: ospf_iface_start ospf_iface_patt_list ospf_iface_opt_list { ospf_iface_finish(); } ; -dynamic_attr: OSPF_METRIC1 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC1); } ; -dynamic_attr: OSPF_METRIC2 { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_METRIC2); } ; -dynamic_attr: OSPF_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_OSPF_TAG); } ; -dynamic_attr: OSPF_ROUTER_ID { $$ = f_new_dynamic_attr(EAF_TYPE_ROUTER_ID, T_QUAD, EA_OSPF_ROUTER_ID); } ; - CF_CLI_HELP(SHOW OSPF, ..., [[Show information about OSPF protocol]]); CF_CLI(SHOW OSPF, optproto, [<name>], [[Show information about OSPF protocol]]) { PROTO_WALK_CMD($3, &proto_ospf, p) ospf_sh(p); }; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index f38b8210..4cd45033 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -522,7 +522,10 @@ static inline void add_nbma_node(struct ospf_iface *ifa, struct nbma_node *src, int found) { struct nbma_node *n = mb_alloc(ifa->pool, sizeof(struct nbma_node)); + + n->n = (node) {}; add_tail(&ifa->nbma_list, NODE n); + n->ip = src->ip; n->eligible = src->eligible; n->found = found; diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 42ffdb06..4e29f960 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -106,13 +106,12 @@ #include <stdlib.h> #include "ospf.h" +#include "lib/macro.h" -static int ospf_preexport(struct channel *C, rte **new, struct linpool *pool); -static void ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool); -static void ospf_store_tmp_attrs(struct rte *rt, struct linpool *pool); +static int ospf_preexport(struct channel *C, rte *new); static void ospf_reload_routes(struct channel *C); static int ospf_rte_better(struct rte *new, struct rte *old); -static int ospf_rte_same(struct rte *new, struct rte *old); +static u32 ospf_rte_igp_metric(const rte *rt); static void ospf_disp(timer *timer); @@ -301,7 +300,7 @@ ospf_start(struct proto *P) p->lsab_size = 256; p->lsab_used = 0; p->lsab = mb_alloc(P->pool, p->lsab_size); - p->nhpool = lp_new(P->pool, 12*sizeof(struct nexthop)); + p->nhpool = lp_new(P->pool); init_list(&(p->iface_list)); init_list(&(p->area_list)); fib_init(&p->rtf, P->pool, ospf_get_af(p), sizeof(ort), OFFSETOF(ort, fn), 0, NULL); @@ -378,10 +377,8 @@ ospf_init(struct proto_config *CF) P->reload_routes = ospf_reload_routes; P->feed_begin = ospf_feed_begin; P->feed_end = ospf_feed_end; - P->make_tmp_attrs = ospf_make_tmp_attrs; - P->store_tmp_attrs = ospf_store_tmp_attrs; - P->rte_better = ospf_rte_better; - P->rte_same = ospf_rte_same; + + P->sources.class = &ospf_rte_owner_class; return P; } @@ -390,35 +387,40 @@ ospf_init(struct proto_config *CF) static int ospf_rte_better(struct rte *new, struct rte *old) { - if (new->u.ospf.metric1 == LSINFINITY) + u32 new_metric1 = ea_get_int(new->attrs, &ea_ospf_metric1, LSINFINITY); + + if (new_metric1 == LSINFINITY) return 0; - if(new->attrs->source < old->attrs->source) return 1; - if(new->attrs->source > old->attrs->source) return 0; + u32 ns = rt_get_source_attr(new); + u32 os = rt_get_source_attr(old); + + if (ns < os) return 1; + if (ns > os) return 0; - if(new->attrs->source == RTS_OSPF_EXT2) + if (ns == RTS_OSPF_EXT2) { - if(new->u.ospf.metric2 < old->u.ospf.metric2) return 1; - if(new->u.ospf.metric2 > old->u.ospf.metric2) return 0; + u32 old_metric2 = ea_get_int(old->attrs, &ea_ospf_metric2, LSINFINITY); + u32 new_metric2 = ea_get_int(new->attrs, &ea_ospf_metric2, LSINFINITY); + if (new_metric2 < old_metric2) return 1; + if (new_metric2 > old_metric2) return 0; } - if (new->u.ospf.metric1 < old->u.ospf.metric1) + u32 old_metric1 = ea_get_int(old->attrs, &ea_ospf_metric1, LSINFINITY); + if (new_metric1 < old_metric1) return 1; return 0; /* Old is shorter or same */ } -static int -ospf_rte_same(struct rte *new, struct rte *old) +static u32 +ospf_rte_igp_metric(const rte *rt) { - /* new->attrs == old->attrs always */ - return - new->u.ospf.metric1 == old->u.ospf.metric1 && - new->u.ospf.metric2 == old->u.ospf.metric2 && - new->u.ospf.tag == old->u.ospf.tag && - new->u.ospf.router_id == old->u.ospf.router_id; -} + if (rt_get_source_attr(rt) == RTS_OSPF_EXT2) + return IGP_METRIC_UNKNOWN; + return ea_get_int(rt->attrs, &ea_ospf_metric1, LSINFINITY); +} void ospf_schedule_rtcalc(struct ospf_proto *p) @@ -484,14 +486,13 @@ ospf_disp(timer * timer) * import to the filters. */ static int -ospf_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) +ospf_preexport(struct channel *C, rte *e) { struct ospf_proto *p = (struct ospf_proto *) C->proto; struct ospf_area *oa = ospf_main_area(p); - rte *e = *new; /* Reject our own routes */ - if (e->attrs->src->proto == &p->p) + if (e->sender == C->in_req.hook) return -1; /* Do not export routes to stub areas */ @@ -501,26 +502,6 @@ ospf_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) return 0; } -static void -ospf_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 4); - rte_make_tmp_attr(rt, EA_OSPF_METRIC1, EAF_TYPE_INT, rt->u.ospf.metric1); - rte_make_tmp_attr(rt, EA_OSPF_METRIC2, EAF_TYPE_INT, rt->u.ospf.metric2); - rte_make_tmp_attr(rt, EA_OSPF_TAG, EAF_TYPE_INT, rt->u.ospf.tag); - rte_make_tmp_attr(rt, EA_OSPF_ROUTER_ID, EAF_TYPE_ROUTER_ID, rt->u.ospf.router_id); -} - -static void -ospf_store_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 4); - rt->u.ospf.metric1 = rte_store_tmp_attr(rt, EA_OSPF_METRIC1); - rt->u.ospf.metric2 = rte_store_tmp_attr(rt, EA_OSPF_METRIC2); - rt->u.ospf.tag = rte_store_tmp_attr(rt, EA_OSPF_TAG); - rt->u.ospf.router_id = rte_store_tmp_attr(rt, EA_OSPF_ROUTER_ID); -} - /** * ospf_shutdown - Finish of OSPF instance * @P: OSPF protocol instance @@ -554,10 +535,13 @@ ospf_shutdown(struct proto *P) /* Cleanup locked rta entries */ FIB_WALK(&p->rtf, ort, nf) { - rta_free(nf->old_rta); + ea_free(nf->old_ea); } FIB_WALK_END; + if (tm_active(p->disp_timer)) + tm_stop(p->disp_timer); + return PS_DOWN; } @@ -590,7 +574,8 @@ ospf_get_route_info(rte * rte, byte * buf) { char *type = "<bug>"; - switch (rte->attrs->source) + uint source = rt_get_source_attr(rte); + switch (source) { case RTS_OSPF: type = "I"; @@ -607,38 +592,26 @@ ospf_get_route_info(rte * rte, byte * buf) } buf += bsprintf(buf, " %s", type); - buf += bsprintf(buf, " (%d/%d", rte->pref, rte->u.ospf.metric1); - if (rte->attrs->source == RTS_OSPF_EXT2) - buf += bsprintf(buf, "/%d", rte->u.ospf.metric2); + buf += bsprintf(buf, " (%d/%d", rt_get_preference(rte), ea_get_int(rte->attrs, &ea_ospf_metric1, LSINFINITY)); + if (source == RTS_OSPF_EXT2) + buf += bsprintf(buf, "/%d", ea_get_int(rte->attrs, &ea_ospf_metric2, LSINFINITY)); buf += bsprintf(buf, ")"); - if ((rte->attrs->source == RTS_OSPF_EXT1 || rte->attrs->source == RTS_OSPF_EXT2) && rte->u.ospf.tag) + if (source == RTS_OSPF_EXT1 || source == RTS_OSPF_EXT2) { - buf += bsprintf(buf, " [%x]", rte->u.ospf.tag); + eattr *ea = ea_find(rte->attrs, &ea_ospf_tag); + if (ea && (ea->u.data > 0)) + buf += bsprintf(buf, " [%x]", ea->u.data); } - if (rte->u.ospf.router_id) - buf += bsprintf(buf, " [%R]", rte->u.ospf.router_id); + + eattr *ea = ea_find(rte->attrs, &ea_ospf_router_id); + if (ea) + buf += bsprintf(buf, " [%R]", ea->u.data); } -static int -ospf_get_attr(const eattr * a, byte * buf, int buflen UNUSED) +static void +ospf_tag_format(const eattr * a, byte * buf, uint buflen) { - switch (a->id) - { - case EA_OSPF_METRIC1: - bsprintf(buf, "metric1"); - return GA_NAME; - case EA_OSPF_METRIC2: - bsprintf(buf, "metric2"); - return GA_NAME; - case EA_OSPF_TAG: - bsprintf(buf, "tag: 0x%08x", a->u.data); - return GA_FULL; - case EA_OSPF_ROUTER_ID: - bsprintf(buf, "router_id"); - return GA_NAME; - default: - return GA_UNKNOWN; - } + bsnprintf(buf, buflen, "0x%08x", a->u.data); } static void @@ -1533,10 +1506,15 @@ ospf_sh_lsadb(struct lsadb_show_data *ld) } +struct rte_owner_class ospf_rte_owner_class = { + .get_route_info = ospf_get_route_info, + .rte_better = ospf_rte_better, + .rte_igp_metric = ospf_rte_igp_metric, +}; + struct protocol proto_ospf = { .name = "OSPF", .template = "ospf%d", - .class = PROTOCOL_OSPF, .preference = DEF_PREF_OSPF, .channel_mask = NB_IP, .proto_size = sizeof(struct ospf_proto), @@ -1547,6 +1525,38 @@ struct protocol proto_ospf = { .shutdown = ospf_shutdown, .reconfigure = ospf_reconfigure, .get_status = ospf_get_status, - .get_attr = ospf_get_attr, - .get_route_info = ospf_get_route_info }; + +struct ea_class ea_ospf_metric1 = { + .name = "ospf_metric1", + .type = T_INT, +}; + +struct ea_class ea_ospf_metric2 = { + .name = "ospf_metric2", + .type = T_INT, +}; + +struct ea_class ea_ospf_tag = { + .name = "ospf_tag", + .type = T_INT, + .format = ospf_tag_format, +}; + +struct ea_class ea_ospf_router_id = { + .name = "ospf_router_id", + .type = T_QUAD, +}; + +void +ospf_build(void) +{ + proto_build(&proto_ospf); + + EA_REGISTER_ALL( + &ea_ospf_metric1, + &ea_ospf_metric2, + &ea_ospf_tag, + &ea_ospf_router_id + ); +} diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 3e704ae8..3477ba5a 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -22,7 +22,7 @@ #include "lib/resource.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "nest/bfd.h" @@ -939,12 +939,7 @@ struct lsadb_show_data { u32 router; /* Advertising router, 0 -> all */ }; - -#define EA_OSPF_METRIC1 EA_CODE(PROTOCOL_OSPF, 0) -#define EA_OSPF_METRIC2 EA_CODE(PROTOCOL_OSPF, 1) -#define EA_OSPF_TAG EA_CODE(PROTOCOL_OSPF, 2) -#define EA_OSPF_ROUTER_ID EA_CODE(PROTOCOL_OSPF, 3) - +extern struct ea_class ea_ospf_metric1, ea_ospf_metric2, ea_ospf_tag, ea_ospf_router_id; /* * For regular networks, neighbor address must match network prefix. @@ -1007,6 +1002,8 @@ void ospf_sh_state(struct proto *P, int verbose, int reachable); void ospf_sh_lsadb(struct lsadb_show_data *ld); +extern struct rte_owner_class ospf_rte_owner_class; + /* iface.c */ void ospf_iface_chstate(struct ospf_iface *ifa, u8 state); void ospf_iface_sm(struct ospf_iface *ifa, int event); diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index faee49dc..69c2907d 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -28,24 +28,30 @@ nh_is_vlink(struct nexthop *nhs) static inline int unresolved_vlink(ort *ort) { - return ort->n.nhs && nh_is_vlink(ort->n.nhs); + return ort->n.nhs && nh_is_vlink(&ort->n.nhs->nh); } -static inline struct nexthop * +static inline struct nexthop_adata * new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight) { - struct nexthop *nh = lp_allocz(p->nhpool, sizeof(struct nexthop)); - nh->gw = gw; - nh->iface = iface; - nh->weight = weight; - return nh; + struct nexthop_adata *nhad = lp_alloc(p->nhpool, sizeof(struct nexthop_adata)); + *nhad = (struct nexthop_adata) { + .ad = { .length = sizeof *nhad - sizeof nhad->ad, }, + .nh = { + .gw = gw, + .iface = iface, + .weight = weight, + }, + }; + + return nhad; } /* Returns true if there are device nexthops in n */ static inline int -has_device_nexthops(const struct nexthop *n) +has_device_nexthops(struct nexthop_adata *nhad) { - for (; n; n = n->next) + NEXTHOP_WALK(n, nhad) if (ipa_zero(n->gw)) return 1; @@ -53,38 +59,22 @@ has_device_nexthops(const struct nexthop *n) } /* Replace device nexthops with nexthops to gw */ -static struct nexthop * -fix_device_nexthops(struct ospf_proto *p, const struct nexthop *n, ip_addr gw) +static struct nexthop_adata * +fix_device_nexthops(struct ospf_proto *p, struct nexthop_adata *old, ip_addr gw) { - struct nexthop *root1 = NULL; - struct nexthop *root2 = NULL; - struct nexthop **nn1 = &root1; - struct nexthop **nn2 = &root2; - if (!p->ecmp) - return new_nexthop(p, gw, n->iface, n->weight); - - /* This is a bit tricky. We cannot just copy the list and update n->gw, - because the list should stay sorted, so we create two lists, one with new - gateways and one with old ones, and then merge them. */ - - for (; n; n = n->next) { - struct nexthop *nn = new_nexthop(p, ipa_zero(n->gw) ? gw : n->gw, n->iface, n->weight); + struct nexthop_adata *new = (struct nexthop_adata *) lp_store_adata(p->nhpool, old->ad.data, old->ad.length); + new->nh.gw = gw; + return new; + } + struct nexthop_adata *tmp = (struct nexthop_adata *) tmp_copy_adata(&old->ad); + NEXTHOP_WALK(n, tmp) if (ipa_zero(n->gw)) - { - *nn1 = nn; - nn1 = &(nn->next); - } - else - { - *nn2 = nn; - nn2 = &(nn->next); - } - } + n->gw = gw; - return nexthop_merge(root1, root2, 1, 1, p->ecmp, p->nhpool); + return nexthop_sort(tmp, p->nhpool); } @@ -144,7 +134,7 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old) { int r; - if (old->type == RTS_DUMMY) + if (!old->type) return 1; /* Prefer intra-area to inter-area to externals */ @@ -169,9 +159,9 @@ orta_compare(const struct ospf_proto *p, const orta *new, const orta *old) return -1; if (!new->nhs) return 1; - if (nh_is_vlink(new->nhs)) + if (nh_is_vlink(&new->nhs->nh)) return -1; - if (nh_is_vlink(old->nhs)) + if (nh_is_vlink(&old->nhs->nh)) return 1; @@ -195,7 +185,7 @@ orta_compare_asbr(const struct ospf_proto *p, const orta *new, const orta *old) { int r; - if (old->type == RTS_DUMMY) + if (!old->type) return 1; if (!p->rfc1583) @@ -225,7 +215,7 @@ orta_compare_ext(const struct ospf_proto *p, const orta *new, const orta *old) { int r; - if (old->type == RTS_DUMMY) + if (!old->type) return 1; /* 16.4 (6a) - prefer routes with lower type */ @@ -279,11 +269,7 @@ ort_merge(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->rid < new->rid) old->rid = new->rid; @@ -295,11 +281,7 @@ ort_merge_ext(struct ospf_proto *p, ort *o, const orta *new) orta *old = &o->n; if (old->nhs != new->nhs) - { - old->nhs = nexthop_merge(old->nhs, new->nhs, old->nhs_reuse, new->nhs_reuse, - p->ecmp, p->nhpool); - old->nhs_reuse = 1; - } + old->nhs = nexthop_merge(old->nhs, new->nhs, p->ecmp, p->nhpool); if (old->tag != new->tag) old->tag = 0; @@ -1165,7 +1147,7 @@ ospf_check_vlinks(struct ospf_proto *p) if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs) { - struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->iface); + struct ospf_iface *nhi = ospf_iface_find(p, tmp->nhs->nh.iface); if ((ifa->state != OSPF_IS_PTP) || (ifa->vifa != nhi) @@ -1579,10 +1561,7 @@ ospf_ext_spf(struct ospf_proto *p) /* Replace device nexthops with nexthops to forwarding address from LSA */ if (has_device_nexthops(nfa.nhs)) - { nfa.nhs = fix_device_nexthops(p, nfa.nhs, rt.fwaddr); - nfa.nhs_reuse = 1; - } } if (rt.ebit) @@ -1726,10 +1705,10 @@ ospf_rt_spf(struct ospf_proto *p) static inline int -inherit_nexthops(struct nexthop *pn) +inherit_nexthops(struct nexthop_adata *pn) { /* Proper nexthops (with defined GW) or dummy vlink nexthops (without iface) */ - return pn && (ipa_nonzero(pn->gw) || !pn->iface); + return pn && (ipa_nonzero(pn->nh.gw) || !pn->nh.iface); } static inline ip_addr @@ -1744,12 +1723,12 @@ link_lsa_lladdr(struct ospf_proto *p, struct top_hash_entry *en) return ospf_is_ip4(p) ? ipa_from_ip4(ospf3_6to4(ll)) : ipa_from_ip6(ll); } -static struct nexthop * +static struct nexthop_adata * calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry *par, int pos, uint data, uint lif, uint nif) { struct ospf_proto *p = oa->po; - struct nexthop *pn = par->nhs; + struct nexthop_adata *pn = par->nhs; struct top_hash_entry *link = NULL; struct ospf_iface *ifa = NULL; ip_addr nh = IPA_NONE; @@ -1827,10 +1806,10 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, return NULL; } - struct nexthop *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); + struct nexthop_adata *nhs = new_nexthop(p, nh, ifa->iface, ifa->ecmp_weight); if (ifa->addr->flags & IA_HOST) - nhs->flags = RNF_ONLINK; + nhs->nh.flags = RNF_ONLINK; return nhs; } @@ -1851,7 +1830,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(en->lb)) goto bad; - return new_nexthop(p, en->lb, pn->iface, pn->weight); + return new_nexthop(p, en->lb, pn->nh.iface, pn->nh.weight); } else /* OSPFv3 */ { @@ -1859,7 +1838,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, * Next-hop is taken from lladdr field of Link-LSA, en->lb_id * is computed in link_back(). */ - link = ospf_hash_find(p->gr, pn->iface->index, en->lb_id, rid, LSA_T_LINK); + link = ospf_hash_find(p->gr, pn->nh.iface->index, en->lb_id, rid, LSA_T_LINK); if (!link) return NULL; @@ -1867,7 +1846,7 @@ calc_next_hop(struct ospf_area *oa, struct top_hash_entry *en, if (ipa_zero(nh)) return NULL; - return new_nexthop(p, nh, pn->iface, pn->weight); + return new_nexthop(p, nh, pn->nh.iface, pn->nh.weight); } } @@ -1914,7 +1893,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry if (!link_back(oa, en, par, lif, nif)) return; - struct nexthop *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); + struct nexthop_adata *nhs = calc_next_hop(oa, en, par, pos, data, lif, nif); if (!nhs) { log(L_WARN "%s: Cannot find next hop for LSA (Type: %04x, Id: %R, Rt: %R)", @@ -1923,7 +1902,7 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry } /* If en->dist > 0, we know that en->color == CANDIDATE and en->nhs is defined. */ - if ((dist == en->dist) && !nh_is_vlink(en->nhs)) + if ((dist == en->dist) && !nh_is_vlink(&en->nhs->nh)) { /* * For multipath, we should merge nexthops. We merge regular nexthops only. @@ -1947,13 +1926,11 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry */ /* Keep old ones */ - if (!p->ecmp || nh_is_vlink(nhs) || (nhs == en->nhs)) + if (!p->ecmp || nh_is_vlink(&nhs->nh) || (nhs == en->nhs)) return; /* Merge old and new */ - int new_reuse = (par->nhs != nhs); - en->nhs = nexthop_merge(en->nhs, nhs, en->nhs_reuse, new_reuse, p->ecmp, p->nhpool); - en->nhs_reuse = 1; + en->nhs = nexthop_merge(en->nhs, nhs, p->ecmp, p->nhpool); return; } @@ -1967,7 +1944,6 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry en->nhs = nhs; en->dist = dist; en->color = CANDIDATE; - en->nhs_reuse = (par->nhs != nhs); prev = NULL; @@ -2001,14 +1977,34 @@ add_cand(struct ospf_area *oa, struct top_hash_entry *en, struct top_hash_entry } static inline int -ort_changed(ort *nf, rta *nr) +ort_changed(ort *nf, ea_list *nr) { - rta *or = nf->old_rta; - return !or || + ea_list *or = nf->old_ea; + + if (!or || (nf->n.metric1 != nf->old_metric1) || (nf->n.metric2 != nf->old_metric2) || - (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) || - (nr->source != or->source) || (nr->dest != or->dest) || - !nexthop_same(&(nr->nh), &(or->nh)); + (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid)) + return 1; + + eattr *nhea_n = ea_find(nr, &ea_gen_nexthop); + eattr *nhea_o = ea_find(or, &ea_gen_nexthop); + if (!nhea_n != !nhea_o) + return 1; + + if (nhea_n && nhea_o) + { + struct nexthop_adata *nhad_n = (struct nexthop_adata *) nhea_n->u.ptr; + struct nexthop_adata *nhad_o = (struct nexthop_adata *) nhea_o->u.ptr; + + if (!nexthop_same(nhad_n, nhad_o)) + return 1; + } + + if ( ea_get_int(nr, &ea_gen_source, 0) + != ea_get_int(or, &ea_gen_source, 0)) + return 1; + + return 0; } static void @@ -2030,10 +2026,9 @@ again1: FIB_ITERATE_START(fib, &fit, ort, nf) { /* Sanity check of next-hop addresses, failure should not happen */ - if (nf->n.type) + if (nf->n.type && nf->n.nhs) { - struct nexthop *nh; - for (nh = nf->n.nhs; nh; nh = nh->next) + NEXTHOP_WALK(nh, nf->n.nhs) if (ipa_nonzero(nh->gw)) { neighbor *nbr = neigh_find(&p->p, nh->gw, nh->iface, @@ -2052,46 +2047,69 @@ again1: if (nf->n.type) /* Add the route */ { - rta a0 = { - .src = p->p.main_source, - .source = nf->n.type, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, - .nh = *(nf->n.nhs), - }; - - if (reload || ort_changed(nf, &a0)) + struct { + ea_list l; + eattr a[7]; + } eattrs; + + eattrs.l = (ea_list) {}; + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, nf->n.type); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nf->n.nhs->ad); + + if (reload || ort_changed(nf, &eattrs.l)) { - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a); + nf->old_metric1 = nf->n.metric1; + nf->old_metric2 = nf->n.metric2; + nf->old_tag = nf->n.tag; + nf->old_rid = nf->n.rid; - rta_free(nf->old_rta); - nf->old_rta = rta_clone(a); - e->u.ospf.metric1 = nf->old_metric1 = nf->n.metric1; - e->u.ospf.metric2 = nf->old_metric2 = nf->n.metric2; - e->u.ospf.tag = nf->old_tag = nf->n.tag; - e->u.ospf.router_id = nf->old_rid = nf->n.rid; - e->pflags = EA_ID_FLAG(EA_OSPF_METRIC1) | EA_ID_FLAG(EA_OSPF_ROUTER_ID); + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_metric1, 0, nf->n.metric1); if (nf->n.type == RTS_OSPF_EXT2) - e->pflags |= EA_ID_FLAG(EA_OSPF_METRIC2); + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_metric2, 0, nf->n.metric2); - /* Perhaps onfly if tag is non-zero? */ if ((nf->n.type == RTS_OSPF_EXT1) || (nf->n.type == RTS_OSPF_EXT2)) - e->pflags |= EA_ID_FLAG(EA_OSPF_TAG); + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_tag, 0, nf->n.tag); + + eattrs.a[eattrs.l.count++] = + EA_LITERAL_EMBEDDED(&ea_ospf_router_id, 0, nf->n.rid); + + ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count); + ea_list *eal = ea_lookup(&eattrs.l, 0); + ea_free(nf->old_ea); + nf->old_ea = eal; + + rte e0 = { + .attrs = eal, + .src = p->p.main_source, + }; + + /* DBG("Mod rte type %d - %N via %I on iface %s, met %d\n", a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); - rte_update(&p->p, nf->fn.addr, e); + */ + + rte_update(p->p.main_channel, nf->fn.addr, &e0, p->p.main_source); } } - else if (nf->old_rta) + else if (nf->old_ea) { /* Remove the route */ - rta_free(nf->old_rta); - nf->old_rta = NULL; + rta_free(nf->old_ea); + nf->old_ea = NULL; - rte_update(&p->p, nf->fn.addr, NULL); + rte_update(p->p.main_channel, nf->fn.addr, NULL, p->p.main_source); } /* Remove unused rt entry, some special entries are persistent */ @@ -2107,7 +2125,6 @@ again1: } FIB_ITERATE_END; - WALK_LIST(oa, p->area_list) { /* Cleanup ASBR hash tables */ diff --git a/proto/ospf/rt.h b/proto/ospf/rt.h index 094e125b..88eefef9 100644 --- a/proto/ospf/rt.h +++ b/proto/ospf/rt.h @@ -18,8 +18,6 @@ typedef struct orta { u8 type; /* RTS_OSPF_* */ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ u32 options; /* * For ORT_ROUTER routes, options field are router-LSA style @@ -53,7 +51,7 @@ typedef struct orta struct ospf_area *oa; struct ospf_area *voa; /* Used when route is replaced in ospf_rt_sum_tr(), NULL otherwise */ - struct nexthop *nhs; /* Next hops computed during SPF */ + struct nexthop_adata *nhs; /* Next hops computed during SPF */ struct top_hash_entry *en; /* LSA responsible for this orta */ } orta; @@ -80,7 +78,7 @@ typedef struct ort */ orta n; u32 old_metric1, old_metric2, old_tag, old_rid; - rta *old_rta; + ea_list *old_ea; u32 lsa_id; u8 external_rte; u8 area_net; diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 52c2a0ce..85bce03d 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -1300,7 +1300,7 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa) } void -ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED) +ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED) { struct ospf_proto *p = (struct ospf_proto *) P; struct ospf_area *oa = NULL; /* non-NULL for NSSA-LSA */ @@ -1319,7 +1319,7 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte if (!new) { - nf = fib_find(&p->rtf, n->n.addr); + nf = fib_find(&p->rtf, n); if (!nf || !nf->external_rte) return; @@ -1337,23 +1337,23 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte ASSERT(p->asbr); /* Get route attributes */ - rta *a = new->attrs; - eattr *m1a = ea_find(a->eattrs, EA_OSPF_METRIC1); - eattr *m2a = ea_find(a->eattrs, EA_OSPF_METRIC2); + ea_list *a = new->attrs; + eattr *m1a = ea_find(a, &ea_ospf_metric1); + eattr *m2a = ea_find(a, &ea_ospf_metric2); uint m1 = m1a ? m1a->u.data : 0; uint m2 = m2a ? m2a->u.data : 10000; if (m1 > LSINFINITY) { log(L_WARN "%s: Invalid ospf_metric1 value %u for route %N", - p->p.name, m1, n->n.addr); + p->p.name, m1, n); m1 = LSINFINITY; } if (m2 > LSINFINITY) { log(L_WARN "%s: Invalid ospf_metric2 value %u for route %N", - p->p.name, m2, n->n.addr); + p->p.name, m2, n); m2 = LSINFINITY; } @@ -1363,11 +1363,14 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte uint ebit = m2a || !m1a; uint metric = ebit ? m2 : m1; - uint tag = ea_get_int(a->eattrs, EA_OSPF_TAG, 0); + uint tag = ea_get_int(a, &ea_ospf_tag, 0); ip_addr fwd = IPA_NONE; - if ((a->dest == RTD_UNICAST) && use_gw_for_fwaddr(p, a->nh.gw, a->nh.iface)) - fwd = a->nh.gw; + eattr *nhea = ea_find(a, &ea_gen_nexthop); + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) + if (use_gw_for_fwaddr(p, nhad->nh.gw, nhad->nh.iface)) + fwd = nhad->nh.gw; /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) @@ -1377,12 +1380,12 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte if (ipa_zero(fwd)) { log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %N", - p->p.name, n->n.addr); + p->p.name, n); return; } } - nf = fib_get(&p->rtf, n->n.addr); + nf = fib_get(&p->rtf, n); ospf_originate_ext_lsa(p, oa, nf, LSA_M_EXPORT, metric, ebit, fwd, tag, 1, p->vpn_pe); nf->external_rte = 1; } @@ -2135,7 +2138,7 @@ ospf_hash_delete(struct top_graph *f, struct top_hash_entry *e) if (*ee == e) { *ee = e->next; - sl_free(f->hash_slab, e); + sl_free(e); if (f->hash_entries-- < f->hash_entries_min) ospf_top_rehash(f, -HASH_LO_STEP); return; diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index 535d1f1b..3c92b431 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -28,7 +28,7 @@ struct top_hash_entry u16 next_lsa_opts; /* For postponed LSA origination */ btime inst_time; /* Time of installation into DB */ struct ort *nf; /* Reference fibnode for sum and ext LSAs, NULL for otherwise */ - struct nexthop *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ + struct nexthop_adata *nhs; /* Computed nexthops - valid only in ospf_rt_spf() */ ip_addr lb; /* In OSPFv2, link back address. In OSPFv3, any global address in the area useful for vlinks */ u32 lb_id; /* Interface ID of link back iface (for bcast or NBMA networks) */ u32 dist; /* Distance from the root */ @@ -39,8 +39,6 @@ struct top_hash_entry #define CANDIDATE 1 #define INSPF 2 u8 mode; /* LSA generated during RT calculation (LSA_RTCALC or LSA_STALE)*/ - u8 nhs_reuse; /* Whether nhs nodes can be reused during merging. - See a note in rt.c:add_cand() */ }; @@ -200,7 +198,7 @@ void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 d void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit, int dn); void ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa); -void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old); +void ospf_rt_notify(struct proto *P, struct channel *ch, const net_addr *n, rte *new, const rte *old); void ospf_update_topology(struct ospf_proto *p); struct top_hash_entry *ospf_hash_find(struct top_graph *, u32 domain, u32 lsa, u32 rtr, u32 type); diff --git a/proto/perf/Makefile b/proto/perf/Makefile index 7877fb19..42051f43 100644 --- a/proto/perf/Makefile +++ b/proto/perf/Makefile @@ -2,5 +2,6 @@ src := perf.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,perf_build) tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/perf/perf.c b/proto/perf/perf.c index ba401a8a..d82ac8aa 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -18,7 +18,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -85,7 +85,7 @@ random_net_ip4(void) } struct perf_random_routes { - struct rta *a; + ea_list *a; net_addr net; }; @@ -142,17 +142,21 @@ perf_loop(void *data) *((net_addr_ip4 *) &(p->data[i].net)) = random_net_ip4(); if (!p->attrs_per_rte || !(i % p->attrs_per_rte)) { - struct rta a0 = { - .src = p->p.main_source, - .source = RTS_PERF, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, + ea_list *ea = NULL; + + ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_PERF); + + struct nexthop_adata nhad = { .nh.iface = p->ifa->iface, .nh.gw = gw, .nh.weight = 1, }; - p->data[i].a = rta_lookup(&a0); + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); + + p->data[i].a = rta_lookup(ea, 0); } else p->data[i].a = rta_clone(p->data[i-1].a); @@ -160,18 +164,17 @@ perf_loop(void *data) clock_gettime(CLOCK_MONOTONIC, &ts_generated); - for (uint i=0; i<N; i++) { - rte *e = rte_get_temp(p->data[i].a); - e->pflags = 0; - - rte_update(P, &(p->data[i].net), e); + for (uint i=0; i<N; i++) + { + rte e0 = { .attrs = p->data[i].a, .src = P->main_source, }; + rte_update(P->main_channel, &(p->data[i].net), &e0, P->main_source); } clock_gettime(CLOCK_MONOTONIC, &ts_update); if (!p->keep) for (uint i=0; i<N; i++) - rte_update(P, &(p->data[i].net), NULL); + rte_update(P->main_channel, &(p->data[i].net), NULL, P->main_source); clock_gettime(CLOCK_MONOTONIC, &ts_withdraw); @@ -204,7 +207,7 @@ perf_loop(void *data) } static void -perf_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net UNUSED, struct rte *new UNUSED, struct rte *old UNUSED) +perf_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net UNUSED, struct rte *new UNUSED, const struct rte *old UNUSED) { struct perf_proto *p = (struct perf_proto *) P; p->exp++; @@ -306,7 +309,6 @@ perf_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS struct protocol proto_perf = { .name = "Perf", .template = "perf%d", - .class = PROTOCOL_PERF, .channel_mask = NB_IP, .proto_size = sizeof(struct perf_proto), .config_size = sizeof(struct perf_config), @@ -315,3 +317,9 @@ struct protocol proto_perf = { .reconfigure = perf_reconfigure, .copy_config = perf_copy_config, }; + +void +perf_build(void) +{ + proto_build(&proto_perf); +} diff --git a/proto/pipe/Makefile b/proto/pipe/Makefile index 5093da98..ba66027f 100644 --- a/proto/pipe/Makefile +++ b/proto/pipe/Makefile @@ -2,5 +2,6 @@ src := pipe.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,pipe_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 1202c169..0990168e 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -16,7 +16,7 @@ CF_DEFINES CF_DECLS -CF_KEYWORDS(PIPE, PEER, TABLE) +CF_KEYWORDS(PIPE, PEER, TABLE, MAX, GENERATION) CF_GRAMMAR @@ -25,6 +25,7 @@ proto: pipe_proto '}' { this_channel = NULL; } ; pipe_proto_start: proto_start PIPE { this_proto = proto_config_new(&proto_pipe, $1); + PIPE_CFG->max_generation = 16; } proto_name { @@ -40,7 +41,17 @@ pipe_proto: pipe_proto_start '{' | pipe_proto proto_item ';' | pipe_proto channel_item_ ';' + | pipe_proto IMPORT IN net_any imexport ';' { + if (this_channel->net_type && ($4->type != this_channel->net_type)) + cf_error("Incompatible export prefilter type"); + PIPE_CFG->in_subprefix = $4; + this_channel->in_filter = $5; + } | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } + | pipe_proto MAX GENERATION expr ';' { + if (($4 < 1) || ($4 > 254)) cf_error("Max generation must be in range 1..254, got %u", $4); + PIPE_CFG->max_generation = $4; + } ; CF_CODE diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 481f5804..b3b50a0d 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -35,7 +35,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -43,70 +43,54 @@ #include "pipe.h" +#ifdef CONFIG_BGP +#include "proto/bgp/bgp.h" +#endif + static void -pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *old) +pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte *new, const rte *old) { struct pipe_proto *p = (void *) P; struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri; - struct rte_src *src; - - rte *e; - rta *a; if (!new && !old) return; - if (dst->table->pipe_busy) - { - log(L_ERR "Pipe loop detected when sending %N to table %s", - n->n.addr, dst->table->name); - return; - } - if (new) { - a = alloca(rta_size(new->attrs)); - memcpy(a, new->attrs, rta_size(new->attrs)); - - a->aflags = 0; - a->hostentry = NULL; - e = rte_get_temp(a); - e->pflags = 0; + rte e0 = { + .attrs = new->attrs, + .src = new->src, + .generation = new->generation + 1, + }; - /* Copy protocol specific embedded attributes. */ - memcpy(&(e->u), &(new->u), sizeof(e->u)); - e->pref = new->pref; - e->pflags = new->pflags; + ea_unset_attr(&e0.attrs, 0, &ea_gen_hostentry); -#ifdef CONFIG_BGP - /* Hack to cleanup cached value */ - if (e->attrs->src->proto->proto == &proto_bgp) - { - e->u.bgp.stale = -1; - e->u.bgp.base_table = NULL; - } -#endif - - src = a->src; + rte_update(dst, n, &e0, new->src); } else - { - e = NULL; - src = old->attrs->src; - } - - src_ch->table->pipe_busy = 1; - rte_update2(dst, n->n.addr, e, src); - src_ch->table->pipe_busy = 0; + rte_update(dst, n, NULL, old->src); } static int -pipe_preexport(struct channel *C, rte **ee, struct linpool *p UNUSED) +pipe_preexport(struct channel *C, rte *e) { - struct proto *pp = (*ee)->sender->proto; + struct pipe_proto *p = (void *) C->proto; - if (pp == C->proto) - return -1; /* Avoid local loops automatically */ + /* Avoid direct loopbacks */ + if (e->sender == C->in_req.hook) + return -1; + + /* Indirection check */ + uint max_generation = ((struct pipe_config *) p->p.cf)->max_generation; + if (e->generation >= max_generation) + { + log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u", + e->generation, max_generation, C->proto->name, + C->table->name, e->net, e->src->owner->name, e->src->private_id, e->src->global_id); + + return -1; + } return 0; } @@ -139,10 +123,16 @@ pipe_postconfig(struct proto_config *CF) if (cc->table->addr_type != cf->peer->addr_type) cf_error("Primary table and peer table must have the same type"); + if (cc->out_subprefix && (cc->table->addr_type != cc->out_subprefix->type)) + cf_error("Export subprefix must match table type"); + + if (cf->in_subprefix && (cc->table->addr_type != cf->in_subprefix->type)) + cf_error("Import subprefix must match table type"); + if (cc->rx_limit.action) cf_error("Pipe protocol does not support receive limits"); - if (cc->in_keep_filtered) + if (cc->in_keep) cf_error("Pipe protocol prohibits keeping filtered routes"); cc->debug = cf->c.debug; @@ -158,6 +148,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf) .channel = cc->channel, .table = cc->table, .out_filter = cc->out_filter, + .out_subprefix = cc->out_subprefix, .in_limit = cc->in_limit, .ra_mode = RA_ANY, .debug = cc->debug, @@ -169,6 +160,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf) .channel = cc->channel, .table = cf->peer, .out_filter = cc->in_filter, + .out_subprefix = cf->in_subprefix, .in_limit = cc->out_limit, .ra_mode = RA_ANY, .debug = cc->debug, @@ -191,6 +183,8 @@ pipe_init(struct proto_config *CF) P->preexport = pipe_preexport; P->reload_routes = pipe_reload_routes; + p->rl_gen = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + pipe_configure_channels(p, cf); return P; @@ -222,8 +216,18 @@ pipe_get_status(struct proto *P, byte *buf) static void pipe_show_stats(struct pipe_proto *p) { - struct proto_stats *s1 = &p->pri->stats; - struct proto_stats *s2 = &p->sec->stats; + struct channel_import_stats *s1i = &p->pri->import_stats; + struct channel_export_stats *s1e = &p->pri->export_stats; + struct channel_import_stats *s2i = &p->sec->import_stats; + struct channel_export_stats *s2e = &p->sec->export_stats; + + struct rt_import_stats *rs1i = p->pri->in_req.hook ? &p->pri->in_req.hook->stats : NULL; + struct rt_export_stats *rs1e = p->pri->out_req.hook ? &p->pri->out_req.hook->stats : NULL; + struct rt_import_stats *rs2i = p->sec->in_req.hook ? &p->sec->in_req.hook->stats : NULL; + struct rt_export_stats *rs2e = p->sec->out_req.hook ? &p->sec->out_req.hook->stats : NULL; + + u32 pri_routes = p->pri->in_limit.count; + u32 sec_routes = p->sec->in_limit.count; /* * Pipe stats (as anything related to pipes) are a bit tricky. There @@ -247,24 +251,22 @@ pipe_show_stats(struct pipe_proto *p) */ cli_msg(-1006, " Routes: %u imported, %u exported", - s1->imp_routes, s2->imp_routes); + pri_routes, sec_routes); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s2->exp_updates_received, s2->exp_updates_rejected + s1->imp_updates_invalid, - s2->exp_updates_filtered, s1->imp_updates_ignored, s1->imp_updates_accepted); + rs2e->updates_received, s2e->updates_rejected + s1i->updates_invalid, + s2e->updates_filtered, rs1i->updates_ignored, rs1i->updates_accepted); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s2->exp_withdraws_received, s1->imp_withdraws_invalid, - s1->imp_withdraws_ignored, s1->imp_withdraws_accepted); + rs2e->withdraws_received, s1i->withdraws_invalid, + rs1i->withdraws_ignored, rs1i->withdraws_accepted); cli_msg(-1006, " Export updates: %10u %10u %10u %10u %10u", - s1->exp_updates_received, s1->exp_updates_rejected + s2->imp_updates_invalid, - s1->exp_updates_filtered, s2->imp_updates_ignored, s2->imp_updates_accepted); + rs1e->updates_received, s1e->updates_rejected + s2i->updates_invalid, + s1e->updates_filtered, rs2i->updates_ignored, rs2i->updates_accepted); cli_msg(-1006, " Export withdraws: %10u %10u --- %10u %10u", - s1->exp_withdraws_received, s2->imp_withdraws_invalid, - s2->imp_withdraws_ignored, s2->imp_withdraws_accepted); + rs1e->withdraws_received, s2i->withdraws_invalid, + rs2i->withdraws_ignored, rs2i->withdraws_accepted); } -static const char *pipe_feed_state[] = { [ES_DOWN] = "down", [ES_FEEDING] = "feed", [ES_READY] = "up" }; - static void pipe_show_proto_info(struct proto *P) { @@ -273,13 +275,17 @@ pipe_show_proto_info(struct proto *P) cli_msg(-1006, " Channel %s", "main"); cli_msg(-1006, " Table: %s", p->pri->table->name); cli_msg(-1006, " Peer table: %s", p->sec->table->name); - cli_msg(-1006, " Import state: %s", pipe_feed_state[p->sec->export_state]); - cli_msg(-1006, " Export state: %s", pipe_feed_state[p->pri->export_state]); + cli_msg(-1006, " Import state: %s", rt_export_state_name(rt_export_get_state(p->sec->out_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(p->pri->out_req.hook))); cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter)); cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter)); - channel_show_limit(&p->pri->in_limit, "Import limit:"); - channel_show_limit(&p->sec->in_limit, "Export limit:"); + + + channel_show_limit(&p->pri->in_limit, "Import limit:", + (p->pri->limit_active & (1 << PLD_IN)), p->pri->limit_actions[PLD_IN]); + channel_show_limit(&p->sec->in_limit, "Export limit:", + (p->sec->limit_active & (1 << PLD_IN)), p->sec->limit_actions[PLD_IN]); if (P->proto_state != PS_DOWN) pipe_show_stats(p); @@ -297,7 +303,6 @@ pipe_update_debug(struct proto *P) struct protocol proto_pipe = { .name = "Pipe", .template = "pipe%d", - .class = PROTOCOL_PIPE, .proto_size = sizeof(struct pipe_proto), .config_size = sizeof(struct pipe_config), .postconfig = pipe_postconfig, @@ -307,3 +312,9 @@ struct protocol proto_pipe = { .get_status = pipe_get_status, .show_proto_info = pipe_show_proto_info }; + +void +pipe_build(void) +{ + proto_build(&proto_pipe); +} diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 038c6666..a6534e1c 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -12,12 +12,15 @@ struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ + const net_addr *in_subprefix; + u8 max_generation; }; struct pipe_proto { struct proto p; struct channel *pri; struct channel *sec; + struct tbf rl_gen; }; #endif diff --git a/proto/radv/Makefile b/proto/radv/Makefile index 05317eff..4780bee3 100644 --- a/proto/radv/Makefile +++ b/proto/radv/Makefile @@ -2,5 +2,6 @@ src := packets.c radv.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,radv_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/radv/config.Y b/proto/radv/config.Y index 8d4a3ab9..fb68d2e5 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -33,7 +33,7 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, SOLICITED, RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT, LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, LOCAL, TRIGGER, SENSITIVE, PREFERENCE, LOW, MEDIUM, HIGH, PROPAGATE, ROUTE, - ROUTES, RA_PREFERENCE, RA_LIFETIME) + ROUTES) CF_ENUM(T_ENUM_RA_PREFERENCE, RA_PREF_, LOW, MEDIUM, HIGH) @@ -336,9 +336,6 @@ radv_sensitive: | SENSITIVE bool { $$ = $2; } ; -dynamic_attr: RA_PREFERENCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_ENUM_RA_PREFERENCE, EA_RA_PREFERENCE); } ; -dynamic_attr: RA_LIFETIME { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RA_LIFETIME); } ; - CF_CODE CF_END diff --git a/proto/radv/radv.c b/proto/radv/radv.c index fe3713ef..10d5e3ed 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include "radv.h" +#include "lib/macro.h" /** * DOC: Router Advertisements @@ -42,6 +43,8 @@ * RFC 6106 - DNS extensions (RDDNS, DNSSL) */ +static struct ea_class ea_radv_preference, ea_radv_lifetime; + static void radv_prune_prefixes(struct radv_iface *ifa); static void radv_prune_routes(struct radv_proto *p); @@ -385,18 +388,18 @@ radv_trigger_valid(struct radv_config *cf) } static inline int -radv_net_match_trigger(struct radv_config *cf, net *n) +radv_net_match_trigger(struct radv_config *cf, const net_addr *n) { - return radv_trigger_valid(cf) && net_equal(n->n.addr, &cf->trigger); + return radv_trigger_valid(cf) && net_equal(n, &cf->trigger); } int -radv_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) +radv_preexport(struct channel *C, rte *new) { // struct radv_proto *p = (struct radv_proto *) P; struct radv_config *cf = (struct radv_config *) (C->proto->cf); - if (radv_net_match_trigger(cf, (*new)->net)) + if (radv_net_match_trigger(cf, new->net)) return RIC_PROCESS; if (cf->propagate_routes) @@ -406,7 +409,7 @@ radv_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) } static void -radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED) +radv_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED) { struct radv_proto *p = (struct radv_proto *) P; struct radv_config *cf = (struct radv_config *) (P->cf); @@ -444,11 +447,11 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte { /* Update */ - ea = ea_find(new->attrs->eattrs, EA_RA_PREFERENCE); + ea = ea_find(new->attrs, &ea_radv_preference); uint preference = ea ? ea->u.data : RA_PREF_MEDIUM; uint preference_set = !!ea; - ea = ea_find(new->attrs->eattrs, EA_RA_LIFETIME); + ea = ea_find(new->attrs, &ea_radv_lifetime); uint lifetime = ea ? ea->u.data : 0; uint lifetime_set = !!ea; @@ -457,14 +460,14 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte (preference != RA_PREF_HIGH)) { log(L_WARN "%s: Invalid ra_preference value %u on route %N", - p->p.name, preference, n->n.addr); + p->p.name, preference, n); preference = RA_PREF_MEDIUM; preference_set = 1; lifetime = 0; lifetime_set = 1; } - rt = fib_get(&p->routes, n->n.addr); + rt = fib_get(&p->routes, n); /* Ignore update if nothing changed */ if (rt->valid && @@ -487,7 +490,7 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte else { /* Withdraw */ - rt = fib_find(&p->routes, n->n.addr); + rt = fib_find(&p->routes, n); if (!rt || !rt->valid) return; @@ -738,27 +741,26 @@ radv_pref_str(u32 pref) } } -/* The buffer has some minimal size */ -static int -radv_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +radv_preference_format(const eattr *a, byte *buf, uint buflen) { - switch (a->id) - { - case EA_RA_PREFERENCE: - bsprintf(buf, "preference: %s", radv_pref_str(a->u.data)); - return GA_FULL; - case EA_RA_LIFETIME: - bsprintf(buf, "lifetime"); - return GA_NAME; - default: - return GA_UNKNOWN; - } + bsnprintf(buf, buflen, "%s", radv_pref_str(a->u.data)); } +static struct ea_class ea_radv_preference = { + .name = "radv_preference", + .type = T_ENUM_RA_PREFERENCE, + .format = radv_preference_format, +}; + +static struct ea_class ea_radv_lifetime = { + .name = "radv_lifetime", + .type = T_INT, +}; + struct protocol proto_radv = { .name = "RAdv", .template = "radv%d", - .class = PROTOCOL_RADV, .channel_mask = NB_IP6, .proto_size = sizeof(struct radv_proto), .config_size = sizeof(struct radv_config), @@ -769,5 +771,15 @@ struct protocol proto_radv = { .reconfigure = radv_reconfigure, .copy_config = radv_copy_config, .get_status = radv_get_status, - .get_attr = radv_get_attr }; + +void +radv_build(void) +{ + proto_build(&proto_radv); + + EA_REGISTER_ALL( + &ea_radv_preference, + &ea_radv_lifetime + ); +} diff --git a/proto/radv/radv.h b/proto/radv/radv.h index 14d40f8a..c9219bda 100644 --- a/proto/radv/radv.h +++ b/proto/radv/radv.h @@ -19,7 +19,7 @@ #include "lib/resource.h" #include "nest/protocol.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "nest/locks.h" #include "conf/conf.h" @@ -195,10 +195,6 @@ struct radv_iface #define RA_PREF_HIGH 0x08 #define RA_PREF_MASK 0x18 -/* Attributes */ -#define EA_RA_PREFERENCE EA_CODE(PROTOCOL_RADV, 0) -#define EA_RA_LIFETIME EA_CODE(PROTOCOL_RADV, 1) - #ifdef LOCAL_DEBUG #define RADV_FORCE_DEBUG 1 #else diff --git a/proto/rip/Makefile b/proto/rip/Makefile index 7feabcd8..b9ff62d6 100644 --- a/proto/rip/Makefile +++ b/proto/rip/Makefile @@ -2,5 +2,6 @@ src := packets.c rip.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,rip_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rip/config.Y b/proto/rip/config.Y index 28ee9609..3c0973b1 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -37,7 +37,7 @@ CF_KEYWORDS(RIP, NG, ECMP, LIMIT, WEIGHT, INFINITY, METRIC, UPDATE, TIMEOUT, PASSIVE, VERSION, SPLIT, HORIZON, POISON, REVERSE, CHECK, ZERO, TIME, BFD, AUTHENTICATION, NONE, PLAINTEXT, CRYPTOGRAPHIC, MD5, TTL, SECURITY, RX, TX, BUFFER, LENGTH, PRIORITY, ONLY, LINK, - DEMAND, CIRCUIT, RIP_METRIC, RIP_TAG) + DEMAND, CIRCUIT) %type <i> rip_variant rip_auth @@ -190,9 +190,6 @@ rip_iface: rip_iface_start iface_patt_list_nopx rip_iface_opt_list rip_iface_finish; -dynamic_attr: RIP_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_METRIC); } ; -dynamic_attr: RIP_TAG { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_RIP_TAG); } ; - CF_CLI_HELP(SHOW RIP, ..., [[Show information about RIP protocol]]); CF_CLI(SHOW RIP INTERFACES, optproto opttext, [<name>] [\"<interface>\"], [[Show information about RIP interfaces]]) diff --git a/proto/rip/rip.c b/proto/rip/rip.c index e1a235a0..ab0e3f4b 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -78,6 +78,7 @@ #include <stdlib.h> #include "rip.h" +#include "lib/macro.h" static inline void rip_lock_neighbor(struct rip_neighbor *n); @@ -88,6 +89,7 @@ static inline void rip_iface_kick_timer(struct rip_iface *ifa); static void rip_iface_timer(timer *timer); static void rip_trigger_update(struct rip_proto *p); +static struct ea_class ea_rip_metric, ea_rip_tag, ea_rip_from; /* * RIP routes @@ -108,14 +110,14 @@ rip_add_rte(struct rip_proto *p, struct rip_rte **rp, struct rip_rte *src) } static inline void -rip_remove_rte(struct rip_proto *p, struct rip_rte **rp) +rip_remove_rte(struct rip_proto *p UNUSED, struct rip_rte **rp) { struct rip_rte *rt = *rp; rip_unlock_neighbor(rt->from); *rp = rt->next; - sl_free(p->rte_slab, rt); + sl_free(rt); } static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b) @@ -124,6 +126,11 @@ static inline int rip_same_rte(struct rip_rte *a, struct rip_rte *b) static inline int rip_valid_rte(struct rip_rte *rt) { return rt->from->ifa != NULL; } +struct rip_iface_adata { + struct adata ad; + struct iface *iface; +}; + /** * rip_announce_rte - announce route from RIP routing table to the core * @p: RIP instance @@ -144,65 +151,94 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) if (rt) { /* Update */ - rta a0 = { - .src = p->p.main_source, - .source = RTS_RIP, - .scope = SCOPE_UNIVERSE, - .dest = RTD_UNICAST, + struct { + ea_list l; + eattr a[3]; + } ea_block = { + .l.count = ARRAY_SIZE(ea_block.a), + .a = { + EA_LITERAL_EMBEDDED(&ea_gen_preference, 0, p->p.main_channel->preference), + EA_LITERAL_EMBEDDED(&ea_gen_source, 0, RTS_RIP), + EA_LITERAL_EMBEDDED(&ea_rip_metric, 0, rt->metric), + }, }; - u8 rt_metric = rt->metric; + ea_list *ea = &ea_block.l; + u16 rt_tag = rt->tag; + struct iface *rt_from = NULL; if (p->ecmp) { /* ECMP route */ - struct nexthop *nhs = NULL; int num = 0; for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) + if (rip_valid_rte(rt)) + num++; + + struct nexthop_adata *nhad = (struct nexthop_adata *) tmp_alloc_adata((num+1) * sizeof(struct nexthop)); + struct nexthop *nh = &nhad->nh; + + for (rt = en->routes; rt && (num < p->ecmp); rt = rt->next) { if (!rip_valid_rte(rt)) - continue; + continue; - struct nexthop *nh = allocz(sizeof(struct nexthop)); + *nh = (struct nexthop) { + .gw = rt->next_hop, + .iface = rt->from->ifa->iface, + .weight = rt->from->ifa->cf->ecmp_weight, + }; - nh->gw = rt->next_hop; - nh->iface = rt->from->ifa->iface; - nh->weight = rt->from->ifa->cf->ecmp_weight; + if (!rt_from) + rt_from = rt->from->ifa->iface; - nexthop_insert(&nhs, nh); - num++; + nh = NEXTHOP_NEXT(nh); if (rt->tag != rt_tag) rt_tag = 0; } - a0.nh = *nhs; + nhad->ad.length = ((void *) nh - (void *) nhad->ad.data); + + ea_set_attr(&ea, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_sort(nhad, tmp_linpool)->ad))); } else { /* Unipath route */ - a0.from = rt->from->nbr->addr; - a0.nh.gw = rt->next_hop; - a0.nh.iface = rt->from->ifa->iface; + rt_from = rt->from->ifa->iface; + + struct nexthop_adata nhad = { + .nh.gw = rt->next_hop, + .nh.iface = rt->from->ifa->iface, + }; + + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, + &nhad.ad.data, sizeof nhad - sizeof nhad.ad); + ea_set_attr_data(&ea, &ea_gen_from, 0, &rt->from->nbr->addr, sizeof(ip_addr)); } - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a); + ea_set_attr_u32(&ea, &ea_rip_tag, 0, rt_tag); - e->u.rip.from = a0.nh.iface; - e->u.rip.metric = rt_metric; - e->u.rip.tag = rt_tag; - e->pflags = EA_ID_FLAG(EA_RIP_METRIC) | EA_ID_FLAG(EA_RIP_TAG); + struct rip_iface_adata riad = { + .ad = { .length = sizeof(struct rip_iface_adata) - sizeof(struct adata) }, + .iface = rt_from, + }; + ea_set_attr(&ea, + EA_LITERAL_DIRECT_ADATA(&ea_rip_from, 0, &riad.ad)); - rte_update(&p->p, en->n.addr, e); + rte e0 = { + .attrs = ea, + .src = p->p.main_source, + }; + + rte_update(p->p.main_channel, en->n.addr, &e0, p->p.main_source); } else - { - /* Withdraw */ - rte_update(&p->p, en->n.addr, NULL); - } + rte_update(p->p.main_channel, en->n.addr, NULL, p->p.main_source); } /** @@ -297,8 +333,8 @@ rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from) * it into our data structures. */ static void -rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, struct rte *new, - struct rte *old UNUSED) +rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, struct rte *new, + const struct rte *old UNUSED) { struct rip_proto *p = (struct rip_proto *) P; struct rip_entry *en; @@ -307,20 +343,22 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s if (new) { /* Update */ - u32 rt_metric = ea_get_int(new->attrs->eattrs, EA_RIP_METRIC, 1); - u32 rt_tag = ea_get_int(new->attrs->eattrs, EA_RIP_TAG, 0); + u32 rt_tag = ea_get_int(new->attrs, &ea_rip_tag, 0); + u32 rt_metric = ea_get_int(new->attrs, &ea_rip_metric, 1); + const eattr *rie = ea_find(new->attrs, &ea_rip_from); + struct iface *rt_from = rie ? ((struct rip_iface_adata *) rie->u.ptr)->iface : NULL; if (rt_metric > p->infinity) { log(L_WARN "%s: Invalid rip_metric value %u for route %N", - p->p.name, rt_metric, net->n.addr); + p->p.name, rt_metric, net); rt_metric = p->infinity; } if (rt_tag > 0xffff) { log(L_WARN "%s: Invalid rip_tag value %u for route %N", - p->p.name, rt_tag, net->n.addr); + p->p.name, rt_tag, net); rt_metric = p->infinity; rt_tag = 0; } @@ -332,21 +370,27 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s * collection. */ - en = fib_get(&p->rtable, net->n.addr); + en = fib_get(&p->rtable, net); old_metric = en->valid ? en->metric : -1; en->valid = RIP_ENTRY_VALID; en->metric = rt_metric; en->tag = rt_tag; - en->from = (new->attrs->src->proto == P) ? new->u.rip.from : NULL; - en->iface = new->attrs->nh.iface; - en->next_hop = new->attrs->nh.gw; + en->from = (new->src->owner == &P->sources) ? rt_from : NULL; + + eattr *nhea = ea_find(new->attrs, &ea_gen_nexthop); + if (nhea) + { + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + en->iface = nhad->nh.iface; + en->next_hop = nhad->nh.gw; + } } else { /* Withdraw */ - en = fib_find(&p->rtable, net->n.addr); + en = fib_find(&p->rtable, net); if (!en || en->valid != RIP_ENTRY_VALID) return; @@ -1068,37 +1112,33 @@ rip_reload_routes(struct channel *C) rip_kick_timer(p); } -static void -rip_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rte_make_tmp_attr(rt, EA_RIP_METRIC, EAF_TYPE_INT, rt->u.rip.metric); - rte_make_tmp_attr(rt, EA_RIP_TAG, EAF_TYPE_INT, rt->u.rip.tag); -} +static struct rte_owner_class rip_rte_owner_class; -static void -rip_store_tmp_attrs(struct rte *rt, struct linpool *pool) +static inline struct rip_proto * +rip_rte_proto(struct rte *rte) { - rte_init_tmp_attrs(rt, pool, 2); - rt->u.rip.metric = rte_store_tmp_attr(rt, EA_RIP_METRIC); - rt->u.rip.tag = rte_store_tmp_attr(rt, EA_RIP_TAG); + return (rte->src->owner->class == &rip_rte_owner_class) ? + SKIP_BACK(struct rip_proto, p.sources, rte->src->owner) : NULL; } static int rip_rte_better(struct rte *new, struct rte *old) { - return new->u.rip.metric < old->u.rip.metric; + ASSERT_DIE(new->src == old->src); + struct rip_proto *p = rip_rte_proto(new); + + u32 new_metric = ea_get_int(new->attrs, &ea_rip_metric, p->infinity); + u32 old_metric = ea_get_int(old->attrs, &ea_rip_metric, p->infinity); + + return new_metric < old_metric; } -static int -rip_rte_same(struct rte *new, struct rte *old) +static u32 +rip_rte_igp_metric(const rte *rt) { - return ((new->u.rip.metric == old->u.rip.metric) && - (new->u.rip.tag == old->u.rip.tag) && - (new->u.rip.from == old->u.rip.from)); + return ea_get_int(rt->attrs, &ea_rip_metric, IGP_METRIC_UNKNOWN); } - static void rip_postconfig(struct proto_config *CF) { @@ -1120,10 +1160,7 @@ rip_init(struct proto_config *CF) P->rt_notify = rip_rt_notify; P->neigh_notify = rip_neigh_notify; P->reload_routes = rip_reload_routes; - P->make_tmp_attrs = rip_make_tmp_attrs; - P->store_tmp_attrs = rip_store_tmp_attrs; - P->rte_better = rip_rte_better; - P->rte_same = rip_rte_same; + P->sources.class = &rip_rte_owner_class; return P; } @@ -1198,29 +1235,38 @@ rip_reconfigure(struct proto *P, struct proto_config *CF) static void rip_get_route_info(rte *rte, byte *buf) { - buf += bsprintf(buf, " (%d/%d)", rte->pref, rte->u.rip.metric); + struct rip_proto *p = rip_rte_proto(rte); + u32 rt_metric = ea_get_int(rte->attrs, &ea_rip_metric, p->infinity); + u32 rt_tag = ea_get_int(rte->attrs, &ea_rip_tag, 0); + + buf += bsprintf(buf, " (%d/%d)", rt_get_preference(rte), rt_metric); - if (rte->u.rip.tag) - bsprintf(buf, " [%04x]", rte->u.rip.tag); + if (rt_tag) + bsprintf(buf, " [%04x]", rt_tag); } -static int -rip_get_attr(const eattr *a, byte *buf, int buflen UNUSED) +static void +rip_tag_format(const eattr *a, byte *buf, uint buflen) { - switch (a->id) - { - case EA_RIP_METRIC: - bsprintf(buf, "metric: %d", a->u.data); - return GA_FULL; + bsnprintf(buf, buflen, "%04x", a->u.data); +} - case EA_RIP_TAG: - bsprintf(buf, "tag: %04x", a->u.data); - return GA_FULL; +static struct ea_class ea_rip_metric = { + .name = "rip_metric", + .type = T_INT, +}; - default: - return GA_UNKNOWN; - } -} +static struct ea_class ea_rip_tag = { + .name = "rip_tag", + .type = T_INT, + .format = rip_tag_format, +}; + +static struct ea_class ea_rip_from = { + .name = "rip_from", + .type = T_IFACE, + .readonly = 1, +}; void rip_show_interfaces(struct proto *P, const char *iff) @@ -1321,10 +1367,15 @@ rip_dump(struct proto *P) } +static struct rte_owner_class rip_rte_owner_class = { + .get_route_info = rip_get_route_info, + .rte_better = rip_rte_better, + .rte_igp_metric = rip_rte_igp_metric, +}; + struct protocol proto_rip = { .name = "RIP", .template = "rip%d", - .class = PROTOCOL_RIP, .preference = DEF_PREF_RIP, .channel_mask = NB_IP, .proto_size = sizeof(struct rip_proto), @@ -1335,6 +1386,16 @@ struct protocol proto_rip = { .start = rip_start, .shutdown = rip_shutdown, .reconfigure = rip_reconfigure, - .get_route_info = rip_get_route_info, - .get_attr = rip_get_attr }; + +void +rip_build(void) +{ + proto_build(&proto_rip); + + EA_REGISTER_ALL( + &ea_rip_metric, + &ea_rip_tag, + &ea_rip_from + ); +} diff --git a/proto/rip/rip.h b/proto/rip/rip.h index 8d347000..a01f8d3b 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -16,7 +16,7 @@ #include "nest/cli.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/password.h" #include "nest/locks.h" #include "nest/bfd.h" @@ -195,9 +195,6 @@ struct rip_rte #define RIP_ENTRY_VALID 1 /* Valid outgoing route */ #define RIP_ENTRY_STALE 2 /* Stale outgoing route, waiting for GC */ -#define EA_RIP_METRIC EA_CODE(PROTOCOL_RIP, 0) -#define EA_RIP_TAG EA_CODE(PROTOCOL_RIP, 1) - static inline int rip_is_v2(struct rip_proto *p) { return p->rip2; } diff --git a/proto/rpki/Makefile b/proto/rpki/Makefile index eb09b7df..8e3a2761 100644 --- a/proto/rpki/Makefile +++ b/proto/rpki/Makefile @@ -2,5 +2,6 @@ src := rpki.c packets.c tcp_transport.c ssh_transport.c transport.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,rpki_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index d246dd50..108da61b 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -661,9 +661,9 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_ * a refresh cycle. */ if (cache->p->roa4_channel) - rt_refresh_begin(cache->p->roa4_channel->table, cache->p->roa4_channel); + rt_refresh_begin(&cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_begin(cache->p->roa6_channel->table, cache->p->roa6_channel); + rt_refresh_begin(&cache->p->roa6_channel->in_req); cache->p->refresh_channels = 1; } @@ -846,9 +846,9 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da { cache->p->refresh_channels = 0; if (cache->p->roa4_channel) - rt_refresh_end(cache->p->roa4_channel->table, cache->p->roa4_channel); + rt_refresh_end(&cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_end(cache->p->roa6_channel->table, cache->p->roa6_channel); + rt_refresh_end(&cache->p->roa6_channel->in_req); } cache->last_update = current_time(); @@ -924,6 +924,9 @@ rpki_rx_hook(struct birdsock *sk, uint size) struct rpki_cache *cache = sk->data; struct rpki_proto *p = cache->p; + if ((p->p.proto_state == PS_DOWN) || (p->cache != cache)) + return 0; + byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; @@ -980,6 +983,8 @@ rpki_err_hook(struct birdsock *sk, int error_num) CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection"); } + if (cache->p->cache != cache) + return; rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); } @@ -999,6 +1004,9 @@ rpki_tx_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + while (rpki_fire_tx(cache) > 0) ; } @@ -1008,6 +1016,9 @@ rpki_connected_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + CACHE_TRACE(D_EVENTS, cache, "Connected"); proto_notify_state(&cache->p->p, PS_UP); diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 4ccb38e3..56615e36 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -120,26 +120,20 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_ { struct rpki_proto *p = cache->p; - rta a0 = { - .src = p->p.main_source, - .source = RTS_RPKI, - .scope = SCOPE_UNIVERSE, - .dest = RTD_NONE, - }; - - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a); + ea_list *ea = NULL; + ea_set_attr_u32(&ea, &ea_gen_preference, 0, channel->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_RPKI); - e->pflags = 0; + rte e0 = { .attrs = ea, .src = p->p.main_source, }; - rte_update2(channel, &pfxr->n, e, a0.src); + rte_update(channel, &pfxr->n, &e0, p->p.main_source); } void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr) { struct rpki_proto *p = cache->p; - rte_update2(channel, &pfxr->n, NULL, p->p.main_source); + rte_update(channel, &pfxr->n, NULL, p->p.main_source); } @@ -387,6 +381,9 @@ rpki_refresh_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -433,6 +430,9 @@ rpki_retry_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -478,6 +478,9 @@ rpki_expire_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + if (!cache->last_update) return; @@ -946,7 +949,6 @@ rpki_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUS struct protocol proto_rpki = { .name = "RPKI", .template = "rpki%d", - .class = PROTOCOL_RPKI, .preference = DEF_PREF_RPKI, .proto_size = sizeof(struct rpki_proto), .config_size = sizeof(struct rpki_config), @@ -960,3 +962,9 @@ struct protocol proto_rpki = { .reconfigure = rpki_reconfigure, .get_status = rpki_get_status, }; + +void +rpki_build(void) +{ + proto_build(&proto_rpki); +} diff --git a/proto/rpki/rpki.h b/proto/rpki/rpki.h index 8a5c38fd..26fbb46e 100644 --- a/proto/rpki/rpki.h +++ b/proto/rpki/rpki.h @@ -13,7 +13,7 @@ #define _BIRD_RPKI_H_ #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "lib/socket.h" #include "lib/ip.h" diff --git a/proto/static/Makefile b/proto/static/Makefile index e38f9b74..26aed31f 100644 --- a/proto/static/Makefile +++ b/proto/static/Makefile @@ -2,5 +2,6 @@ src := static.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,static_build) -tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file +tests_objs := $(tests_objs) $(src-o-files) diff --git a/proto/static/static.c b/proto/static/static.c index 2789c1bb..65f3eccc 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -38,7 +38,7 @@ #include "nest/bird.h" #include "nest/iface.h" #include "nest/protocol.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/cli.h" #include "conf/conf.h" #include "filter/filter.h" @@ -47,91 +47,96 @@ #include "static.h" -static linpool *static_lp; - static inline struct rte_src * static_get_source(struct static_proto *p, uint i) { return i ? rt_get_source(&p->p, i) : p->p.main_source; } +static inline void static_free_source(struct rte_src *src, uint i) +{ if (i) rt_unlock_source(src); } + static void static_announce_rte(struct static_proto *p, struct static_route *r) { - rta *a = allocz(RTA_MAX_SIZE); - a->src = static_get_source(p, r->index); - a->source = RTS_STATIC; - a->scope = SCOPE_UNIVERSE; - a->dest = r->dest; + struct rte_src *src; + ea_list *ea = NULL; + ea_set_attr_u32(&ea, &ea_gen_preference, 0, p->p.main_channel->preference); + ea_set_attr_u32(&ea, &ea_gen_source, 0, RTS_STATIC); if (r->dest == RTD_UNICAST) { - struct static_route *r2; - struct nexthop *nhs = NULL; + uint sz = 0; + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) + if (r2->active) + sz += NEXTHOP_SIZE_CNT(r2->mls ? r2->mls->length / sizeof(u32) : 0); - for (r2 = r; r2; r2 = r2->mp_next) + if (!sz) + goto withdraw; + + struct nexthop_adata *nhad = allocz(sz + sizeof *nhad); + struct nexthop *nh = &nhad->nh; + + for (struct static_route *r2 = r; r2; r2 = r2->mp_next) { if (!r2->active) continue; - struct nexthop *nh = allocz(NEXTHOP_MAX_SIZE); - nh->gw = r2->via; - nh->iface = r2->neigh->iface; - nh->flags = r2->onlink ? RNF_ONLINK : 0; - nh->weight = r2->weight; + *nh = (struct nexthop) { + .gw = r2->via, + .iface = r2->neigh->iface, + .flags = r2->onlink ? RNF_ONLINK : 0, + .weight = r2->weight, + }; + if (r2->mls) { - nh->labels = r2->mls->len; - memcpy(nh->label, r2->mls->stack, r2->mls->len * sizeof(u32)); + nh->labels = r2->mls->length / sizeof(u32); + memcpy(nh->label, r2->mls->data, r2->mls->length); } - nexthop_insert(&nhs, nh); + nh = NEXTHOP_NEXT(nh); } - if (!nhs) - goto withdraw; - - nexthop_link(a, nhs); + ea_set_attr_data(&ea, &ea_gen_nexthop, 0, + nhad->ad.data, (void *) nh - (void *) nhad->ad.data); } - if (r->dest == RTDX_RECURSIVE) + else if (r->dest == RTDX_RECURSIVE) { rtable *tab = ipa_is_ip4(r->via) ? p->igp_table_ip4 : p->igp_table_ip6; - rta_set_recursive_next_hop(p->p.main_channel->table, a, tab, r->via, IPA_NONE, r->mls); + u32 *labels = r->mls ? (void *) r->mls->data : NULL; + u32 lnum = r->mls ? r->mls->length / sizeof(u32) : 0; + + ea_set_hostentry(&ea, p->p.main_channel->table, tab, + r->via, IPA_NONE, lnum, labels); } + else if (r->dest) + ea_set_dest(&ea, 0, r->dest); + /* Already announced */ if (r->state == SRS_CLEAN) return; /* We skip rta_lookup() here */ - rte *e = rte_get_temp(a); - e->pflags = 0; + src = static_get_source(p, r->index); + rte e0 = { .attrs = ea, .src = src, .net = r->net, }, *e = &e0; + /* Evaluate the filter */ if (r->cmds) - { - /* Create a temporary table node */ - e->net = alloca(sizeof(net) + r->net->length); - memset(e->net, 0, sizeof(net) + r->net->length); - net_copy(e->net->n.addr, r->net); - - /* Evaluate the filter */ - f_eval_rte(r->cmds, &e, static_lp); + f_eval_rte(r->cmds, e); - /* Remove the temporary node */ - e->net = NULL; - } + rte_update(p->p.main_channel, r->net, e, src); + static_free_source(src, r->index); - rte_update2(p->p.main_channel, r->net, e, a->src); r->state = SRS_CLEAN; - - if (r->cmds) - lp_flush(static_lp); - return; withdraw: if (r->state == SRS_DOWN) return; - rte_update2(p->p.main_channel, r->net, NULL, a->src); + src = static_get_source(p, r->index); + rte_update(p->p.main_channel, r->net, NULL, src); + static_free_source(src, r->index); r->state = SRS_DOWN; } @@ -297,7 +302,11 @@ static void static_remove_rte(struct static_proto *p, struct static_route *r) { if (r->state) - rte_update2(p->p.main_channel, r->net, NULL, static_get_source(p, r->index)); + { + struct rte_src *src = static_get_source(p, r->index); + rte_update(p->p.main_channel, r->net, NULL, src); + static_free_source(src, r->index); + } static_reset_rte(p, r); } @@ -320,31 +329,17 @@ static_same_dest(struct static_route *x, struct static_route *y) (x->weight != y->weight) || (x->use_bfd != y->use_bfd) || (!x->mls != !y->mls) || - ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + ((x->mls) && (y->mls) && adata_same(x->mls, y->mls))) return 0; - - if (!x->mls) - continue; - - for (uint i = 0; i < x->mls->len; i++) - if (x->mls->stack[i] != y->mls->stack[i]) - return 0; } return !x && !y; case RTDX_RECURSIVE: if (!ipa_equal(x->via, y->via) || (!x->mls != !y->mls) || - ((x->mls) && (y->mls) && (x->mls->len != y->mls->len))) + ((x->mls) && (y->mls) && adata_same(x->mls, y->mls))) return 0; - if (!x->mls) - return 1; - - for (uint i = 0; i < x->mls->len; i++) - if (x->mls->stack[i] != y->mls->stack[i]) - return 0; - return 1; default: @@ -413,16 +408,16 @@ static_reload_routes(struct channel *C) static int static_rte_better(rte *new, rte *old) { - u32 n = ea_get_int(new->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); - u32 o = ea_get_int(old->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); + u32 n = ea_get_int(new->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); + u32 o = ea_get_int(old->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); return n < o; } static int static_rte_mergable(rte *pri, rte *sec) { - u32 a = ea_get_int(pri->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); - u32 b = ea_get_int(sec->attrs->eattrs, EA_GEN_IGP_METRIC, IGP_METRIC_UNKNOWN); + u32 a = ea_get_int(pri->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); + u32 b = ea_get_int(sec->attrs, &ea_gen_igp_metric, IGP_METRIC_UNKNOWN); return a == b; } @@ -454,6 +449,8 @@ static_postconfig(struct proto_config *CF) static_index_routes(cf); } +static struct rte_owner_class static_rte_owner_class; + static struct proto * static_init(struct proto_config *CF) { @@ -465,8 +462,7 @@ static_init(struct proto_config *CF) P->neigh_notify = static_neigh_notify; P->reload_routes = static_reload_routes; - P->rte_better = static_rte_better; - P->rte_mergable = static_rte_mergable; + P->sources.class = &static_rte_owner_class; if (cf->igp_table_ip4) p->igp_table_ip4 = cf->igp_table_ip4->table; @@ -484,9 +480,6 @@ static_start(struct proto *P) struct static_config *cf = (void *) P->cf; struct static_route *r; - if (!static_lp) - static_lp = lp_new(&root_pool, LP_GOOD_SIZE(1024)); - if (p->igp_table_ip4) rt_lock_table(p->igp_table_ip4); @@ -517,19 +510,13 @@ static_shutdown(struct proto *P) WALK_LIST(r, cf->routes) static_reset_rte(p, r); - return PS_DOWN; -} - -static void -static_cleanup(struct proto *P) -{ - struct static_proto *p = (void *) P; - if (p->igp_table_ip4) rt_unlock_table(p->igp_table_ip4); if (p->igp_table_ip6) rt_unlock_table(p->igp_table_ip6); + + return PS_DOWN; } static void @@ -719,11 +706,12 @@ static_copy_config(struct proto_config *dest, struct proto_config *src) static void static_get_route_info(rte *rte, byte *buf) { - eattr *a = ea_find(rte->attrs->eattrs, EA_GEN_IGP_METRIC); - if (a) - buf += bsprintf(buf, " (%d/%u)", rte->pref, a->u.data); + eattr *a = ea_find(rte->attrs, &ea_gen_igp_metric); + u32 pref = rt_get_preference(rte); + if (a && (a->u.data < IGP_METRIC_UNKNOWN)) + buf += bsprintf(buf, " (%d/%u)", pref, a->u.data); else - buf += bsprintf(buf, " (%d)", rte->pref); + buf += bsprintf(buf, " (%d)", pref); } static void @@ -773,11 +761,15 @@ static_show(struct proto *P) static_show_rt(r); } +static struct rte_owner_class static_rte_owner_class = { + .get_route_info = static_get_route_info, + .rte_better = static_rte_better, + .rte_mergable = static_rte_mergable, +}; struct protocol proto_static = { .name = "Static", .template = "static%d", - .class = PROTOCOL_STATIC, .preference = DEF_PREF_STATIC, .channel_mask = NB_ANY, .proto_size = sizeof(struct static_proto), @@ -787,8 +779,12 @@ struct protocol proto_static = { .dump = static_dump, .start = static_start, .shutdown = static_shutdown, - .cleanup = static_cleanup, .reconfigure = static_reconfigure, .copy_config = static_copy_config, - .get_route_info = static_get_route_info, }; + +void +static_build(void) +{ + proto_build(&proto_static); +} diff --git a/proto/static/static.h b/proto/static/static.h index fc91f71c..ea7ca33b 100644 --- a/proto/static/static.h +++ b/proto/static/static.h @@ -9,7 +9,7 @@ #ifndef _BIRD_STATIC_H_ #define _BIRD_STATIC_H_ -#include "nest/route.h" +#include "nest/rt.h" #include "nest/bfd.h" #include "lib/buffer.h" @@ -49,7 +49,7 @@ struct static_route { byte weight; /* Multipath next hop weight */ byte use_bfd; /* Configured to use BFD */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ - mpls_label_stack *mls; /* MPLS label stack; may be NULL */ + struct adata *mls; /* MPLS label stack; may be NULL */ }; /* diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 521f43f3..1c1bd50c 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -25,7 +25,7 @@ #include "nest/bird.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "sysdep/unix/unix.h" @@ -398,7 +398,6 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) /* p is NULL iff KRT_SHARED_SOCKET and !scan */ int ipv6; - rte *e; net *net; sockaddr dst, gate, mask; ip_addr idst, igate, imask; @@ -519,11 +518,10 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) net = net_get(p->p.main_channel->table, &ndst); rta a = { - .src = p->p.main_source, - .source = RTS_INHERIT, - .scope = SCOPE_UNIVERSE, }; + ea_set_attr_u32(&a->eattrs, &ea_gen_source, 0, RTS_INHERIT); + /* reject/blackhole routes have also set RTF_GATEWAY, we wil check them first. */ @@ -579,19 +577,16 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) } } - done: - e = rte_get_temp(&a); - e->net = net; - e->u.krt.src = src; - e->u.krt.proto = src2; - e->u.krt.seen = 0; - e->u.krt.best = 0; - e->u.krt.metric = 0; + done:; + rte e0 = { .attrs = &a, .net = net, }; + + ea_set_attr(e0.attrs->eattrs, + EA_LITERAL_EMBEDDED(EA_KRT_SOURCE, T_INT, 0, src2)); if (scan) - krt_got_route(p, e); + krt_got_route(p, &e0, src); else - krt_got_route_async(p, e, new); + krt_got_route_async(p, &e0, new, src); } static void diff --git a/sysdep/linux/krt-sys.h b/sysdep/linux/krt-sys.h index 8897f889..aa90f6e4 100644 --- a/sysdep/linux/krt-sys.h +++ b/sysdep/linux/krt-sys.h @@ -34,38 +34,6 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i UNUSED) { return N #define KRT_ALLOW_MERGE_PATHS 1 -#define EA_KRT_PREFSRC EA_CODE(PROTOCOL_KERNEL, 0x10) -#define EA_KRT_REALM EA_CODE(PROTOCOL_KERNEL, 0x11) -#define EA_KRT_SCOPE EA_CODE(PROTOCOL_KERNEL, 0x12) - - -#define KRT_METRICS_MAX 0x10 /* RTAX_QUICKACK+1 */ -#define KRT_METRICS_OFFSET 0x20 /* Offset of EA_KRT_* vs RTAX_* */ - -#define KRT_FEATURES_MAX 4 - -/* - * Following attributes are parts of RTA_METRICS kernel route attribute, their - * ids must be consistent with their RTAX_* constants (+ KRT_METRICS_OFFSET) - */ -#define EA_KRT_METRICS EA_CODE(PROTOCOL_KERNEL, 0x20) /* Dummy one */ -#define EA_KRT_LOCK EA_CODE(PROTOCOL_KERNEL, 0x21) -#define EA_KRT_MTU EA_CODE(PROTOCOL_KERNEL, 0x22) -#define EA_KRT_WINDOW EA_CODE(PROTOCOL_KERNEL, 0x23) -#define EA_KRT_RTT EA_CODE(PROTOCOL_KERNEL, 0x24) -#define EA_KRT_RTTVAR EA_CODE(PROTOCOL_KERNEL, 0x25) -#define EA_KRT_SSTRESH EA_CODE(PROTOCOL_KERNEL, 0x26) -#define EA_KRT_CWND EA_CODE(PROTOCOL_KERNEL, 0x27) -#define EA_KRT_ADVMSS EA_CODE(PROTOCOL_KERNEL, 0x28) -#define EA_KRT_REORDERING EA_CODE(PROTOCOL_KERNEL, 0x29) -#define EA_KRT_HOPLIMIT EA_CODE(PROTOCOL_KERNEL, 0x2a) -#define EA_KRT_INITCWND EA_CODE(PROTOCOL_KERNEL, 0x2b) -#define EA_KRT_FEATURES EA_CODE(PROTOCOL_KERNEL, 0x2c) -#define EA_KRT_RTO_MIN EA_CODE(PROTOCOL_KERNEL, 0x2d) -#define EA_KRT_INITRWND EA_CODE(PROTOCOL_KERNEL, 0x2e) -#define EA_KRT_QUICKACK EA_CODE(PROTOCOL_KERNEL, 0x2f) - - struct krt_params { u32 table_id; /* Kernel table ID we sync with */ u32 metric; /* Kernel metric used for all routes */ diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y index 487ad1d8..7ba8c7c9 100644 --- a/sysdep/linux/netlink.Y +++ b/sysdep/linux/netlink.Y @@ -11,9 +11,6 @@ CF_HDR CF_DECLS CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER, - KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW, - KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING, - KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK, KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR, KRT_LOCK_SSTRESH, KRT_LOCK_CWND, KRT_LOCK_ADVMSS, KRT_LOCK_REORDERING, KRT_LOCK_HOPLIMIT, KRT_LOCK_RTO_MIN, KRT_FEATURE_ECN, KRT_FEATURE_ALLFRAG) @@ -28,39 +25,22 @@ kern_sys_item: | NETLINK RX BUFFER expr { THIS_KRT->sys.netlink_rx_buffer = $4; } ; -dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ; -dynamic_attr: KRT_REALM { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REALM); } ; -dynamic_attr: KRT_SCOPE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SCOPE); } ; - -dynamic_attr: KRT_MTU { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_MTU); } ; -dynamic_attr: KRT_WINDOW { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_WINDOW); } ; -dynamic_attr: KRT_RTT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTT); } ; -dynamic_attr: KRT_RTTVAR { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTTVAR); } ; -dynamic_attr: KRT_SSTRESH { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SSTRESH); } ; -dynamic_attr: KRT_CWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_CWND); } ; -dynamic_attr: KRT_ADVMSS { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_ADVMSS); } ; -dynamic_attr: KRT_REORDERING { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_REORDERING); } ; -dynamic_attr: KRT_HOPLIMIT { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_HOPLIMIT); } ; -dynamic_attr: KRT_INITCWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITCWND); } ; -dynamic_attr: KRT_RTO_MIN { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_RTO_MIN); } ; -dynamic_attr: KRT_INITRWND { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_INITRWND); } ; -dynamic_attr: KRT_QUICKACK { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_QUICKACK); } ; - /* Bits of EA_KRT_LOCK, based on RTAX_* constants */ -dynamic_attr: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, T_BOOL, EA_KRT_LOCK); } ; -dynamic_attr: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, T_BOOL, EA_KRT_LOCK); } ; - -dynamic_attr: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, T_BOOL, EA_KRT_FEATURES); } ; -dynamic_attr: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr(3, T_BOOL, EA_KRT_FEATURES); } ; +attr_bit: KRT_LOCK_MTU { $$ = f_new_dynamic_attr_bit(2, "krt_lock"); } ; +attr_bit: KRT_LOCK_WINDOW { $$ = f_new_dynamic_attr_bit(3, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTT { $$ = f_new_dynamic_attr_bit(4, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTTVAR { $$ = f_new_dynamic_attr_bit(5, "krt_lock"); } ; +attr_bit: KRT_LOCK_SSTRESH { $$ = f_new_dynamic_attr_bit(6, "krt_lock"); } ; +attr_bit: KRT_LOCK_CWND { $$ = f_new_dynamic_attr_bit(7, "krt_lock"); } ; +attr_bit: KRT_LOCK_ADVMSS { $$ = f_new_dynamic_attr_bit(8, "krt_lock"); } ; +attr_bit: KRT_LOCK_REORDERING { $$ = f_new_dynamic_attr_bit(9, "krt_lock"); } ; +attr_bit: KRT_LOCK_HOPLIMIT { $$ = f_new_dynamic_attr_bit(10, "krt_lock"); } ; +attr_bit: KRT_LOCK_RTO_MIN { $$ = f_new_dynamic_attr_bit(13, "krt_lock"); } ; + +/* Bits of EA_KRT_FEATURES */ +attr_bit: KRT_FEATURE_ECN { $$ = f_new_dynamic_attr_bit(0, "krt_features"); } ; +attr_bit: KRT_FEATURE_ALLFRAG { $$ = f_new_dynamic_attr_bit(3, "krt_features"); } ; CF_CODE diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index bc65e0d2..099ae6e9 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -17,7 +17,7 @@ #undef LOCAL_DEBUG #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "lib/alloca.h" @@ -26,6 +26,7 @@ #include "lib/socket.h" #include "lib/string.h" #include "lib/hash.h" +#include "lib/macro.h" #include "conf/conf.h" #include <asm/types.h> @@ -109,8 +110,8 @@ struct nl_parse_state int scan; int merge; - net *net; - rta *attrs; + net_addr *net; + ea_list *attrs; struct krt_proto *proto; s8 new; s8 krt_src; @@ -122,6 +123,101 @@ struct nl_parse_state }; /* + * Netlink eattr definitions + */ + +#define KRT_METRICS_MAX ARRAY_SIZE(ea_krt_metrics) +#define KRT_FEATURES_MAX 4 + +static void krt_bitfield_format(const eattr *e, byte *buf, uint buflen); + +static struct ea_class + ea_krt_prefsrc = { + .name = "krt_prefsrc", + .type = T_IP, + }, + ea_krt_realm = { + .name = "krt_realm", + .type = T_INT, + }, + ea_krt_scope = { + .name = "krt_scope", + .type = T_INT, + }; + +static struct ea_class ea_krt_metrics[] = { + [RTAX_LOCK] = { + .name = "krt_lock", + .type = T_INT, + .format = krt_bitfield_format, + }, + [RTAX_FEATURES] = { + .name = "krt_features", + .type = T_INT, + .format = krt_bitfield_format, + }, +#define KRT_METRIC_INT(_rtax, _name) [_rtax] = { .name = _name, .type = T_INT } + KRT_METRIC_INT(RTAX_MTU, "krt_mtu"), + KRT_METRIC_INT(RTAX_WINDOW, "krt_window"), + KRT_METRIC_INT(RTAX_RTT, "krt_rtt"), + KRT_METRIC_INT(RTAX_RTTVAR, "krt_rttvar"), + KRT_METRIC_INT(RTAX_SSTHRESH, "krt_sstresh"), + KRT_METRIC_INT(RTAX_CWND, "krt_cwnd"), + KRT_METRIC_INT(RTAX_ADVMSS, "krt_advmss"), + KRT_METRIC_INT(RTAX_REORDERING, "krt_reordering"), + KRT_METRIC_INT(RTAX_HOPLIMIT, "krt_hoplimit"), + KRT_METRIC_INT(RTAX_INITCWND, "krt_initcwnd"), + KRT_METRIC_INT(RTAX_RTO_MIN, "krt_rto_min"), + KRT_METRIC_INT(RTAX_INITRWND, "krt_initrwnd"), + KRT_METRIC_INT(RTAX_QUICKACK, "krt_quickack"), +#undef KRT_METRIC_INT +}; + +static const char *krt_metrics_names[KRT_METRICS_MAX] = { + NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", + "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" +}; + +static const char *krt_features_names[KRT_FEATURES_MAX] = { + "ecn", NULL, NULL, "allfrag" +}; + +static void +krt_bitfield_format(const eattr *a, byte *buf, uint buflen) +{ + if (a->id == ea_krt_metrics[RTAX_LOCK].id) + ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); + else if (a->id == ea_krt_metrics[RTAX_FEATURES].id) + ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); +} + +static void +nl_ea_register(void) +{ + EA_REGISTER_ALL( + &ea_krt_prefsrc, + &ea_krt_realm, + &ea_krt_scope + ); + + for (uint i = 0; i < KRT_METRICS_MAX; i++) + { + if (!ea_krt_metrics[i].name) + ea_krt_metrics[i] = (struct ea_class) { + .name = mb_sprintf(&root_pool, "krt_metric_%d", i), + .type = T_INT, + }; + + ea_register_init(&ea_krt_metrics[i]); + } + + for (uint i = 1; i < KRT_METRICS_MAX; i++) + ASSERT_DIE(ea_krt_metrics[i].id == ea_krt_metrics[0].id + i); +} + + + +/* * Synchronous Netlink interface */ @@ -747,12 +843,12 @@ nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUS } static void -nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, ea_list *eattrs) +nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop_adata *nhad, int af, ea_list *eattrs) { struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH); - eattr *flow = ea_find(eattrs, EA_KRT_REALM); + eattr *flow = ea_find(eattrs, &ea_krt_realm); - for (; nh; nh = nh->next) + NEXTHOP_WALK(nh, nhad) { struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize); @@ -776,31 +872,44 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e nl_close_attr(h, a); } -static struct nexthop * +static struct nexthop_adata * nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src) { struct rtattr *a[BIRD_RTA_MAX]; - struct rtnexthop *nh = RTA_DATA(ra); - struct nexthop *rv, *first, **last; - unsigned len = RTA_PAYLOAD(ra); + struct rtnexthop *nh, *orig_nh = RTA_DATA(ra); + unsigned len, orig_len = RTA_PAYLOAD(ra); + uint cnt = 0; - first = NULL; - last = &first; + /* First count the nexthops */ + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) + { + /* Use RTNH_OK(nh,len) ?? */ + if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) + goto err; + + if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) + ; + else + cnt++; + } + + struct nexthop_adata *nhad = lp_allocz(s->pool, cnt * NEXTHOP_MAX_SIZE + sizeof *nhad); + struct nexthop *rv = &nhad->nh; - while (len) + for (len = orig_len, nh = orig_nh; len; len -= NLMSG_ALIGN(nh->rtnh_len), nh = RTNH_NEXT(nh)) { /* Use RTNH_OK(nh,len) ?? */ if ((len < sizeof(*nh)) || (len < nh->rtnh_len)) goto err; if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) - goto next; + continue; - *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); - last = &(rv->next); + *rv = (struct nexthop) { + .weight = nh->rtnh_hops, + .iface = if_find_by_index(nh->rtnh_ifindex), + }; - rv->weight = nh->rtnh_hops; - rv->iface = if_find_by_index(nh->rtnh_ifindex); if (!rv->iface) { log(L_ERR "KRT: Received route %N with unknown ifindex %u", n, nh->rtnh_ifindex); @@ -879,16 +988,14 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr } #endif - next: - len -= NLMSG_ALIGN(nh->rtnh_len); - nh = RTNH_NEXT(nh); + rv = NEXTHOP_NEXT(rv); } - /* Ensure nexthops are sorted to satisfy nest invariant */ - if (!nexthop_is_sorted(first)) - first = nexthop_sort(first); + /* Store final length */ + nhad->ad.length = (void *) rv - (void *) nhad->ad.data; - return first; + /* Ensure nexthops are sorted to satisfy nest invariant */ + return nexthop_is_sorted(nhad) ? nhad : nexthop_sort(nhad, s->pool); err: log(L_ERR "KRT: Received strange multipath route %N", n); @@ -1316,11 +1423,16 @@ HASH_DEFINE_REHASH_FN(RTH, struct krt_proto) int krt_capable(rte *e) { - rta *a = e->attrs; + eattr *ea = ea_find(e->attrs, &ea_gen_nexthop); + if (!ea) + return 0; + + struct nexthop_adata *nhad = (void *) ea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) + return 1; - switch (a->dest) + switch (nhad->dest) { - case RTD_UNICAST: case RTD_BLACKHOLE: case RTD_UNREACHABLE: case RTD_PROHIBIT: @@ -1332,22 +1444,21 @@ krt_capable(rte *e) } static inline int -nh_bufsize(struct nexthop *nh) +nh_bufsize(struct nexthop_adata *nhad) { int rv = 0; - for (; nh != NULL; nh = nh->next) + NEXTHOP_WALK(nh, nhad) rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr))); return rv; } static int -nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) +nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop_adata *nh) { eattr *ea; - net *net = e->net; - rta *a = e->attrs; - ea_list *eattrs = a->eattrs; - int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh)); + ea_list *eattrs = e->attrs; + + int bufsize = 128 + KRT_METRICS_MAX*8 + (nh ? nh_bufsize(nh) : 0); u32 priority = 0; struct { @@ -1359,7 +1470,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) int rsize = sizeof(*r) + bufsize; r = alloca(rsize); - DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); + DBG("nl_send_route(%N,op=%x)\n", e->net, op); bzero(&r->h, sizeof(r->h)); bzero(&r->r, sizeof(r->r)); @@ -1368,7 +1479,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; r->r.rtm_family = p->af; - r->r.rtm_dst_len = net_pxlen(net->n.addr); + r->r.rtm_dst_len = net_pxlen(e->net); r->r.rtm_protocol = RTPROT_BIRD; r->r.rtm_scope = RT_SCOPE_NOWHERE; #ifdef HAVE_MPLS_KERNEL @@ -1380,7 +1491,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) * 2) Never use RTA_PRIORITY */ - u32 label = net_mpls(net->n.addr); + u32 label = net_mpls(e->net); nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label); r->r.rtm_scope = RT_SCOPE_UNIVERSE; r->r.rtm_type = RTN_UNICAST; @@ -1388,12 +1499,12 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) else #endif { - nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); + nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(e->net)); /* Add source address for IPv6 SADR routes */ - if (net->n.addr->type == NET_IP6_SADR) + if (e->net->type == NET_IP6_SADR) { - net_addr_ip6_sadr *a = (void *) &net->n.addr; + net_addr_ip6_sadr *a = (void *) &e->net; nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix); r->r.rtm_src_len = a->src_pxlen; } @@ -1413,11 +1524,9 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) if (p->af == AF_MPLS) priority = 0; - else if (a->source == RTS_DUMMY) - priority = e->u.krt.metric; else if (KRT_CF->sys.metric) priority = KRT_CF->sys.metric; - else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC))) + else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, &ea_krt_metric))) priority = ea->u.data; if (priority) @@ -1430,15 +1539,15 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) /* Default scope is LINK for device routes, UNIVERSE otherwise */ if (p->af == AF_MPLS) r->r.rtm_scope = RT_SCOPE_UNIVERSE; - else if (ea = ea_find(eattrs, EA_KRT_SCOPE)) + else if (ea = ea_find(eattrs, &ea_krt_scope)) r->r.rtm_scope = ea->u.data; else - r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->nh.gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; - if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) + if (ea = ea_find(eattrs, &ea_krt_prefsrc)) nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); - if (ea = ea_find(eattrs, EA_KRT_REALM)) + if (ea = ea_find(eattrs, &ea_krt_realm)) nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); @@ -1446,9 +1555,9 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh) metrics[0] = 0; struct ea_walk_state ews = { .eattrs = eattrs }; - while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX)) + while (ea = ea_walk(&ews, ea_krt_metrics[0].id, KRT_METRICS_MAX)) { - int id = ea->id - EA_KRT_METRICS; + int id = ea->id - ea_krt_metrics[0].id; metrics[0] |= 1 << id; metrics[id] = ea->u.data; } @@ -1462,14 +1571,14 @@ dest: { case RTD_UNICAST: r->r.rtm_type = RTN_UNICAST; - if (nh->next && !krt_ecmp6(p)) + if (!NEXTHOP_ONE(nh) && !krt_ecmp6(p)) nl_add_multipath(&r->h, rsize, nh, p->af, eattrs); else { - nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index); - nl_add_nexthop(&r->h, rsize, nh, p->af); + nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->nh.iface->index); + nl_add_nexthop(&r->h, rsize, &nh->nh, p->af); - if (nh->flags & RNF_ONLINK) + if (nh->nh.flags & RNF_ONLINK) r->r.rtm_flags |= RTNH_F_ONLINK; } break; @@ -1495,28 +1604,43 @@ dest: static inline int nl_add_rte(struct krt_proto *p, rte *e) { - rta *a = e->attrs; + ea_list *ea = e->attrs; int err = 0; - if (krt_ecmp6(p) && a->nh.next) - { - struct nexthop *nh = &(a->nh); - - err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh); - if (err < 0) - return err; + eattr *nhea = ea_find(ea, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; - for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh); + if (krt_ecmp6(p) && nhad && NEXTHOP_IS_REACHABLE(nhad) && !NEXTHOP_ONE(nhad)) + { + uint cnt = 0; + NEXTHOP_WALK(nh, nhad) + { + struct { + struct nexthop_adata nhad; + u32 labels[MPLS_MAX_LABEL_STACK]; + } nhx; + memcpy(&nhx.nhad.nh, nh, NEXTHOP_SIZE(nh)); + nhx.nhad.ad.length = (void *) NEXTHOP_NEXT(&nhx.nhad.nh) - (void *) nhx.nhad.ad.data; + + if (!cnt++) + { + err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, &nhx.nhad); + if (err < 0) + return err; + } + else + err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, &nhx.nhad); + } return err; } - return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh)); + return nl_send_route(p, e, NL_OP_ADD, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } static inline int -nl_delete_rte(struct krt_proto *p, rte *e) +nl_delete_rte(struct krt_proto *p, const rte *e) { int err = 0; @@ -1531,13 +1655,15 @@ nl_delete_rte(struct krt_proto *p, rte *e) static inline int nl_replace_rte(struct krt_proto *p, rte *e) { - rta *a = e->attrs; - return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh)); + eattr *nhea = ea_find(e->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + return nl_send_route(p, e, NL_OP_REPLACE, + NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest, nhad); } void -krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) +krt_replace_rte(struct krt_proto *p, const net_addr *n UNUSED, rte *new, const rte *old) { int err = 0; @@ -1576,7 +1702,7 @@ krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) } static int -nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) +nl_mergable_route(struct nl_parse_state *s, const net_addr *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) { /* Route merging is used for IPv6 scans */ if (!s->scan || (rtm_family != AF_INET6)) @@ -1596,18 +1722,25 @@ nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint static void nl_announce_route(struct nl_parse_state *s) { - rte *e = rte_get_temp(s->attrs); - e->net = s->net; - e->u.krt.src = s->krt_src; - e->u.krt.proto = s->krt_proto; - e->u.krt.seen = 0; - e->u.krt.best = 0; - e->u.krt.metric = s->krt_metric; + rte e0 = { + .attrs = s->attrs, + .net = s->net, + }; + + EA_LOCAL_LIST(2) ea = { + .l = { .count = 2, .next = e0.attrs }, + .a = { + EA_LITERAL_EMBEDDED(&ea_krt_source, 0, s->krt_proto), + EA_LITERAL_EMBEDDED(&ea_krt_metric, 0, s->krt_metric), + }, + }; + + e0.attrs = &ea.l; if (s->scan) - krt_got_route(s->proto, e); + krt_got_route(s->proto, &e0, s->krt_src); else - krt_got_route_async(s->proto, e, s->new); + krt_got_route_async(s->proto, &e0, s->new, s->krt_src); s->net = NULL; s->attrs = NULL; @@ -1757,92 +1890,121 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) krt_src = KRT_SRC_ALIEN; } - net_addr *n = &dst; + net_addr *net = &dst; if (p->p.net_type == NET_IP6_SADR) { - n = alloca(sizeof(net_addr_ip6_sadr)); - net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst), + net = alloca(sizeof(net_addr_ip6_sadr)); + net_fill_ip6_sadr(net, net6_prefix(&dst), net6_pxlen(&dst), net6_prefix(&src), net6_pxlen(&src)); } - net *net = net_get(p->p.main_channel->table, n); - if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family)) nl_announce_route(s); - rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); - ra->src = p->p.main_source; - ra->source = RTS_INHERIT; - ra->scope = SCOPE_UNIVERSE; + ea_list *ra = NULL; + ea_set_attr_u32(&ra, &ea_gen_source, 0, RTS_INHERIT); if (a[RTA_FLOW]) s->rta_flow = rta_get_u32(a[RTA_FLOW]); else s->rta_flow = 0; + union { + struct { + struct adata ad; + struct nexthop nh; + u32 labels[MPLS_MAX_LABEL_STACK]; + }; + struct nexthop_adata nhad; + } nhad = {}; + switch (i->rtm_type) { case RTN_UNICAST: - ra->dest = RTD_UNICAST; - if (a[RTA_MULTIPATH]) { - struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src); + struct nexthop_adata *nh = nl_parse_multipath(s, p, net, a[RTA_MULTIPATH], i->rtm_family, krt_src); if (!nh) SKIP("strange RTA_MULTIPATH\n"); - nexthop_link(ra, nh); + ea_set_attr(&ra, EA_LITERAL_DIRECT_ADATA( + &ea_gen_nexthop, 0, &nh->ad)); break; } if ((i->rtm_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) SKIP("ignore RTNH_F_DEAD\n"); - ra->nh.iface = if_find_by_index(oif); - if (!ra->nh.iface) + nhad.nh.iface = if_find_by_index(oif); + if (!nhad.nh.iface) { - log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", net, oif); return; } if (a[RTA_GATEWAY]) - ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]); + nhad.nh.gw = rta_get_ipa(a[RTA_GATEWAY]); #ifdef HAVE_MPLS_KERNEL if (a[RTA_VIA]) - ra->nh.gw = rta_get_via(a[RTA_VIA]); + nhad.nh.gw = rta_get_via(a[RTA_VIA]); #endif - if (ipa_nonzero(ra->nh.gw)) + if (ipa_nonzero(nhad.nh.gw)) { /* Silently skip strange 6to4 routes */ const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit)) + if ((i->rtm_family == AF_INET6) && ipa_in_netX(nhad.nh.gw, (net_addr *) &sit)) return; if (i->rtm_flags & RTNH_F_ONLINK) - ra->nh.flags |= RNF_ONLINK; + nhad.nh.flags |= RNF_ONLINK; neighbor *nbr; - nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface, - (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); + nbr = neigh_find(&p->p, nhad.nh.gw, nhad.nh.iface, + (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, - ra->nh.gw); + log(L_ERR "KRT: Received route %N with strange next-hop %I", net, + nhad.nh.gw); return; } } +#ifdef HAVE_MPLS_KERNEL + if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !a[RTA_MULTIPATH]) + nhad.nh.labels = rta_get_mpls(a[RTA_NEWDST], nhad.nh.label); + + if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !a[RTA_MULTIPATH]) + { + switch (rta_get_u16(a[RTA_ENCAP_TYPE])) + { + case LWTUNNEL_ENCAP_MPLS: + { + struct rtattr *enca[BIRD_RTA_MAX]; + nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); + nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); + nhad.nh.labels = rta_get_mpls(enca[RTA_DST], nhad.nh.label); + break; + } + default: + SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); + break; + } + } +#endif + + /* Finalize the nexthop */ + nhad.ad.length = (void *) NEXTHOP_NEXT(&nhad.nh) - (void *) nhad.ad.data; break; case RTN_BLACKHOLE: - ra->dest = RTD_BLACKHOLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_BLACKHOLE); break; case RTN_UNREACHABLE: - ra->dest = RTD_UNREACHABLE; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_UNREACHABLE); break; case RTN_PROHIBIT: - ra->dest = RTD_PROHIBIT; + nhad.nhad = NEXTHOP_DEST_LITERAL(RTD_PROHIBIT); break; /* FIXME: What about RTN_THROW? */ default: @@ -1850,105 +2012,36 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; } -#ifdef HAVE_MPLS_KERNEL - if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next) - ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label); - - if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next) - { - switch (rta_get_u16(a[RTA_ENCAP_TYPE])) - { - case LWTUNNEL_ENCAP_MPLS: - { - struct rtattr *enca[BIRD_RTA_MAX]; - nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]); - nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca)); - ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label); - break; - } - default: - SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE])); - break; - } - } -#endif - if (i->rtm_scope != def_scope) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_SCOPE; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = i->rtm_scope; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_scope, 0, i->rtm_scope)); if (a[RTA_PREFSRC]) - { - ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_PREFSRC; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_IP_ADDRESS; - - struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps)); - ad->length = sizeof(ps); - memcpy(ad->data, &ps, sizeof(ps)); - - ea->attrs[0].u.ptr = ad; - } + { + ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); + + ea_set_attr(&ra, + EA_LITERAL_STORE_ADATA(&ea_krt_prefsrc, 0, &ps, sizeof(ps))); + } /* Can be set per-route or per-nexthop */ if (s->rta_flow) - { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); - ea->next = ra->eattrs; - ra->eattrs = ea; - ea->flags = EALF_SORTED; - ea->count = 1; - ea->attrs[0].id = EA_KRT_REALM; - ea->attrs[0].flags = 0; - ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = s->rta_flow; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_realm, 0, s->rta_flow)); if (a[RTA_METRICS]) { u32 metrics[KRT_METRICS_MAX]; - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); - int t, n = 0; - if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) { - log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr); + log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net); return; } - for (t = 1; t < KRT_METRICS_MAX; t++) + for (uint t = 1; t < KRT_METRICS_MAX; t++) if (metrics[0] & (1 << t)) - { - ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t); - ea->attrs[n].flags = 0; - ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */ - ea->attrs[n].u.data = metrics[t]; - n++; - } - - if (n > 0) - { - ea->next = ra->eattrs; - ea->flags = EALF_SORTED; - ea->count = n; - ra->eattrs = ea; - } + ea_set_attr(&ra, + EA_LITERAL_EMBEDDED(&ea_krt_metrics[t], 0, metrics[t])); } /* @@ -1962,7 +2055,12 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (!s->net) { /* Store the new route */ - s->net = net; + s->net = lp_alloc(s->pool, net->length); + net_copy(s->net, net); + + ea_set_attr_data(&ra, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); + s->attrs = ra; s->proto = p; s->new = new; @@ -1974,20 +2072,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) else { /* Merge next hops with the stored route */ - rta *oa = s->attrs; - - struct nexthop *nhs = &oa->nh; - nexthop_insert(&nhs, &ra->nh); - - /* Perhaps new nexthop is inserted at the first position */ - if (nhs == &ra->nh) - { - /* Swap rtas */ - s->attrs = ra; - - /* Keep old eattrs */ - ra->eattrs = oa->eattrs; - } + eattr *nhea = ea_find(s->attrs, &ea_gen_nexthop); + struct nexthop_adata *nhad_old = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; + + if (nhad_old) + ea_set_attr(&s->attrs, + EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, + &(nexthop_merge(nhad_old, &nhad.nhad, + KRT_CF->merge_paths, s->pool)->ad) + )); + else + ea_set_attr_data(&s->attrs, &ea_gen_nexthop, 0, + nhad.ad.data, nhad.ad.length); } } @@ -2195,6 +2291,8 @@ krt_sys_io_init(void) { nl_linpool = lp_new_default(krt_pool); HASH_INIT(nl_table_map, krt_pool, 6); + + nl_ea_register(); } int @@ -2248,56 +2346,6 @@ krt_sys_copy_config(struct krt_config *d, struct krt_config *s) d->sys.metric = s->sys.metric; } -static const char *krt_metrics_names[KRT_METRICS_MAX] = { - NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss", - "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack" -}; - -static const char *krt_features_names[KRT_FEATURES_MAX] = { - "ecn", NULL, NULL, "allfrag" -}; - -int -krt_sys_get_attr(const eattr *a, byte *buf, int buflen UNUSED) -{ - switch (a->id) - { - case EA_KRT_PREFSRC: - bsprintf(buf, "prefsrc"); - return GA_NAME; - - case EA_KRT_REALM: - bsprintf(buf, "realm"); - return GA_NAME; - - case EA_KRT_SCOPE: - bsprintf(buf, "scope"); - return GA_NAME; - - case EA_KRT_LOCK: - buf += bsprintf(buf, "lock:"); - ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX); - return GA_FULL; - - case EA_KRT_FEATURES: - buf += bsprintf(buf, "features:"); - ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX); - return GA_FULL; - - default:; - int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET; - if (id > 0 && id < KRT_METRICS_MAX) - { - bsprintf(buf, "%s", krt_metrics_names[id]); - return GA_NAME; - } - - return GA_UNKNOWN; - } -} - - - void kif_sys_start(struct kif_proto *p UNUSED) { diff --git a/sysdep/unix/Makefile b/sysdep/unix/Makefile index d0d36b5f..3f1a8b3a 100644 --- a/sysdep/unix/Makefile +++ b/sysdep/unix/Makefile @@ -1,7 +1,9 @@ -src := alloc.c io.c krt.c log.c main.c random.c +src := alloc.c io.c io-loop.c krt.c log.c main.c random.c domain.c obj := $(src-o-files) $(all-daemon) $(cf-local) +$(call proto-build,kif_build) +$(call proto-build,krt_build) $(conf-y-targets): $(s)krt.Y src := $(filter-out main.c, $(src)) diff --git a/sysdep/unix/alloc.c b/sysdep/unix/alloc.c index 90453f7b..a2384ca8 100644 --- a/sysdep/unix/alloc.c +++ b/sysdep/unix/alloc.c @@ -19,111 +19,169 @@ #include <sys/mman.h> #endif -#ifdef HAVE_MMAP -#define KEEP_PAGES 512 +long page_size = 0; -static u64 page_size = 0; -static _Bool use_fake = 0; +#ifdef HAVE_MMAP +#define KEEP_PAGES_MAIN_MAX 256 +#define KEEP_PAGES_MAIN_MIN 8 +#define CLEANUP_PAGES_BULK 256 -uint pages_kept = 0; -static list pages_list; +STATIC_ASSERT(KEEP_PAGES_MAIN_MIN * 4 < KEEP_PAGES_MAIN_MAX); -static void cleanup_pages(void *data); -static event page_cleanup_event = { .hook = cleanup_pages }; +static _Bool use_fake = 0; +#if DEBUGGING +struct free_page { + node unused[42]; + node n; +}; #else -static const u64 page_size = 4096; /* Fake page size */ +struct free_page { + node n; +}; #endif -u64 get_page_size(void) +struct free_pages { + list pages; + u16 min, max; /* Minimal and maximal number of free pages kept */ + uint cnt; /* Number of empty pages */ + event cleanup; +}; + +static void global_free_pages_cleanup_event(void *); + +static struct free_pages global_free_pages = { + .min = KEEP_PAGES_MAIN_MIN, + .max = KEEP_PAGES_MAIN_MAX, + .cleanup = { .hook = global_free_pages_cleanup_event }, +}; + +uint *pages_kept = &global_free_pages.cnt; + +static void * +alloc_sys_page(void) { - if (page_size) - return page_size; + void *ptr = mmap(NULL, page_size, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -#ifdef HAVE_MMAP - if (page_size = sysconf(_SC_PAGESIZE)) - { - if ((u64_popcount(page_size) > 1) || (page_size > 16384)) - { - /* Too big or strange page, use the aligned allocator instead */ - page_size = 4096; - use_fake = 1; - } - return page_size; - } + if (ptr == MAP_FAILED) + bug("mmap(%lu) failed: %m", page_size); - bug("Page size must be non-zero"); -#endif + return ptr; } +extern int shutting_down; /* Shutdown requested. */ + +#else // ! HAVE_MMAP +#define use_fake 1 +#endif + void * alloc_page(void) { -#ifdef HAVE_MMAP - if (pages_kept) - { - node *page = TAIL(pages_list); - rem_node(page); - pages_kept--; - memset(page, 0, get_page_size()); - return page; - } - - if (!use_fake) - { - void *ret = mmap(NULL, get_page_size(), PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) - bug("mmap(%lu) failed: %m", (long unsigned int) page_size); - return ret; - } - else -#endif + if (use_fake) { void *ptr = NULL; int err = posix_memalign(&ptr, page_size, page_size); + if (err || !ptr) bug("posix_memalign(%lu) failed", (long unsigned int) page_size); + return ptr; } + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + + if (fps->cnt) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, HEAD(fps->pages)); + rem_node(&fp->n); + if ((--fps->cnt < fps->min) && !shutting_down) + ev_send(&global_work_list, &fps->cleanup); + + bzero(fp, page_size); + return fp; + } + + return alloc_sys_page(); +#endif } void free_page(void *ptr) { -#ifdef HAVE_MMAP - if (!use_fake) + if (use_fake) { - if (!pages_kept) - init_list(&pages_list); + free(ptr); + return; + } + +#ifdef HAVE_MMAP + struct free_pages *fps = &global_free_pages; + struct free_page *fp = ptr; - memset(ptr, 0, sizeof(node)); - add_tail(&pages_list, ptr); + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); - if (++pages_kept > KEEP_PAGES) - ev_schedule(&page_cleanup_event); - } - else + if ((++fps->cnt > fps->max) && !shutting_down) + ev_send(&global_work_list, &fps->cleanup); #endif - free(ptr); } #ifdef HAVE_MMAP static void -cleanup_pages(void *data UNUSED) +global_free_pages_cleanup_event(void *data UNUSED) { - for (uint seen = 0; (pages_kept > KEEP_PAGES) && (seen < KEEP_PAGES); seen++) + if (shutting_down) + return; + + struct free_pages *fps = &global_free_pages; + + while (fps->cnt / 2 < fps->min) { - void *ptr = HEAD(pages_list); - rem_node(ptr); - if (munmap(ptr, get_page_size()) == 0) - pages_kept--; + struct free_page *fp = alloc_sys_page(); + fp->n = (node) {}; + add_tail(&fps->pages, &fp->n); + fps->cnt++; + } + + for (uint seen = 0; (seen < CLEANUP_PAGES_BULK) && (fps->cnt > fps->max / 2); seen++) + { + struct free_page *fp = SKIP_BACK(struct free_page, n, TAIL(fps->pages)); + rem_node(&fp->n); + + if (munmap(fp, page_size) == 0) + fps->cnt--; else if (errno == ENOMEM) - add_tail(&pages_list, ptr); + add_head(&fps->pages, &fp->n); else - bug("munmap(%p) failed: %m", ptr); + bug("munmap(%p) failed: %m", fp); } - - if (pages_kept > KEEP_PAGES) - ev_schedule(&page_cleanup_event); } #endif + +void +resource_sys_init(void) +{ +#ifdef HAVE_MMAP + ASSERT_DIE(global_free_pages.cnt == 0); + + if (!(page_size = sysconf(_SC_PAGESIZE))) + die("System page size must be non-zero"); + + if (u64_popcount(page_size) == 1) + { + struct free_pages *fps = &global_free_pages; + + init_list(&fps->pages); + global_free_pages_cleanup_event(NULL); + return; + } + + /* Too big or strange page, use the aligned allocator instead */ + log(L_WARN "Got strange memory page size (%lu), using the aligned allocator instead", page_size); + use_fake = 1; +#endif + + page_size = 4096; +} diff --git a/sysdep/unix/domain.c b/sysdep/unix/domain.c new file mode 100644 index 00000000..0a5858a6 --- /dev/null +++ b/sysdep/unix/domain.c @@ -0,0 +1,116 @@ +/* + * BIRD Locking + * + * (c) 2020 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#undef LOCAL_DEBUG + +#undef DEBUG_LOCKING + +#include "lib/birdlib.h" +#include "lib/locking.h" +#include "lib/resource.h" +#include "lib/timer.h" + +#include "conf/conf.h" + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <pthread.h> +#include <semaphore.h> +#include <stdatomic.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* + * Locking subsystem + */ + +_Thread_local struct lock_order locking_stack = {}; +_Thread_local struct domain_generic **last_locked = NULL; + +#define ASSERT_NO_LOCK ASSERT_DIE(last_locked == NULL) + +struct domain_generic { + pthread_mutex_t mutex; + uint order; + struct domain_generic **prev; + struct lock_order *locked_by; + const char *name; +}; + +#define DOMAIN_INIT(_name, _order) { .mutex = PTHREAD_MUTEX_INITIALIZER, .name = _name, .order = _order } + +static struct domain_generic the_bird_domain_gen = DOMAIN_INIT("The BIRD", OFFSETOF(struct lock_order, the_bird)); + +DOMAIN(the_bird) the_bird_domain = { .the_bird = &the_bird_domain_gen }; + +struct domain_generic * +domain_new(const char *name, uint order) +{ + ASSERT_DIE(order < sizeof(struct lock_order)); + struct domain_generic *dg = xmalloc(sizeof(struct domain_generic)); + *dg = (struct domain_generic) DOMAIN_INIT(name, order); + return dg; +} + +void +domain_free(struct domain_generic *dg) +{ + pthread_mutex_destroy(&dg->mutex); + xfree(dg); +} + +uint dg_order(struct domain_generic *dg) +{ + return dg->order; +} + +void do_lock(struct domain_generic *dg, struct domain_generic **lsp) +{ + if ((char *) lsp - (char *) &locking_stack != dg->order) + bug("Trying to lock on bad position: order=%u, lsp=%p, base=%p", dg->order, lsp, &locking_stack); + + if (lsp <= last_locked) + bug("Trying to lock in a bad order"); + if (*lsp) + bug("Inconsistent locking stack state on lock"); + + btime lock_begin = current_time(); + pthread_mutex_lock(&dg->mutex); + btime duration = current_time() - lock_begin; + if (config && (duration > config->watchdog_warning)) + log(L_WARN "Locking of %s took %d ms", dg->name, (int) (duration TO_MS)); + + if (dg->prev || dg->locked_by) + bug("Previous unlock not finished correctly"); + dg->prev = last_locked; + *lsp = dg; + last_locked = lsp; + dg->locked_by = &locking_stack; +} + +void do_unlock(struct domain_generic *dg, struct domain_generic **lsp) +{ + if ((char *) lsp - (char *) &locking_stack != dg->order) + bug("Trying to unlock on bad position: order=%u, lsp=%p, base=%p", dg->order, lsp, &locking_stack); + + if (dg->locked_by != &locking_stack) + bug("Inconsistent domain state on unlock"); + if ((last_locked != lsp) || (*lsp != dg)) + bug("Inconsistent locking stack state on unlock"); + dg->locked_by = NULL; + last_locked = dg->prev; + *lsp = NULL; + dg->prev = NULL; + pthread_mutex_unlock(&dg->mutex); +} diff --git a/proto/bfd/io.c b/sysdep/unix/io-loop.c index 1cd9365a..3e3fc31a 100644 --- a/proto/bfd/io.c +++ b/sysdep/unix/io-loop.c @@ -15,7 +15,6 @@ #include <sys/time.h> #include "nest/bird.h" -#include "proto/bfd/io.h" #include "lib/buffer.h" #include "lib/lists.h" @@ -24,56 +23,41 @@ #include "lib/timer.h" #include "lib/socket.h" +#include "lib/io-loop.h" +#include "sysdep/unix/io-loop.h" +#include "conf/conf.h" -struct birdloop -{ - pool *pool; - pthread_t thread; - pthread_mutex_t mutex; - - u8 stop_called; - u8 poll_active; - u8 wakeup_masked; - int wakeup_fds[2]; - - struct timeloop time; - list event_list; - list sock_list; - uint sock_num; - - BUFFER(sock *) poll_sk; - BUFFER(struct pollfd) poll_fd; - u8 poll_changed; - u8 close_scheduled; -}; - +#define THREAD_STACK_SIZE 65536 /* To be lowered in near future */ /* * Current thread context */ -static pthread_key_t current_loop_key; -extern pthread_key_t current_time_key; +_Thread_local struct birdloop *birdloop_current; +static _Thread_local struct birdloop *birdloop_wakeup_masked; +static _Thread_local uint birdloop_wakeup_masked_count; -static inline struct birdloop * -birdloop_current(void) +event_list * +birdloop_event_list(struct birdloop *loop) { - return pthread_getspecific(current_loop_key); + return &loop->event_list; } -static inline void -birdloop_set_current(struct birdloop *loop) +struct timeloop * +birdloop_time_loop(struct birdloop *loop) { - pthread_setspecific(current_loop_key, loop); - pthread_setspecific(current_time_key, loop ? &loop->time : &main_timeloop); + return &loop->time; } -static inline void -birdloop_init_current(void) +_Bool +birdloop_inside(struct birdloop *loop) { - pthread_key_create(¤t_loop_key, NULL); -} + for (struct birdloop *c = birdloop_current; c; c = c->prev_loop) + if (loop == c) + return 1; + return 0; +} /* * Wakeup code for birdloop @@ -150,60 +134,24 @@ wakeup_do_kick(struct birdloop *loop) } static inline void -wakeup_kick(struct birdloop *loop) -{ - if (!loop->wakeup_masked) - wakeup_do_kick(loop); - else - loop->wakeup_masked = 2; -} - -/* For notifications from outside */ -void -wakeup_kick_current(void) +birdloop_do_ping(struct birdloop *loop) { - struct birdloop *loop = birdloop_current(); + if (atomic_fetch_add_explicit(&loop->ping_sent, 1, memory_order_acq_rel)) + return; - if (loop && loop->poll_active) - wakeup_kick(loop); -} - - -/* - * Events - */ - -static inline uint -events_waiting(struct birdloop *loop) -{ - return !EMPTY_LIST(loop->event_list); -} - -static inline void -events_init(struct birdloop *loop) -{ - init_list(&loop->event_list); -} - -static void -events_fire(struct birdloop *loop) -{ - times_update(&loop->time); - ev_run_list(&loop->event_list); + if (loop == birdloop_wakeup_masked) + birdloop_wakeup_masked_count++; + else + wakeup_do_kick(loop); } void -ev2_schedule(event *e) +birdloop_ping(struct birdloop *loop) { - struct birdloop *loop = birdloop_current(); - - if (loop->poll_active && EMPTY_LIST(loop->event_list)) - wakeup_kick(loop); - - if (e->n.next) - rem_node(&e->n); - - add_tail(&loop->event_list, &e->n); + if (birdloop_inside(loop) && !loop->ping_pending) + loop->ping_pending++; + else + birdloop_do_ping(loop); } @@ -231,16 +179,14 @@ sockets_add(struct birdloop *loop, sock *s) s->index = -1; loop->poll_changed = 1; - if (loop->poll_active) - wakeup_kick(loop); + birdloop_ping(loop); } void sk_start(sock *s) { - struct birdloop *loop = birdloop_current(); - - sockets_add(loop, s); + ASSERT_DIE(birdloop_current != &main_birdloop); + sockets_add(birdloop_current, s); } static void @@ -250,30 +196,21 @@ sockets_remove(struct birdloop *loop, sock *s) loop->sock_num--; if (s->index >= 0) - loop->poll_sk.data[s->index] = NULL; - - s->index = -1; - loop->poll_changed = 1; - - /* Wakeup moved to sk_stop() */ -} - -void -sk_stop(sock *s) -{ - struct birdloop *loop = birdloop_current(); - - sockets_remove(loop, s); - - if (loop->poll_active) { + loop->poll_sk.data[s->index] = NULL; + s->index = -1; + loop->poll_changed = 1; loop->close_scheduled = 1; - wakeup_kick(loop); + birdloop_ping(loop); } else close(s->fd); +} - s->fd = -1; +void +sk_stop(sock *s) +{ + sockets_remove(birdloop_current, s); } static inline uint sk_want_events(sock *s) @@ -354,7 +291,7 @@ sockets_fire(struct birdloop *loop) sock **psk = loop->poll_sk.data; int poll_num = loop->poll_fd.used - 1; - times_update(&loop->time); + times_update(); /* Last fd is internal wakeup fd */ if (pfd[poll_num].revents & POLLIN) @@ -373,12 +310,15 @@ sockets_fire(struct birdloop *loop) if (pfd->revents & POLLIN) while (e && *psk && (*psk)->rx_hook) - e = sk_read(*psk, 0); + e = sk_read(*psk, pfd->revents); e = 1; if (pfd->revents & POLLOUT) + { + loop->poll_changed = 1; while (e && *psk) e = sk_write(*psk); + } } } @@ -387,90 +327,176 @@ sockets_fire(struct birdloop *loop) * Birdloop */ -static void * birdloop_main(void *arg); +struct birdloop main_birdloop; + +static void birdloop_enter_locked(struct birdloop *loop); + +void +birdloop_init(void) +{ + wakeup_init(&main_birdloop); + + main_birdloop.time.domain = the_bird_domain.the_bird; + main_birdloop.time.loop = &main_birdloop; + + times_update(); + timers_init(&main_birdloop.time, &root_pool); + + birdloop_enter_locked(&main_birdloop); +} + +static void *birdloop_main(void *arg); struct birdloop * -birdloop_new(void) +birdloop_new(pool *pp, uint order, const char *name) { - /* FIXME: this init should be elsewhere and thread-safe */ - static int init = 0; - if (!init) - { birdloop_init_current(); init = 1; } + struct domain_generic *dg = domain_new(name, order); - pool *p = rp_new(NULL, "Birdloop root"); + pool *p = rp_new(pp, name); struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); loop->pool = p; - pthread_mutex_init(&loop->mutex, NULL); - wakeup_init(loop); + loop->time.domain = dg; + loop->time.loop = loop; - events_init(loop); + birdloop_enter(loop); + + wakeup_init(loop); + ev_init_list(&loop->event_list, loop, name); timers_init(&loop->time, p); sockets_init(loop); + int e = 0; + + if (e = pthread_attr_init(&loop->thread_attr)) + die("pthread_attr_init() failed: %M", e); + + if (e = pthread_attr_setstacksize(&loop->thread_attr, THREAD_STACK_SIZE)) + die("pthread_attr_setstacksize(%u) failed: %M", THREAD_STACK_SIZE, e); + + if (e = pthread_attr_setdetachstate(&loop->thread_attr, PTHREAD_CREATE_DETACHED)) + die("pthread_attr_setdetachstate(PTHREAD_CREATE_DETACHED) failed: %M", e); + + if (e = pthread_create(&loop->thread_id, &loop->thread_attr, birdloop_main, loop)) + die("pthread_create() failed: %M", e); + + birdloop_leave(loop); + return loop; } +static void +birdloop_do_stop(struct birdloop *loop, void (*stopped)(void *data), void *data) +{ + loop->stopped = stopped; + loop->stop_data = data; + wakeup_do_kick(loop); +} + void -birdloop_start(struct birdloop *loop) +birdloop_stop(struct birdloop *loop, void (*stopped)(void *data), void *data) { - int rv = pthread_create(&loop->thread, NULL, birdloop_main, loop); - if (rv) - die("pthread_create(): %M", rv); + DG_LOCK(loop->time.domain); + birdloop_do_stop(loop, stopped, data); + DG_UNLOCK(loop->time.domain); } void -birdloop_stop(struct birdloop *loop) +birdloop_stop_self(struct birdloop *loop, void (*stopped)(void *data), void *data) { - pthread_mutex_lock(&loop->mutex); - loop->stop_called = 1; - wakeup_do_kick(loop); - pthread_mutex_unlock(&loop->mutex); + ASSERT_DIE(loop == birdloop_current); + ASSERT_DIE(DG_IS_LOCKED(loop->time.domain)); - int rv = pthread_join(loop->thread, NULL); - if (rv) - die("pthread_join(): %M", rv); + birdloop_do_stop(loop, stopped, data); } void birdloop_free(struct birdloop *loop) { + ASSERT_DIE(loop->links == 0); + ASSERT_DIE(pthread_equal(pthread_self(), loop->thread_id)); + + rcu_birdloop_stop(&loop->rcu); + pthread_attr_destroy(&loop->thread_attr); + + domain_free(loop->time.domain); rfree(loop->pool); } +static void +birdloop_enter_locked(struct birdloop *loop) +{ + ASSERT_DIE(DG_IS_LOCKED(loop->time.domain)); + ASSERT_DIE(!birdloop_inside(loop)); + + /* Store the old context */ + loop->prev_loop = birdloop_current; + + /* Put the new context */ + birdloop_current = loop; +} void birdloop_enter(struct birdloop *loop) { - /* TODO: these functions could save and restore old context */ - pthread_mutex_lock(&loop->mutex); - birdloop_set_current(loop); + DG_LOCK(loop->time.domain); + return birdloop_enter_locked(loop); +} + +static void +birdloop_leave_locked(struct birdloop *loop) +{ + /* Check the current context */ + ASSERT_DIE(birdloop_current == loop); + + /* Send pending pings */ + if (loop->ping_pending) + { + loop->ping_pending = 0; + birdloop_do_ping(loop); + } + + /* Restore the old context */ + birdloop_current = loop->prev_loop; } void birdloop_leave(struct birdloop *loop) { - /* TODO: these functions could save and restore old context */ - birdloop_set_current(NULL); - pthread_mutex_unlock(&loop->mutex); + birdloop_leave_locked(loop); + DG_UNLOCK(loop->time.domain); } void birdloop_mask_wakeups(struct birdloop *loop) { - pthread_mutex_lock(&loop->mutex); - loop->wakeup_masked = 1; - pthread_mutex_unlock(&loop->mutex); + ASSERT_DIE(birdloop_wakeup_masked == NULL); + birdloop_wakeup_masked = loop; } void birdloop_unmask_wakeups(struct birdloop *loop) { - pthread_mutex_lock(&loop->mutex); - if (loop->wakeup_masked == 2) + ASSERT_DIE(birdloop_wakeup_masked == loop); + birdloop_wakeup_masked = NULL; + if (birdloop_wakeup_masked_count) wakeup_do_kick(loop); - loop->wakeup_masked = 0; - pthread_mutex_unlock(&loop->mutex); + + birdloop_wakeup_masked_count = 0; +} + +void +birdloop_link(struct birdloop *loop) +{ + ASSERT_DIE(birdloop_inside(loop)); + loop->links++; +} + +void +birdloop_unlink(struct birdloop *loop) +{ + ASSERT_DIE(birdloop_inside(loop)); + loop->links--; } static void * @@ -480,16 +506,17 @@ birdloop_main(void *arg) timer *t; int rv, timeout; - birdloop_set_current(loop); + rcu_birdloop_start(&loop->rcu); + + btime loop_begin = current_time(); - pthread_mutex_lock(&loop->mutex); + tmp_init(loop->pool); + + birdloop_enter(loop); while (1) { - events_fire(loop); - timers_fire(&loop->time); - - times_update(&loop->time); - if (events_waiting(loop)) + timers_fire(&loop->time, 0); + if (ev_run_list(&loop->event_list)) timeout = 0; else if (t = timers_first(&loop->time)) timeout = (tm_remains(t) TO_MS) + 1; @@ -499,8 +526,11 @@ birdloop_main(void *arg) if (loop->poll_changed) sockets_prepare(loop); - loop->poll_active = 1; - pthread_mutex_unlock(&loop->mutex); + btime duration = current_time() - loop_begin; + if (duration > config->watchdog_warning) + log(L_WARN "I/O loop cycle took %d ms", (int) (duration TO_MS)); + + birdloop_leave(loop); try: rv = poll(loop->poll_fd.data, loop->poll_fd.used, timeout); @@ -511,25 +541,38 @@ birdloop_main(void *arg) die("poll: %m"); } - pthread_mutex_lock(&loop->mutex); - loop->poll_active = 0; + birdloop_enter(loop); if (loop->close_scheduled) sockets_close_fds(loop); - if (loop->stop_called) + if (loop->stopped) break; + loop_begin = current_time(); + if (rv) sockets_fire(loop); - timers_fire(&loop->time); + atomic_exchange_explicit(&loop->ping_sent, 0, memory_order_acq_rel); } - loop->stop_called = 0; - pthread_mutex_unlock(&loop->mutex); + /* Flush remaining events */ + ASSERT_DIE(!ev_run_list(&loop->event_list)); + + /* No timers allowed */ + ASSERT_DIE(timers_count(&loop->time) == 0); + ASSERT_DIE(EMPTY_LIST(loop->sock_list)); + ASSERT_DIE(loop->sock_num == 0); + + birdloop_leave(loop); + loop->stopped(loop->stop_data); return NULL; } - +void +birdloop_yield(void) +{ + usleep(100); +} diff --git a/sysdep/unix/io-loop.h b/sysdep/unix/io-loop.h new file mode 100644 index 00000000..31c40459 --- /dev/null +++ b/sysdep/unix/io-loop.h @@ -0,0 +1,43 @@ +/* + * BIRD -- I/O and event loop + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_SYSDEP_UNIX_IO_LOOP_H_ +#define _BIRD_SYSDEP_UNIX_IO_LOOP_H_ + +#include "lib/rcu.h" + +struct birdloop +{ + pool *pool; + + struct timeloop time; + event_list event_list; + list sock_list; + uint sock_num; + + BUFFER(sock *) poll_sk; + BUFFER(struct pollfd) poll_fd; + u8 poll_changed; + u8 close_scheduled; + + uint ping_pending; + _Atomic u32 ping_sent; + int wakeup_fds[2]; + + pthread_t thread_id; + pthread_attr_t thread_attr; + + struct rcu_birdloop rcu; + + uint links; + + void (*stopped)(void *data); + void *stop_data; + + struct birdloop *prev_loop; +}; + +#endif diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 17dc05a3..23baffb2 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -36,12 +36,14 @@ #include "lib/resource.h" #include "lib/socket.h" #include "lib/event.h" +#include "lib/locking.h" #include "lib/timer.h" #include "lib/string.h" #include "nest/iface.h" #include "conf/conf.h" #include "sysdep/unix/unix.h" +#include "sysdep/unix/io-loop.h" #include CONFIG_INCLUDE_SYSIO_H /* Maximum number of calls of tx handler for one socket in one @@ -122,55 +124,50 @@ rf_fileno(struct rfile *f) btime boot_time; + void -times_init(struct timeloop *loop) +times_update(void) { struct timespec ts; int rv; + btime old_time = current_time(); + btime old_real_time = current_real_time(); + rv = clock_gettime(CLOCK_MONOTONIC, &ts); if (rv < 0) die("Monotonic clock is missing"); if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40))) log(L_WARN "Monotonic clock is crazy"); - - loop->last_time = ts.tv_sec S + ts.tv_nsec NS; - loop->real_time = 0; -} - -void -times_update(struct timeloop *loop) -{ - struct timespec ts; - int rv; - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - die("clock_gettime: %m"); - + btime new_time = ts.tv_sec S + ts.tv_nsec NS; - if (new_time < loop->last_time) + if (new_time < old_time) log(L_ERR "Monotonic clock is broken"); - loop->last_time = new_time; - loop->real_time = 0; -} - -void -times_update_real_time(struct timeloop *loop) -{ - struct timespec ts; - int rv; - rv = clock_gettime(CLOCK_REALTIME, &ts); if (rv < 0) die("clock_gettime: %m"); - loop->real_time = ts.tv_sec S + ts.tv_nsec NS; -} + btime new_real_time = ts.tv_sec S + ts.tv_nsec NS; + if (!atomic_compare_exchange_strong_explicit( + &last_time, + &old_time, + new_time, + memory_order_acq_rel, + memory_order_relaxed)) + DBG("Time update collision: last_time"); + + if (!atomic_compare_exchange_strong_explicit( + &real_time, + &old_real_time, + new_real_time, + memory_order_acq_rel, + memory_order_relaxed)) + DBG("Time update collision: real_time"); +} /** * DOC: Sockets @@ -804,18 +801,16 @@ sk_free(resource *r) sk_ssh_free(s); #endif - if (s->fd < 0) + if ((s->fd < 0) || (s->flags & SKF_THREAD)) return; - /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ - if (!(s->flags & SKF_THREAD)) - { - if (s == current_sock) - current_sock = sk_next(s); - if (s == stored_sock) - stored_sock = sk_next(s); + if (s == current_sock) + current_sock = sk_next(s); + if (s == stored_sock) + stored_sock = sk_next(s); + + if (enlisted(&s->n)) rem_node(&s->n); - } if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE) close(s->fd); @@ -1108,7 +1103,11 @@ sk_passive_connected(sock *s, int type) return 1; } - sk_insert(t); + if (s->flags & SKF_PASSIVE_THREAD) + t->flags |= SKF_THREAD; + else + sk_insert(t); + sk_alloc_bufs(t); s->rx_hook(t, 0); return 1; @@ -1516,6 +1515,36 @@ sk_open_unix(sock *s, char *name) return 0; } +static void +sk_reloop_hook(void *_vs) +{ + sock *s = _vs; + if (birdloop_inside(&main_birdloop)) + { + s->flags &= ~SKF_THREAD; + sk_insert(s); + } + else + { + s->flags |= SKF_THREAD; + sk_start(s); + } +} + +void +sk_reloop(sock *s, struct birdloop *loop) +{ + if (enlisted(&s->n)) + rem_node(&s->n); + + s->reloop = (event) { + .hook = sk_reloop_hook, + .data = s, + }; + + ev_send_loop(loop, &s->reloop); +} + #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \ CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL) @@ -1854,8 +1883,8 @@ sk_read_ssh(sock *s) /* sk_read() and sk_write() are called from BFD's event loop */ -int -sk_read(sock *s, int revents) +static inline int +sk_read_noflush(sock *s, int revents) { switch (s->type) { @@ -1918,7 +1947,15 @@ sk_read(sock *s, int revents) } int -sk_write(sock *s) +sk_read(sock *s, int revents) +{ + int e = sk_read_noflush(s, revents); + tmp_flush(); + return e; +} + +static inline int +sk_write_noflush(sock *s) { switch (s->type) { @@ -1966,6 +2003,14 @@ sk_write(sock *s) } } +int +sk_write(sock *s) +{ + int e = sk_write_noflush(s); + tmp_flush(); + return e; +} + int sk_is_ipv4(sock *s) { return s->af == AF_INET; } @@ -1984,6 +2029,7 @@ sk_err(sock *s, int revents) } s->err_hook(s, se); + tmp_flush(); } void @@ -2020,30 +2066,17 @@ struct event_log_entry static struct event_log_entry event_log[EVENT_LOG_LENGTH]; static struct event_log_entry *event_open; static int event_log_pos, event_log_num, watchdog_active; -static btime last_time; +static btime last_io_time; static btime loop_time; static void io_update_time(void) { - struct timespec ts; - int rv; - - /* - * This is third time-tracking procedure (after update_times() above and - * times_update() in BFD), dedicated to internal event log and latency - * tracking. Hopefully, we consolidate these sometimes. - */ - - rv = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rv < 0) - die("clock_gettime: %m"); - - last_time = ts.tv_sec S + ts.tv_nsec NS; + last_io_time = current_time(); if (event_open) { - event_open->duration = last_time - event_open->timestamp; + event_open->duration = last_io_time - event_open->timestamp; if (event_open->duration > config->latency_limit) log(L_WARN "Event 0x%p 0x%p took %u.%03u ms", @@ -2072,7 +2105,7 @@ io_log_event(void *hook, void *data) en->hook = hook; en->data = data; - en->timestamp = last_time; + en->timestamp = last_io_time; en->duration = 0; event_log_num++; @@ -2100,14 +2133,14 @@ io_log_dump(void) struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH; if (en->hook) log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data, - (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS)); + (int) ((last_io_time - en->timestamp) TO_MS), (int) (en->duration TO_MS)); } } void watchdog_sigalrm(int sig UNUSED) { - /* Update last_time and duration, but skip latency check */ + /* Update last_io_time and duration, but skip latency check */ config->latency_limit = 0xffffffff; io_update_time(); @@ -2120,7 +2153,7 @@ watchdog_start1(void) { io_update_time(); - loop_time = last_time; + loop_time = last_io_time; } static inline void @@ -2128,7 +2161,7 @@ watchdog_start(void) { io_update_time(); - loop_time = last_time; + loop_time = last_io_time; event_log_num = 0; if (config->watchdog_timeout) @@ -2149,7 +2182,7 @@ watchdog_stop(void) watchdog_active = 0; } - btime duration = last_time - loop_time; + btime duration = last_io_time - loop_time; if (duration > config->watchdog_warning) log(L_WARN "I/O loop cycle took %u.%03u ms for %d events", (uint) (duration TO_MS), (uint) (duration % 1000), event_log_num); @@ -2164,8 +2197,9 @@ void io_init(void) { init_list(&sock_list); - init_list(&global_event_list); - init_list(&global_work_list); + ev_init_list(&global_event_list, &main_birdloop, "Global event list"); + ev_init_list(&global_work_list, &main_birdloop, "Global work list"); + ev_init_list(&main_birdloop.event_list, &main_birdloop, "Global fast event list"); krt_io_init(); // XXX init_times(); // XXX update_times(); @@ -2179,6 +2213,8 @@ static int short_loops = 0; #define SHORT_LOOP_MAX 10 #define WORK_EVENTS_MAX 10 +void pipe_drain(int fd); + void io_loop(void) { @@ -2193,22 +2229,28 @@ io_loop(void) watchdog_start1(); for(;;) { - times_update(&main_timeloop); + times_update(); events = ev_run_list(&global_event_list); events = ev_run_list_limited(&global_work_list, WORK_EVENTS_MAX) || events; - timers_fire(&main_timeloop); + events = ev_run_list(&main_birdloop.event_list) || events; + timers_fire(&main_birdloop.time, 1); io_close_event(); // FIXME poll_tout = (events ? 0 : 3000); /* Time in milliseconds */ - if (t = timers_first(&main_timeloop)) + if (t = timers_first(&main_birdloop.time)) { - times_update(&main_timeloop); + times_update(); timeout = (tm_remains(t) TO_MS) + 1; poll_tout = MIN(poll_tout, timeout); } - nfds = 0; + /* A hack to reload main io_loop() when something has changed asynchronously. */ + pfd[0].fd = main_birdloop.wakeup_fds[0]; + pfd[0].events = POLLIN; + + nfds = 1; + WALK_LIST(n, sock_list) { pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */ @@ -2267,7 +2309,9 @@ io_loop(void) /* And finally enter poll() to find active sockets */ watchdog_stop(); + birdloop_leave(&main_birdloop); pout = poll(pfd, nfds, poll_tout); + birdloop_enter(&main_birdloop); watchdog_start(); if (pout < 0) @@ -2278,7 +2322,15 @@ io_loop(void) } if (pout) { - times_update(&main_timeloop); + if (pfd[0].revents & POLLIN) + { + /* IO loop reload requested */ + pipe_drain(main_birdloop.wakeup_fds[0]); + atomic_exchange_explicit(&main_birdloop.ping_sent, 0, memory_order_acq_rel); + continue; + } + + times_update(); /* guaranteed to be non-empty */ current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); diff --git a/sysdep/unix/krt.Y b/sysdep/unix/krt.Y index 95b54d65..4ce9a328 100644 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@ -29,7 +29,7 @@ kif_set_preferred(ip_addr ip) CF_DECLS -CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC, MERGE, PATHS) +CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, MERGE, PATHS) CF_KEYWORDS(INTERFACE, PREFERRED) %type <i> kern_mp_limit @@ -122,9 +122,6 @@ kif_iface: kif_iface_start iface_patt_list_nopx kif_iface_opt_list; -dynamic_attr: KRT_SOURCE { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_SOURCE); } ; -dynamic_attr: KRT_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT, T_INT, EA_KRT_METRIC); } ; - CF_CODE CF_END diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 342adee5..cfa9b1ee 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -53,7 +53,7 @@ #include "nest/bird.h" #include "nest/iface.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "filter/filter.h" #include "conf/conf.h" @@ -232,7 +232,6 @@ kif_copy_config(struct proto_config *dest, struct proto_config *src) struct protocol proto_unix_iface = { .name = "Device", .template = "device%d", - .class = PROTOCOL_DEVICE, .proto_size = sizeof(struct kif_proto), .config_size = sizeof(struct kif_config), .preconfig = kif_preconfig, @@ -243,6 +242,13 @@ struct protocol proto_unix_iface = { .copy_config = kif_copy_config }; +void +kif_build(void) +{ + proto_build(&proto_unix_iface); +} + + /* * Tracing of routes */ @@ -251,14 +257,14 @@ static inline void krt_trace_in(struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log(L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); + log(L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } static inline void krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); + log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } /* @@ -277,261 +283,49 @@ static struct tbf rl_alien = TBF_DEFAULT_LOG_LIMITS; * the same key. */ -static inline int -krt_same_key(rte *a, rte *b) +static inline u32 +krt_metric(rte *a) { - return a->u.krt.metric == b->u.krt.metric; + eattr *ea = ea_find(a->attrs, &ea_krt_metric); + return ea ? ea->u.data : 0; } static inline int -krt_uptodate(rte *a, rte *b) -{ - if (a->attrs != b->attrs) - return 0; - - if (a->u.krt.proto != b->u.krt.proto) - return 0; - - return 1; -} - -static void -krt_learn_announce_update(struct krt_proto *p, rte *e) +krt_same_key(rte *a, rte *b) { - net *n = e->net; - rta *aa = rta_clone(e->attrs); - rte *ee = rte_get_temp(aa); - ee->pflags = EA_ID_FLAG(EA_KRT_SOURCE) | EA_ID_FLAG(EA_KRT_METRIC); - ee->u.krt = e->u.krt; - rte_update(&p->p, n->n.addr, ee); + return (krt_metric(a) == krt_metric(b)); } -static void -krt_learn_announce_delete(struct krt_proto *p, net *n) +static inline int +krt_uptodate(rte *a, rte *b) { - rte_update(&p->p, n->n.addr, NULL); + return (a->attrs == b->attrs); } /* Called when alien route is discovered during scan */ static void krt_learn_scan(struct krt_proto *p, rte *e) { - net *n0 = e->net; - net *n = net_get(p->krt_table, n0->n.addr); - rte *m, **mm; - - e->attrs = rta_lookup(e->attrs); - - for(mm=&n->routes; m = *mm; mm=&m->next) - if (krt_same_key(m, e)) - break; - if (m) - { - if (krt_uptodate(m, e)) - { - krt_trace_in_rl(&rl_alien, p, e, "[alien] seen"); - rte_free(e); - m->u.krt.seen = 1; - } - else - { - krt_trace_in(p, e, "[alien] updated"); - *mm = m->next; - rte_free(m); - m = NULL; - } - } - else - krt_trace_in(p, e, "[alien] created"); - if (!m) - { - e->next = n->routes; - n->routes = e; - e->u.krt.seen = 1; - } -} - -static void -krt_learn_prune(struct krt_proto *p) -{ - struct fib *fib = &p->krt_table->fib; - struct fib_iterator fit; - - KRT_TRACE(p, D_EVENTS, "Pruning inherited routes"); - - FIB_ITERATE_INIT(&fit, fib); -again: - FIB_ITERATE_START(fib, &fit, net, n) - { - rte *e, **ee, *best, **pbest, *old_best; - - /* - * Note that old_best may be NULL even if there was an old best route in - * the previous step, because it might be replaced in krt_learn_scan(). - * But in that case there is a new valid best route. - */ - - old_best = NULL; - best = NULL; - pbest = NULL; - ee = &n->routes; - while (e = *ee) - { - if (e->u.krt.best) - old_best = e; - - if (!e->u.krt.seen) - { - *ee = e->next; - rte_free(e); - continue; - } - - if (!best || best->u.krt.metric > e->u.krt.metric) - { - best = e; - pbest = ee; - } - - e->u.krt.seen = 0; - e->u.krt.best = 0; - ee = &e->next; - } - if (!n->routes) - { - DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen); - if (old_best) - krt_learn_announce_delete(p, n); + rte e0 = { + .attrs = e->attrs, + .src = rt_get_source(&p->p, krt_metric(e)), + }; - FIB_ITERATE_PUT(&fit); - fib_delete(fib, n); - goto again; - } + ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference); - best->u.krt.best = 1; - *pbest = best->next; - best->next = n->routes; - n->routes = best; - - if ((best != old_best) || p->reload) - { - DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); - krt_learn_announce_update(p, best); - } - else - DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); - } - FIB_ITERATE_END; - - p->reload = 0; + rte_update(p->p.main_channel, e->net, &e0, e0.src); + rt_unlock_source(e0.src); } static void krt_learn_async(struct krt_proto *p, rte *e, int new) { - net *n0 = e->net; - net *n = net_get(p->krt_table, n0->n.addr); - rte *g, **gg, *best, **bestp, *old_best; - - e->attrs = rta_lookup(e->attrs); - - old_best = n->routes; - for(gg=&n->routes; g = *gg; gg = &g->next) - if (krt_same_key(g, e)) - break; if (new) - { - if (g) - { - if (krt_uptodate(g, e)) - { - krt_trace_in(p, e, "[alien async] same"); - rte_free(e); - return; - } - krt_trace_in(p, e, "[alien async] updated"); - *gg = g->next; - rte_free(g); - } - else - krt_trace_in(p, e, "[alien async] created"); - - e->next = n->routes; - n->routes = e; - } - else if (!g) - { - krt_trace_in(p, e, "[alien async] delete failed"); - rte_free(e); - return; - } - else - { - krt_trace_in(p, e, "[alien async] removed"); - *gg = g->next; - rte_free(e); - rte_free(g); - } - best = n->routes; - bestp = &n->routes; - for(gg=&n->routes; g=*gg; gg=&g->next) - { - if (best->u.krt.metric > g->u.krt.metric) - { - best = g; - bestp = gg; - } - - g->u.krt.best = 0; - } - - if (best) - { - best->u.krt.best = 1; - *bestp = best->next; - best->next = n->routes; - n->routes = best; - } - - if (best != old_best) - { - DBG("krt_learn_async: distributing change\n"); - if (best) - krt_learn_announce_update(p, best); - else - krt_learn_announce_delete(p, n); - } -} - -static void -krt_learn_init(struct krt_proto *p) -{ - if (KRT_CF->learn) - { - struct rtable_config *cf = mb_allocz(p->p.pool, sizeof(struct rtable_config)); - cf->name = "Inherited"; - cf->addr_type = p->p.net_type; - cf->internal = 1; - - p->krt_table = rt_setup(p->p.pool, cf); - } -} + return krt_learn_scan(p, e); -static void -krt_dump(struct proto *P) -{ - struct krt_proto *p = (struct krt_proto *) P; - - if (!KRT_CF->learn) - return; - debug("KRT: Table of inheritable routes\n"); - rt_dump(p->krt_table); -} - -static void -krt_dump_attrs(rte *e) -{ - debug(" [m=%d,p=%d]", e->u.krt.metric, e->u.krt.proto); + struct rte_src *src = rt_get_source(&p->p, krt_metric(e)); + rte_update(p->p.main_channel, e->net, NULL, src); + rt_unlock_source(src); } #endif @@ -543,79 +337,83 @@ krt_dump_attrs(rte *e) static inline int krt_is_installed(struct krt_proto *p, net *n) { - return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->id); + return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id); } -static void -krt_flush_routes(struct krt_proto *p) +static uint +rte_feed_count(net *n) { - struct rtable *t = p->p.main_channel->table; + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} - KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); - FIB_WALK(&t->fib, net, n) +static void +rte_feed_obtain(net *n, rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) { - if (krt_is_installed(p, n)) - { - /* FIXME: this does not work if gw is changed in export filter */ - krt_replace_rte(p, n, NULL, n->routes); - } + ASSERT_DIE(i < count); + feed[i++] = &e->rte; } - FIB_WALK_END; + ASSERT_DIE(i == count); } static struct rte * -krt_export_net(struct krt_proto *p, net *net, rte **rt_free) +krt_export_net(struct krt_proto *p, net *net) { struct channel *c = p->p.main_channel; const struct filter *filter = c->out_filter; - rte *rt; if (c->ra_mode == RA_MERGED) - return rt_export_merged(c, net, rt_free, krt_filter_lp, 1); + { + uint count = rte_feed_count(net); + if (!count) + return NULL; - rt = net->routes; - *rt_free = NULL; + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + return rt_export_merged(c, feed, count, krt_filter_lp, 1); + } + + static _Thread_local rte rt; + rt = net->routes->rte; - if (!rte_is_valid(rt)) + if (!rte_is_valid(&rt)) return NULL; if (filter == FILTER_REJECT) return NULL; - rte_make_tmp_attrs(&rt, krt_filter_lp, NULL); - /* We could run krt_preexport() here, but it is already handled by krt_is_installed() */ if (filter == FILTER_ACCEPT) goto accept; - if (f_run(filter, &rt, krt_filter_lp, FF_SILENT) > F_ACCEPT) + if (f_run(filter, &rt, FF_SILENT) > F_ACCEPT) goto reject; accept: - if (rt != net->routes) - *rt_free = rt; - return rt; + return &rt; reject: - if (rt != net->routes) - rte_free(rt); return NULL; } static int krt_same_dest(rte *k, rte *e) { - rta *ka = k->attrs, *ea = e->attrs; - - if (ka->dest != ea->dest) - return 0; + ea_list *ka = k->attrs, *ea = e->attrs; - if (ka->dest == RTD_UNICAST) - return nexthop_same(&(ka->nh), &(ea->nh)); + eattr *nhea_k = ea_find(ka, &ea_gen_nexthop); + eattr *nhea_e = ea_find(ea, &ea_gen_nexthop); - return 1; + return (!nhea_k == !nhea_e) && adata_same(nhea_k->u.ptr, nhea_e->u.ptr); } /* @@ -624,13 +422,13 @@ krt_same_dest(rte *k, rte *e) */ void -krt_got_route(struct krt_proto *p, rte *e) +krt_got_route(struct krt_proto *p, rte *e, s8 src) { - rte *new = NULL, *rt_free = NULL; - net *n = e->net; + rte *new = NULL; + e->pflags = 0; #ifdef KRT_ALLOW_LEARN - switch (e->u.krt.src) + switch (src) { case KRT_SRC_KERNEL: goto ignore; @@ -642,24 +440,25 @@ krt_got_route(struct krt_proto *p, rte *e) if (KRT_CF->learn) krt_learn_scan(p, e); else - { - krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); - rte_free(e); - } + krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); return; } #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + /* Deleting all routes if flush is requested */ + if (p->flush_routes) + goto delete; /* We wait for the initial feed to have correct installed state */ if (!p->ready) goto ignore; - if (!krt_is_installed(p, n)) + net *net = net_find(p->p.main_channel->table, e->net); + if (!net || !krt_is_installed(p, net)) goto delete; - new = krt_export_net(p, n, &rt_free); + new = krt_export_net(p, net); /* Rejected by filters */ if (!new) @@ -692,20 +491,15 @@ ignore: update: krt_trace_in(p, new, "updating"); - krt_replace_rte(p, n, new, e); + krt_replace_rte(p, e->net, new, e); goto done; delete: krt_trace_in(p, e, "deleting"); - krt_replace_rte(p, n, NULL, e); + krt_replace_rte(p, e->net, NULL, e); goto done; done: - rte_free(e); - - if (rt_free) - rte_free(rt_free); - lp_flush(krt_filter_lp); } @@ -723,40 +517,42 @@ krt_prune(struct krt_proto *p) KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); FIB_WALK(&t->fib, net, n) { - if (p->ready && krt_is_installed(p, n) && !bmap_test(&p->seen_map, n->routes->id)) + if (p->ready && krt_is_installed(p, n) && !bmap_test(&p->seen_map, n->routes->rte.id)) { - rte *rt_free = NULL; - rte *new = krt_export_net(p, n, &rt_free); + rte *new = krt_export_net(p, n); if (new) { krt_trace_in(p, new, "installing"); - krt_replace_rte(p, n, new, NULL); + krt_replace_rte(p, n->n.addr, new, NULL); } - if (rt_free) - rte_free(rt_free); - lp_flush(krt_filter_lp); } } FIB_WALK_END; -#ifdef KRT_ALLOW_LEARN - if (KRT_CF->learn) - krt_learn_prune(p); -#endif - if (p->ready) p->initialized = 1; } +static void +krt_flush_routes(struct krt_proto *p) +{ + KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); + p->flush_routes = 1; + krt_init_scan(p); + krt_do_scan(p); + /* No prune! */ + p->flush_routes = 0; +} + void -krt_got_route_async(struct krt_proto *p, rte *e, int new) +krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { - net *net = e->net; + e->pflags = 0; - switch (e->u.krt.src) + switch (src) { case KRT_SRC_BIRD: /* Should be filtered by the back end */ @@ -766,7 +562,7 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new) if (new) { krt_trace_in(p, e, "[redirect] deleting"); - krt_replace_rte(p, net, NULL, e); + krt_replace_rte(p, e->net, NULL, e); } /* If !new, it is probably echo of our deletion */ break; @@ -780,7 +576,6 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new) } #endif } - rte_free(e); } @@ -899,29 +694,10 @@ krt_scan_timer_kick(struct krt_proto *p) * Updates */ -static void -krt_make_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rte_make_tmp_attr(rt, EA_KRT_SOURCE, EAF_TYPE_INT, rt->u.krt.proto); - rte_make_tmp_attr(rt, EA_KRT_METRIC, EAF_TYPE_INT, rt->u.krt.metric); -} - -static void -krt_store_tmp_attrs(struct rte *rt, struct linpool *pool) -{ - rte_init_tmp_attrs(rt, pool, 2); - rt->u.krt.proto = rte_store_tmp_attr(rt, EA_KRT_SOURCE); - rt->u.krt.metric = rte_store_tmp_attr(rt, EA_KRT_METRIC); -} - static int -krt_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) +krt_preexport(struct channel *C, rte *e) { - // struct krt_proto *p = (struct krt_proto *) P; - rte *e = *new; - - if (e->attrs->src->proto == C->proto) + if (e->src->owner == &C->proto->sources) return -1; if (!krt_capable(e)) @@ -931,8 +707,8 @@ krt_preexport(struct channel *C, rte **new, struct linpool *pool UNUSED) } static void -krt_rt_notify(struct proto *P, struct channel *ch UNUSED, net *net, - rte *new, rte *old) +krt_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, + rte *new, const rte *old) { struct krt_proto *p = (struct krt_proto *) P; @@ -996,14 +772,6 @@ krt_feed_end(struct channel *C) } -static int -krt_rte_same(rte *a, rte *b) -{ - /* src is always KRT_SRC_ALIEN and type is irrelevant */ - return (a->u.krt.proto == b->u.krt.proto) && (a->u.krt.metric == b->u.krt.metric); -} - - /* * Protocol glue */ @@ -1057,9 +825,6 @@ krt_init(struct proto_config *CF) p->p.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; - p->p.make_tmp_attrs = krt_make_tmp_attrs; - p->p.store_tmp_attrs = krt_store_tmp_attrs; - p->p.rte_same = krt_rte_same; krt_sys_init(p); return &p->p; @@ -1085,10 +850,6 @@ krt_start(struct proto *P) bmap_init(&p->seen_map, p->p.pool, 1024); add_tail(&krt_proto_list, &p->krt_node); -#ifdef KRT_ALLOW_LEARN - krt_learn_init(p); -#endif - if (!krt_sys_start(p)) { rem_node(&p->krt_node); @@ -1168,24 +929,15 @@ krt_copy_config(struct proto_config *dest, struct proto_config *src) krt_sys_copy_config(d, s); } -static int -krt_get_attr(const eattr *a, byte *buf, int buflen) -{ - switch (a->id) - { - case EA_KRT_SOURCE: - bsprintf(buf, "source"); - return GA_NAME; - - case EA_KRT_METRIC: - bsprintf(buf, "metric"); - return GA_NAME; - - default: - return krt_sys_get_attr(a, buf, buflen); - } -} +struct ea_class ea_krt_source = { + .name = "krt_source", + .type = T_INT, +}; +struct ea_class ea_krt_metric = { + .name = "krt_metric", + .type = T_INT, +}; #ifdef CONFIG_IP6_SADR_KERNEL #define MAYBE_IP6_SADR NB_IP6_SADR @@ -1202,7 +954,6 @@ krt_get_attr(const eattr *a, byte *buf, int buflen) struct protocol proto_unix_kernel = { .name = "Kernel", .template = "kernel%d", - .class = PROTOCOL_KERNEL, .preference = DEF_PREF_INHERITED, .channel_mask = NB_IP | MAYBE_IP6_SADR | MAYBE_MPLS, .proto_size = sizeof(struct krt_proto), @@ -1214,9 +965,15 @@ struct protocol proto_unix_kernel = { .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, - .get_attr = krt_get_attr, -#ifdef KRT_ALLOW_LEARN - .dump = krt_dump, - .dump_attrs = krt_dump_attrs, -#endif }; + +void +krt_build(void) +{ + proto_build(&proto_unix_kernel); + + EA_REGISTER_ALL( + &ea_krt_source, + &ea_krt_metric, + ); +} diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 1536e502..9f7ebb4f 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -21,8 +21,10 @@ struct kif_proto; #define KRT_DEFAULT_ECMP_LIMIT 16 -#define EA_KRT_SOURCE EA_CODE(PROTOCOL_KERNEL, 0) -#define EA_KRT_METRIC EA_CODE(PROTOCOL_KERNEL, 1) +extern struct ea_class ea_krt_source, ea_krt_metric; + +#define KRT_REF_SEEN 0x1 /* Seen in table */ +#define KRT_REF_BEST 0x2 /* Best in table */ /* Whenever we recognize our own routes, we allow learing of foreign routes */ @@ -48,10 +50,6 @@ struct krt_proto { struct proto p; struct krt_state sys; /* Sysdep state */ -#ifdef KRT_ALLOW_LEARN - struct rtable *krt_table; /* Internal table of inherited routes */ -#endif - timer *scan_timer; struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */ struct bmap seen_map; /* Routes seen during last periodic scan */ @@ -60,6 +58,7 @@ struct krt_proto { byte ready; /* Initial feed has been finished */ byte initialized; /* First scan has been finished */ byte reload; /* Next scan is doing reload */ + byte flush_routes; /* Scanning to flush */ }; extern pool *krt_pool; @@ -74,8 +73,8 @@ extern pool *krt_pool; struct proto_config * kif_init_config(int class); void kif_request_scan(void); void krt_use_shared_scan(void); -void krt_got_route(struct krt_proto *p, struct rte *e); -void krt_got_route_async(struct krt_proto *p, struct rte *e, int new); +void krt_got_route(struct krt_proto *p, struct rte *e, s8 src); +void krt_got_route_async(struct krt_proto *p, struct rte *e, int new, s8 src); static inline int krt_get_sync_error(struct krt_proto *p, struct rte *e) @@ -138,7 +137,7 @@ void krt_sys_copy_config(struct krt_config *, struct krt_config *); int krt_capable(rte *e); void krt_do_scan(struct krt_proto *); -void krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old); +void krt_replace_rte(struct krt_proto *p, const net_addr *n, rte *new, const rte *old); int krt_sys_get_attr(const eattr *a, byte *buf, int buflen); diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 14d18c01..185231e8 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -15,6 +15,7 @@ * user's manual. */ +#include <stdatomic.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -35,8 +36,10 @@ static FILE *dbgf; static list *current_log_list; static char *current_syslog_name; /* NULL -> syslog closed */ +static _Atomic uint max_thread_id = ATOMIC_VAR_INIT(1); +static _Thread_local uint this_thread_id; -#ifdef USE_PTHREADS +#define THIS_THREAD_ID (this_thread_id ?: (this_thread_id = atomic_fetch_add_explicit(&max_thread_id, 1, memory_order_acq_rel))) #include <pthread.h> @@ -48,15 +51,6 @@ static pthread_t main_thread; void main_thread_init(void) { main_thread = pthread_self(); } static int main_thread_self(void) { return pthread_equal(pthread_self(), main_thread); } -#else - -static inline void log_lock(void) { } -static inline void log_unlock(void) { } -void main_thread_init(void) { } -static int main_thread_self(void) { return 1; } - -#endif - #ifdef HAVE_SYSLOG_H #include <sys/syslog.h> @@ -189,7 +183,7 @@ log_commit(int class, buffer *buf) l->pos += msg_len; } - fprintf(l->fh, "%s <%s> ", tbuf, class_names[class]); + fprintf(l->fh, "%s [%04x] <%s> ", tbuf, THIS_THREAD_ID, class_names[class]); } fputs(buf->start, l->fh); fputc('\n', l->fh); @@ -299,6 +293,8 @@ die(const char *msg, ...) exit(1); } +static struct timespec dbg_time_start; + /** * debug - write to debug output * @msg: a printf-like message @@ -309,22 +305,36 @@ die(const char *msg, ...) void debug(const char *msg, ...) { -#define MAX_DEBUG_BUFSIZE 65536 +#define MAX_DEBUG_BUFSIZE 16384 va_list args; - static uint bufsize = 4096; - static char *buf = NULL; - - if (!buf) - buf = mb_alloc(&root_pool, bufsize); + char buf[MAX_DEBUG_BUFSIZE], *pos = buf; + int max = MAX_DEBUG_BUFSIZE; va_start(args, msg); if (dbgf) { - while (bvsnprintf(buf, bufsize, msg, args) < 0) - if (bufsize >= MAX_DEBUG_BUFSIZE) - bug("Extremely long debug output, split it."); - else - buf = mb_realloc(buf, (bufsize *= 2)); + struct timespec dbg_time; + clock_gettime(CLOCK_MONOTONIC, &dbg_time); + uint nsec; + uint sec; + + if (dbg_time.tv_nsec > dbg_time_start.tv_nsec) + { + nsec = dbg_time.tv_nsec - dbg_time_start.tv_nsec; + sec = dbg_time.tv_sec - dbg_time_start.tv_sec; + } + else + { + nsec = 1000000000 + dbg_time.tv_nsec - dbg_time_start.tv_nsec; + sec = dbg_time.tv_sec - dbg_time_start.tv_sec - 1; + } + + int n = bsnprintf(pos, max, "%u.%09u: [%04x] ", sec, nsec, THIS_THREAD_ID); + pos += n; + max -= n; + + if (bvsnprintf(pos, max, msg, args) < 0) + bug("Extremely long debug output, split it."); fputs(buf, dbgf); } @@ -429,6 +439,8 @@ done: void log_init_debug(char *f) { + clock_gettime(CLOCK_MONOTONIC, &dbg_time_start); + if (dbgf && dbgf != stderr) fclose(dbgf); if (!f) diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index f87d0c43..bf9f2be0 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -28,9 +28,10 @@ #include "lib/resource.h" #include "lib/socket.h" #include "lib/event.h" +#include "lib/locking.h" #include "lib/timer.h" #include "lib/string.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "nest/iface.h" #include "nest/cli.h" @@ -56,7 +57,7 @@ async_dump(void) // XXXX tm_dump_all(); if_dump_all(); neigh_dump_all(); - rta_dump_all(); + ea_dump_all(); rt_dump_all(); protos_dump_all(); @@ -479,6 +480,14 @@ cli_err(sock *s, int err) cli_free(s->data); } +static void +cli_connect_err(sock *s UNUSED, int err) +{ + ASSERT_DIE(err); + if (config->cli_debug) + log(L_INFO "Failed to accept CLI connection: %s", strerror(err)); +} + static int cli_connect(sock *s, uint size UNUSED) { @@ -507,6 +516,7 @@ cli_init_unix(uid_t use_uid, gid_t use_gid) s = cli_sk = sk_new(cli_pool); s->type = SK_UNIX_PASSIVE; s->rx_hook = cli_connect; + s->err_hook = cli_connect_err; s->rbsize = 1024; s->fast_rx = 1; @@ -864,16 +874,19 @@ main(int argc, char **argv) dmalloc_debug(0x2f03d00); #endif + times_update(); parse_args(argc, argv); log_switch(1, NULL, NULL); + the_bird_lock(); + random_init(); net_init(); resource_init(); - timer_init(); + birdloop_init(); olock_init(); - io_init(); rt_init(); + io_init(); if_init(); // roa_init(); config_init(); @@ -897,8 +910,6 @@ main(int argc, char **argv) open_pid_file(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); struct config *conf = read_config(); @@ -920,6 +931,7 @@ main(int argc, char **argv) dup2(0, 2); } + main_thread_init(); write_pid_file(); diff --git a/test/birdtest.c b/test/birdtest.c index 6ad743ce..5e3de1c5 100644 --- a/test/birdtest.c +++ b/test/birdtest.c @@ -20,6 +20,8 @@ #include "test/birdtest.h" #include "lib/string.h" +#include "lib/event.h" +#include "lib/io-loop.h" #ifdef HAVE_EXECINFO_H #include <execinfo.h> @@ -63,6 +65,9 @@ bt_init(int argc, char *argv[]) { int c; + /* We have no interest in stdin */ + close(0); + initstate(BT_RANDOM_SEED, (char *) bt_random_state, sizeof(bt_random_state)); bt_verbose = 0; @@ -119,6 +124,11 @@ bt_init(int argc, char *argv[]) clock_gettime(CLOCK_MONOTONIC, &bt_begin); bt_suite_case_begin = bt_suite_begin = bt_begin; + the_bird_lock(); + resource_init(); + ev_init_list(&global_event_list, &main_birdloop, "Global event list in unit tests"); + ev_init_list(&global_work_list, &main_birdloop, "Global work list in unit tests"); + birdloop_init(); return; usage: @@ -172,6 +182,8 @@ int bt_run_test_fn(int (*fn)(const void *), const void *fn_arg, int timeout) if (!bt_suite_result) result = 0; + tmp_flush(); + return result; } @@ -240,7 +252,7 @@ bt_log_result(int result, u64 time, const char *fmt, va_list argptr) printf("%s\n", result_str); if (do_die && !result) - abort(); + exit(1); } static u64 diff --git a/test/bt-utils.c b/test/bt-utils.c index cbca3a6b..36e44da4 100644 --- a/test/bt-utils.c +++ b/test/bt-utils.c @@ -14,7 +14,7 @@ #include "test/bt-utils.h" #include "nest/bird.h" -#include "nest/route.h" +#include "nest/rt.h" #include "nest/protocol.h" #include "sysdep/unix/unix.h" @@ -53,6 +53,8 @@ cf_file_read(byte *dest, uint max_len, int fd) return l; } +void resource_sys_init(void); + void bt_bird_init(void) { @@ -60,25 +62,19 @@ bt_bird_init(void) log_init_debug(""); log_switch(bt_verbose != 0, NULL, NULL); - resource_init(); olock_init(); - timer_init(); - io_init(); rt_init(); + io_init(); if_init(); config_init(); protos_build(); - proto_build(&proto_unix_kernel); - proto_build(&proto_unix_iface); } void bt_bird_cleanup(void) { - for (int i = 0; i < PROTOCOL__MAX; i++) - class_to_protocol[i] = NULL; - config = new_config = NULL; + the_bird_unlock(); } static char * |