diff options
48 files changed, 2450 insertions, 1874 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 678a08f6..7809fecd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -360,17 +360,17 @@ build-opensuse-15.3-amd64: <<: *build-linux image: registry.nic.cz/labs/bird:opensuse-15.3-amd64 -build-freebsd-11-amd64: - <<: *build-base - tags: - - freebsd - - amd64 - -build-freebsd-11-i386: - <<: *build-base - tags: - - freebsd - - i386 +#build-freebsd-11-amd64: +# <<: *build-base +# tags: +# - freebsd +# - amd64 + +#build-freebsd-11-i386: +# <<: *build-base +# tags: +# - freebsd +# - i386 .pkg-deb: &pkg-deb diff --git a/client/client.c b/client/client.c index 97cf6639..934e16e0 100644 --- a/client/client.c +++ b/client/client.c @@ -50,6 +50,7 @@ static byte *server_read_pos = server_read_buf; int init = 1; /* During intial sequence */ int busy = 1; /* Executing BIRD command */ int interactive; /* Whether stdin is terminal */ +int last_code; /* Last return code */ static int num_lines, skip_input; int term_lns, term_cls; @@ -196,7 +197,7 @@ init_commands(void) { /* Initial command is finished and we want to exit */ cleanup(); - exit(0); + exit((last_code < 8000) ? 0 : 1); } input_init(); @@ -283,6 +284,8 @@ server_got_reply(char *x) if (code) PRINTF(len, "%s\n", verbose ? x : x+5); + last_code = code; + if (x[4] == ' ') { busy = 0; diff --git a/conf/conf.h b/conf/conf.h index 18de8def..4ccaa54e 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -44,6 +44,7 @@ struct config { int cli_debug; /* Tracing of CLI connections and commands */ int latency_debug; /* I/O loop tracks duration of each event */ + int pipe_debug; /* Track route propagation through pipes */ u32 latency_limit; /* Events with longer duration are logged (us) */ u32 watchdog_warning; /* I/O loop watchdog limit for warning (us) */ u32 watchdog_timeout; /* Watchdog timeout (in seconds, 0 = disabled) */ diff --git a/distro/pkg/rpm/bird.service b/distro/pkg/rpm/bird.service index fa203c78..26bcb8a0 100644 --- a/distro/pkg/rpm/bird.service +++ b/distro/pkg/rpm/bird.service @@ -5,8 +5,9 @@ After=network.target [Service] Type=simple +ExecStartPre=/usr/sbin/bird -p ExecStart=/usr/sbin/bird -f -u bird -g bird -ExecReload=/bin/kill -HUP $MAINPID +ExecReload=/usr/sbin/birdc configure Restart=on-failure [Install] diff --git a/doc/bird.sgml b/doc/bird.sgml index aa058b37..d17de23f 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2144,6 +2144,13 @@ protocol bfd [<name>] { to configure separate BFD protocol instances for IPv4 and for IPv6 sessions. + <tag><label id="bfd-strict-bind">strict bind <m/switch/</tag> + Specify whether each BFD interface should use a separate listening + socket bound to its local address, or just use a shared listening socket + accepting all addresses. Binding to a specific address could be useful + in cases like running multiple BIRD instances on a machine, each + handling a different set of interfaces. Default: disabled. + <tag><label id="bfd-iface">interface <m/pattern/ [, <m/.../] { <m/options/ }</tag> Interface definitions allow to specify options for sessions associated with such interfaces and also may contain interface specific options. @@ -4202,6 +4209,14 @@ include standard channel config options; see the example below. <tag><label id="pipe-peer-table">peer table <m/table/</tag> Defines secondary routing table to connect to. The primary one is selected by the <cf/table/ keyword. + + <tag><label id="pipe-max-generation">max generation <m/expr/</tag> + Sets maximal generation of route that may pass through this pipe. + The generation value is increased by one by each pipe on its path. + Not meeting this requirement causes an error message complaining about + an overpiped route. If you have long chains of pipes, you probably want + to raise this value; anyway the default of 16 should be enough for even + most strange uses. Maximum is 254. </descrip> <sect1>Attributes @@ -5235,7 +5250,7 @@ Note that for negated matches, value must be either zero or equal to bitmask <cf>port 1..1023,1194,3306</cf>). <tag><label id="flow-dport">dport <m/numbers-match/</tag> - Set a mating destination port numbers (e.g. <cf>dport 49151</cf>). + Set a matching destination port numbers (e.g. <cf>dport 49151</cf>). <tag><label id="flow-sport">sport <m/numbers-match/</tag> Set a matching source port numbers (e.g. <cf>sport = 0</cf>). diff --git a/filter/f-inst.c b/filter/f-inst.c index c2abd5aa..1690c9f6 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -533,8 +533,8 @@ switch (sa.sa_code) { - case SA_NET: RESULT(sa.type, net, (*fs->rte)->net->n.addr); break; - case SA_PROTO: RESULT(sa.type, s, (*fs->rte)->src->proto->name); break; + case SA_NET: RESULT(sa.type, net, fs->rte->net); break; + case SA_PROTO: RESULT(sa.type, s, fs->rte->src->proto->name); break; default: { struct eattr *nhea = ea_find(*fs->eattrs, &ea_gen_nexthop); @@ -615,7 +615,7 @@ struct iface *ifa = (ipa_is_link_local(ip) && nh_ea) ? ((struct nexthop_adata *) nh_ea->u.ptr)->nh.iface : NULL; - neighbor *n = neigh_find((*fs->rte)->src->proto, ip, ifa, 0); + neighbor *n = neigh_find(fs->rte->src->proto, ip, ifa, 0); if (!n || (n->scope == SCOPE_HOST)) runtime( "Invalid gw address" ); diff --git a/filter/filter.c b/filter/filter.c index 9bedb938..124c9932 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -74,10 +74,7 @@ struct filter_state { } stack; /* The route we are processing. This may be NULL to indicate no route available. */ - struct rte **rte; - - /* The old rta to be freed after filters are done. */ - struct rta *old_rta; + struct rte *rte; /* Cached pointer to ea_list */ struct ea_list **eattrs; @@ -99,15 +96,7 @@ void (*bt_assert_hook)(int result, const struct f_line_item *assert); static inline void f_cache_eattrs(struct filter_state *fs) { - fs->eattrs = &((*fs->rte)->attrs->eattrs); -} - -static inline void f_rte_cow(struct filter_state *fs) -{ - if (!((*fs->rte)->flags & REF_COW)) - return; - - *fs->rte = rte_cow(*fs->rte); + fs->eattrs = &(fs->rte->attrs->eattrs); } /* @@ -116,22 +105,16 @@ static inline void f_rte_cow(struct filter_state *fs) static void f_rta_cow(struct filter_state *fs) { - if (!rta_is_cached((*fs->rte)->attrs)) + if (!rta_is_cached(fs->rte->attrs)) return; - /* Prepare to modify rte */ - f_rte_cow(fs); - - /* Store old rta to free it later, it stores reference from rte_cow() */ - fs->old_rta = (*fs->rte)->attrs; - /* * Get shallow copy of rta. Fields eattrs and nexthops of rta are shared * with fs->old_rta (they will be copied when the cached rta will be obtained * at the end of f_run()), also the lock of hostentry is inherited (we * suppose hostentry is not changed by filters). */ - (*fs->rte)->attrs = rta_do_cow((*fs->rte)->attrs, tmp_linpool); + fs->rte->attrs = rta_do_cow(fs->rte->attrs, tmp_linpool); /* Re-cache the ea_list */ f_cache_eattrs(fs); @@ -243,29 +226,15 @@ interpret(struct filter_state *fs, const struct f_line *line, struct f_val *val) /** * f_run - run a filter for a route * @filter: filter to run - * @rte: route being filtered, may be modified + * @rte: route being filtered, must be write-able * @tmp_pool: all filter allocations go from this pool * @flags: flags * - * If filter needs to modify the route, there are several - * posibilities. @rte might be read-only (with REF_COW flag), in that - * case rw copy is obtained by rte_cow() and @rte is replaced. If - * @rte is originally rw, it may be directly modified (and it is never - * copied). - * - * The returned rte may reuse the (possibly cached, cloned) rta, or - * (if rta was modified) contains a modified uncached rta, which - * uses parts allocated from @tmp_pool and parts shared from original - * rta. There is one exception - if @rte is rw but contains a cached - * rta and that is modified, rta in returned rte is also cached. - * - * Ownership of cached rtas is consistent with rte, i.e. - * if a new rte is returned, it has its own clone of cached rta - * (and cached rta of read-only source rte is intact), if rte is - * modified in place, old cached rta is possibly freed. + * If @rte->attrs is cached, the returned rte allocates a new rta on + * tmp_pool, otherwise the filters may modify it. */ enum filter_return -f_run(const struct filter *filter, struct rte **rte, int flags) +f_run(const struct filter *filter, struct rte *rte, int flags) { if (filter == FILTER_ACCEPT) return F_ACCEPT; @@ -273,7 +242,6 @@ f_run(const struct filter *filter, struct rte **rte, int flags) if (filter == FILTER_REJECT) return F_REJECT; - int rte_cow = ((*rte)->flags & REF_COW); DBG( "Running filter `%s'...", filter->name ); /* Initialize the filter state */ @@ -289,32 +257,6 @@ f_run(const struct filter *filter, struct rte **rte, int flags) /* Run the interpreter itself */ enum filter_return fret = interpret(&filter_state, filter->root, NULL); - if (filter_state.old_rta) { - /* - * Cached rta was modified and filter_state->rte contains now an uncached one, - * sharing some part with the cached one. The cached rta should - * be freed (if rte was originally COW, filter_state->old_rta is a clone - * obtained during rte_cow()). - * - * This also implements the exception mentioned in f_run() - * description. The reason for this is that rta reuses parts of - * filter_state->old_rta, and these may be freed during rta_free(filter_state->old_rta). - * This is not the problem if rte was COW, because original rte - * also holds the same rta. - */ - if (!rte_cow) { - /* Cache the new attrs */ - (*filter_state.rte)->attrs = rta_lookup((*filter_state.rte)->attrs); - - /* Drop cached ea_list pointer */ - filter_state.eattrs = NULL; - } - - /* Uncache the old attrs and drop the pointer as it is invalid now. */ - rta_free(filter_state.old_rta); - filter_state.old_rta = NULL; - } - /* Process the filter output, log it and return */ if (fret < F_ACCEPT) { if (!(filter_state.flags & FF_SILENT)) @@ -339,7 +281,7 @@ f_run(const struct filter *filter, struct rte **rte, int flags) */ enum filter_return -f_eval_rte(const struct f_line *expr, struct rte **rte) +f_eval_rte(const struct f_line *expr, struct rte *rte) { filter_state = (struct filter_state) { .rte = rte, @@ -349,8 +291,7 @@ f_eval_rte(const struct f_line *expr, struct rte **rte) LOG_BUFFER_INIT(filter_state.buf); - ASSERT(!((*rte)->flags & REF_COW)); - ASSERT(!rta_is_cached((*rte)->attrs)); + ASSERT(!rta_is_cached(rte->attrs)); return interpret(&filter_state, expr, NULL); } @@ -475,6 +416,23 @@ filter_commit(struct config *new, struct config *old) } } +void channel_filter_dump(const struct filter *f) +{ + if (f == FILTER_ACCEPT) + debug(" ALL"); + else if (f == FILTER_REJECT) + debug(" NONE"); + else if (f == FILTER_UNDEF) + debug(" UNDEF"); + else if (f->sym) { + ASSERT(f->sym->filter == f); + debug(" named filter %s", f->sym->name); + } else { + debug("\n"); + f_dump_line(f->root, 2); + } +} + void filters_dump_all(void) { struct symbol *sym; @@ -494,19 +452,10 @@ void filters_dump_all(void) struct channel *c; WALK_LIST(c, sym->proto->proto->channels) { debug(" Channel %s (%s) IMPORT", c->name, net_label[c->net_type]); - if (c->in_filter == FILTER_ACCEPT) - debug(" ALL\n"); - else if (c->in_filter == FILTER_REJECT) - debug(" NONE\n"); - else if (c->in_filter == FILTER_UNDEF) - debug(" UNDEF\n"); - else if (c->in_filter->sym) { - ASSERT(c->in_filter->sym->filter == c->in_filter); - debug(" named filter %s\n", c->in_filter->sym->name); - } else { - debug("\n"); - f_dump_line(c->in_filter->root, 2); - } + channel_filter_dump(c->in_filter); + debug(" EXPORT", c->name, net_label[c->net_type]); + channel_filter_dump(c->out_filter); + debug("\n"); } } } diff --git a/filter/filter.h b/filter/filter.h index 0273ef15..3f2e62eb 100644 --- a/filter/filter.h +++ b/filter/filter.h @@ -51,8 +51,8 @@ struct filter { struct rte; -enum filter_return f_run(const struct filter *filter, struct rte **rte, int flags); -enum filter_return f_eval_rte(const struct f_line *expr, struct rte **rte); +enum filter_return f_run(const struct filter *filter, struct rte *rte, int flags); +enum filter_return f_eval_rte(const struct f_line *expr, struct rte *rte); uint f_eval_int(const struct f_line *expr); enum filter_return f_eval_buf(const struct f_line *expr, buffer *buf); @@ -85,25 +85,29 @@ ip4_classify(ip4_addr ad) u32 a = _I(ad); u32 b = a >> 24U; - if (b && b <= 0xdf) + if (b < 0xe0) { - if (b == 0x7f) + if (b == 0x00) /* 0.0.0.0/8 This network */ + return IADDR_INVALID; + + if (b == 0x7f) /* 127.0.0.0/8 Loopback address */ return IADDR_HOST | SCOPE_HOST; - else if ((b == 0x0a) || - ((a & 0xffff0000) == 0xc0a80000) || - ((a & 0xfff00000) == 0xac100000)) + + if ((b == 0x0a) || /* 10.0.0.0/8 Private range */ + ((a & 0xffff0000) == 0xc0a80000) || /* 192.168.0.0/16 Private range */ + ((a & 0xfff00000) == 0xac100000)) /* 172.16.0.0/12 Private range */ return IADDR_HOST | SCOPE_SITE; - else - return IADDR_HOST | SCOPE_UNIVERSE; + + return IADDR_HOST | SCOPE_UNIVERSE; } - if (b >= 0xe0 && b <= 0xef) + if (b < 0xf0) /* 224.0.0.0/4 Multicast address */ return IADDR_MULTICAST | SCOPE_UNIVERSE; - if (a == 0xffffffff) + if (a == 0xffffffff) /* 255.255.255.255 Broadcast address */ return IADDR_BROADCAST | SCOPE_LINK; - return IADDR_INVALID; + return IADDR_HOST | SCOPE_SITE; /* 240.0.0.0/4 Reserved / private */ } int diff --git a/lib/route.h b/lib/route.h index 1d8877c8..283a3760 100644 --- a/lib/route.h +++ b/lib/route.h @@ -16,19 +16,21 @@ struct network; struct proto; struct cli; + typedef struct rte { - struct rte *next; - struct network *net; /* Network this RTE belongs to */ - struct rte_src *src; /* Route source that created the route */ - struct channel *sender; /* Channel used to send the route to the routing table */ struct rta *attrs; /* Attributes of this route */ + const net_addr *net; /* Network this RTE belongs to */ + struct rte_src *src; /* Route source that created the route */ + struct rt_import_hook *sender; /* Import hook used to send the route to the routing table */ + btime lastmod; /* Last modified (set by table) */ u32 id; /* Table specific route id */ - byte flags; /* Flags (REF_...) */ + byte flags; /* Table-specific flags */ byte pflags; /* Protocol-specific flags */ - btime lastmod; /* Last modified */ + u8 generation; /* If this route import is based on other previously exported route, + this value should be 1 + MAX(generation of the parent routes). + Otherwise the route is independent and this value is zero. */ } rte; -#define REF_COW 1 /* Copy this rte on write */ #define REF_FILTERED 2 /* Route is rejected by import filter */ #define REF_STALE 4 /* Route is stale in a refresh cycle */ #define REF_DISCARD 8 /* Route is scheduled for discard */ @@ -321,7 +323,7 @@ static inline u32 rt_get_preference(rte *rt) /* IGP metric: second-order comparison */ extern struct ea_class ea_gen_igp_metric; -u32 rt_get_igp_metric(rte *rt); +u32 rt_get_igp_metric(const rte *rt); #define IGP_METRIC_UNKNOWN 0x80000000 /* Default igp_metric used when no other protocol-specific metric is availabe */ @@ -331,16 +333,23 @@ extern struct ea_class ea_gen_from; /* Source: An old method to devise the route source protocol and kind. * To be superseded in a near future by something more informative. */ extern struct ea_class ea_gen_source; -static inline u32 rt_get_source_attr(rte *rt) +static inline u32 rt_get_source_attr(const rte *rt) { return ea_get_int(rt->attrs->eattrs, &ea_gen_source, 0); } /* Flowspec validation result */ -#define FLOWSPEC_UNKNOWN 0 -#define FLOWSPEC_VALID 1 -#define FLOWSPEC_INVALID 2 +enum flowspec_valid { + FLOWSPEC_UNKNOWN = 0, + FLOWSPEC_VALID = 1, + FLOWSPEC_INVALID = 2, + FLOWSPEC__MAX, +}; + +extern const char * flowspec_valid_names[FLOWSPEC__MAX]; +static inline const char *flowspec_valid_name(enum flowspec_valid v) +{ return (v < FLOWSPEC__MAX) ? flowspec_valid_names[v] : "???"; } extern struct ea_class ea_gen_flowspec_valid; -static inline u32 rt_get_flowspec_valid(rte *rt) +static inline enum flowspec_valid rt_get_flowspec_valid(rte *rt) { return ea_get_int(rt->attrs->eattrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN); } /* Next hop: For now, stored as adata */ @@ -402,7 +411,7 @@ static inline int nhea_dest(eattr *nhea) return nhad->dest; } -static inline int rte_dest(rte *r) +static inline int rte_dest(const rte *r) { return nhea_dest(ea_find(r->attrs->eattrs, &ea_gen_nexthop)); } diff --git a/nest/config.Y b/nest/config.Y index 6a35cdd2..7c68a09a 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -112,7 +112,7 @@ proto_postconfig(void) CF_DECLS -CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT) +CF_KEYWORDS(ROUTER, ID, HOSTNAME, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT, PIPE) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, VRF, DEFAULT, TABLE, STATES, ROUTES, FILTERS) CF_KEYWORDS(IPV4, IPV6, VPN4, VPN6, ROA4, ROA6, FLOW4, FLOW6, SADR, MPLS) CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED, RPKI) @@ -371,6 +371,7 @@ debug_default: DEBUG PROTOCOLS debug_mask { new_config->proto_default_debug = $3; } | DEBUG CHANNELS debug_mask { new_config->channel_default_debug = $3; } | DEBUG COMMANDS expr { new_config->cli_debug = $3; } + | DEBUG PIPE bool { new_config->pipe_debug = $3; } ; /* MRTDUMP PROTOCOLS is in systep/unix/config.Y */ @@ -850,8 +851,10 @@ CF_CLI(DUMP NEIGHBORS,,, [[Dump neighbor cache]]) { neigh_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP ATTRIBUTES,,, [[Dump attribute cache]]) { rta_dump_all(); cli_msg(0, ""); } ; -CF_CLI(DUMP ROUTES,,, [[Dump routing table]]) +CF_CLI(DUMP ROUTES,,, [[Dump routes]]) { rt_dump_all(); cli_msg(0, ""); } ; +CF_CLI(DUMP TABLES,,, [[Dump table connections]]) +{ rt_dump_hooks_all(); cli_msg(0, ""); } ; CF_CLI(DUMP PROTOCOLS,,, [[Dump protocol information]]) { protos_dump_all(); cli_msg(0, ""); } ; CF_CLI(DUMP FILTER ALL,,, [[Dump all filters in linearized form]]) diff --git a/nest/limit.h b/nest/limit.h new file mode 100644 index 00000000..5838ad3b --- /dev/null +++ b/nest/limit.h @@ -0,0 +1,49 @@ +/* + * BIRD Internet Routing Daemon -- Limits + * + * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2021 Maria Matejka <mq@jmq.cz> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#ifndef _BIRD_LIMIT_H_ +#define _BIRD_LIMIT_H_ + +struct limit { + u32 max; + u32 count; + int (*action)(struct limit *, void *data); +}; + +static inline int limit_do_action(struct limit *l, void *data) +{ + return l->action ? l->action(l, data) : 1; +} + +static inline int limit_push(struct limit *l, void *data) +{ + if ((l->count >= l->max) && limit_do_action(l, data)) + return 1; + + l->count++; + return 0; +} + +static inline void limit_pop(struct limit *l) +{ + --l->count; +} + +static inline void limit_reset(struct limit *l) +{ + l->count = 0; +} + +static inline void limit_update(struct limit *l, void *data, u32 max) +{ + if (l->count > (l->max = max)) + limit_do_action(l, data); +} + +#endif diff --git a/nest/proto.c b/nest/proto.c index 72613f8d..77817888 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -42,8 +42,7 @@ static int graceful_restart_state; static u32 graceful_restart_locks; static char *p_states[] = { "DOWN", "START", "UP", "STOP" }; -static char *c_states[] = { "DOWN", "START", "UP", "FLUSHING" }; -static char *e_states[] = { "DOWN", "FEEDING", "READY" }; +static char *c_states[] = { "DOWN", "START", "UP", "STOP", "RESTART" }; extern struct protocol proto_unix_iface; @@ -51,15 +50,17 @@ static void channel_request_reload(struct channel *c); static void proto_shutdown_loop(timer *); static void proto_rethink_goal(struct proto *p); static char *proto_state_name(struct proto *p); -static void channel_verify_limits(struct channel *c); -static inline void channel_reset_limit(struct channel_limit *l); - +static void channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf); +static void channel_reset_limit(struct channel *c, struct limit *l, int dir); +static void channel_feed_end(struct channel *c); +static void channel_export_stopped(struct rt_export_request *req); static inline int proto_is_done(struct proto *p) { return (p->proto_state == PS_DOWN) && (p->active_channels == 0); } static inline int channel_is_active(struct channel *c) -{ return (c->channel_state == CS_START) || (c->channel_state == CS_UP); } +{ return (c->channel_state != CS_DOWN); } static inline int channel_reloadable(struct channel *c) { return c->proto->reload_routes && c->reloadable; } @@ -67,10 +68,46 @@ static inline int channel_reloadable(struct channel *c) static inline void channel_log_state_change(struct channel *c) { - if (c->export_state) - CD(c, "State changed to %s/%s", c_states[c->channel_state], e_states[c->export_state]); - else - CD(c, "State changed to %s", c_states[c->channel_state]); + CD(c, "State changed to %s", c_states[c->channel_state]); +} + +void +channel_import_log_state_change(struct rt_import_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + CD(c, "Channel import state changed to %s", rt_import_state_name(state)); +} + +void +channel_export_log_state_change(struct rt_export_request *req, u8 state) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + CD(c, "Channel export state changed to %s", rt_export_state_name(state)); + + switch (state) + { + case TES_FEEDING: + if (c->proto->feed_begin) + c->proto->feed_begin(c, !c->refeeding); + break; + case TES_READY: + channel_feed_end(c); + break; + } +} + +static void +channel_dump_import_req(struct rt_import_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + debug(" Channel %s.%s import request %p\n", c->proto->name, c->name, req); +} + +static void +channel_dump_export_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + debug(" Channel %s.%s export request %p\n", c->proto->name, c->name, req); } static void @@ -140,6 +177,15 @@ proto_find_channel_by_name(struct proto *p, const char *n) return NULL; } +rte * channel_preimport(struct rt_import_request *req, rte *new, rte *old); + +void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); +void rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_accepted(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); +void rt_notify_merged(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + /** * proto_add_channel - connect protocol to a routing table * @p: protocol instance @@ -164,12 +210,14 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->channel = cf->channel; c->proto = p; c->table = cf->table->table; + rt_lock_table(c->table); c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_init_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); c->net_type = cf->net_type; c->ra_mode = cf->ra_mode; @@ -180,7 +228,6 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; - c->export_state = ES_DOWN; c->last_state_change = current_time(); c->reloadable = 1; @@ -202,6 +249,7 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c) CD(c, "Removed", c->name); + rt_unlock_table(c->table); rem_node(&c->n); mb_free(c); } @@ -222,7 +270,7 @@ proto_pause_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_START); + channel_set_state(c, CS_PAUSE); } static void @@ -231,7 +279,7 @@ proto_stop_channels(struct proto *p) struct channel *c; WALK_LIST(c, p->channels) if (!c->disabled && channel_is_active(c)) - channel_set_state(c, CS_FLUSHING); + channel_set_state(c, CS_STOP); } static void @@ -243,71 +291,6 @@ proto_remove_channels(struct proto *p) } static void -channel_schedule_feed(struct channel *c, int initial) -{ - // DBG("%s: Scheduling meal\n", p->name); - ASSERT(c->channel_state == CS_UP); - - c->export_state = ES_FEEDING; - c->refeeding = !initial; - - ev_schedule_work(c->feed_event); -} - -static void -channel_feed_loop(void *ptr) -{ - struct channel *c = ptr; - - if (c->export_state != ES_FEEDING) - return; - - /* Start feeding */ - if (!c->feed_active) - { - if (c->proto->feed_begin) - c->proto->feed_begin(c, !c->refeeding); - - c->refeed_pending = 0; - } - - // DBG("Feeding protocol %s continued\n", p->name); - if (!rt_feed_channel(c)) - { - ev_schedule_work(c->feed_event); - return; - } - - /* Reset export limit if the feed ended with acceptable number of exported routes */ - struct channel_limit *l = &c->out_limit; - if (c->refeeding && - (l->state == PLS_BLOCKED) && - (c->refeed_count <= l->limit) && - (c->stats.exp_routes <= l->limit)) - { - log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->limit); - channel_reset_limit(&c->out_limit); - - /* Continue in feed - it will process routing table again from beginning */ - c->refeed_count = 0; - ev_schedule_work(c->feed_event); - return; - } - - // DBG("Feeding protocol %s finished\n", p->name); - c->export_state = ES_READY; - channel_log_state_change(c); - - if (c->proto->feed_end) - c->proto->feed_end(c); - - /* Restart feeding */ - if (c->refeed_pending) - channel_request_feeding(c); -} - - -static void channel_roa_in_changed(struct rt_subscription *s) { struct channel *c = s->data; @@ -325,14 +308,12 @@ static void channel_roa_out_changed(struct rt_subscription *s) { struct channel *c = s->data; - int active = (c->export_state == ES_FEEDING); + CD(c, "Feeding triggered by RPKI change"); - CD(c, "Feeding triggered by RPKI change%s", active ? " - already active" : ""); + c->refeed_pending = 1; - if (!active) - channel_request_feeding(c); - else - c->refeed_pending = 1; + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); } /* Temporary code, subscriptions should be changed to resources */ @@ -438,32 +419,189 @@ channel_roa_unsubscribe_all(struct channel *c) } static void +channel_start_import(struct channel *c) +{ + if (c->in_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started import", c->proto->name, c->name); + return; + } + + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->in_req = (struct rt_import_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_import_req, + .log_state_change = channel_import_log_state_change, + .preimport = channel_preimport, + .rte_modify = c->proto->rte_modify, + }; + + ASSERT(c->channel_state == CS_UP); + + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); + + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + + DBG("%s.%s: Channel start import req=%p\n", c->proto->name, c->name, &c->in_req); + rt_request_import(c->table, &c->in_req); +} + +static void channel_start_export(struct channel *c) { + if (c->out_req.hook) + { + log(L_WARN "%s.%s: Attempted to start channel's already started export", c->proto->name, c->name); + return; + } + ASSERT(c->channel_state == CS_UP); - ASSERT(c->export_state == ES_DOWN); + int nlen = strlen(c->name) + strlen(c->proto->name) + 2; + char *rn = mb_allocz(c->proto->pool, nlen); + bsprintf(rn, "%s.%s", c->proto->name, c->name); + + c->out_req = (struct rt_export_request) { + .name = rn, + .trace_routes = c->debug | c->proto->debug, + .dump_req = channel_dump_export_req, + .log_state_change = channel_export_log_state_change, + }; + + bmap_init(&c->export_map, c->proto->pool, 1024); + bmap_init(&c->export_reject_map, c->proto->pool, 1024); + + channel_reset_limit(c, &c->out_limit, PLD_OUT); + + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); - channel_schedule_feed(c, 1); /* Sets ES_FEEDING */ + switch (c->ra_mode) { + case RA_OPTIMAL: + c->out_req.export_one = rt_notify_optimal; + break; + case RA_ANY: + c->out_req.export_one = rt_notify_any; + c->out_req.export_bulk = rt_feed_any; + break; + case RA_ACCEPTED: + c->out_req.export_bulk = rt_notify_accepted; + break; + case RA_MERGED: + c->out_req.export_bulk = rt_notify_merged; + break; + default: + bug("Unknown route announcement mode"); + } + + DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); + rt_request_export(c->table, &c->out_req); } static void -channel_stop_export(struct channel *c) +channel_check_stopped(struct channel *c) { - /* Need to abort feeding */ - if (c->export_state == ES_FEEDING) - rt_feed_channel_abort(c); + switch (c->channel_state) + { + case CS_STOP: + if (c->out_req.hook || c->in_req.hook) + return; + + channel_set_state(c, CS_DOWN); + ev_schedule(c->proto->event); + + break; + case CS_PAUSE: + if (c->out_req.hook) + return; + + channel_set_state(c, CS_START); + break; + default: + bug("Stopped channel in a bad state: %d", c->channel_state); + } + + DBG("%s.%s: Channel requests/hooks stopped (in state %s)\n", c->proto->name, c->name, c_states[c->channel_state]); +} + +void +channel_import_stopped(struct rt_import_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); - c->export_state = ES_DOWN; - c->stats.exp_routes = 0; - bmap_reset(&c->export_map, 1024); + req->hook = NULL; + + if (c->in_table) + rt_prune_sync(c->in_table, 1); + + mb_free(c->in_req.name); + c->in_req.name = NULL; + + channel_check_stopped(c); } +static void +channel_export_stopped(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + /* The hook has already stopped */ + req->hook = NULL; + + if (c->refeed_pending) + { + c->refeeding = 1; + c->refeed_pending = 0; + rt_request_export(c->table, req); + return; + } + + /* Free the routes from out_table */ + if (c->out_table) + rt_prune_sync(c->out_table, 1); + + mb_free(c->out_req.name); + c->out_req.name = NULL; + + channel_check_stopped(c); +} + +static void +channel_feed_end(struct channel *c) +{ + struct rt_export_request *req = &c->out_req; + + /* Reset export limit if the feed ended with acceptable number of exported routes */ + struct limit *l = &c->out_limit; + if (c->refeeding && + (c->limit_active & (1 << PLD_OUT)) && + (c->refeed_count <= l->max) && + (l->count <= l->max)) + { + log(L_INFO "Protocol %s resets route export limit (%u)", c->proto->name, l->max); + + c->refeed_pending = 1; + rt_stop_export(req, channel_export_stopped); + return; + } + + if (c->proto->feed_end) + c->proto->feed_end(c); + + if (c->refeed_pending) + rt_stop_export(req, channel_export_stopped); + else + c->refeeding = 0; +} /* Called by protocol for reload from in_table */ void channel_schedule_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); rt_reload_channel_abort(c); ev_schedule_work(c->reload_event); @@ -489,23 +627,6 @@ channel_reload_loop(void *ptr) channel_request_reload(c); } -static void -channel_reset_import(struct channel *c) -{ - /* Need to abort feeding */ - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); - - rt_prune_sync(c->in_table, 1); -} - -static void -channel_reset_export(struct channel *c) -{ - /* Just free the routes */ - rt_prune_sync(c->out_table, 1); -} - /* Called by protocol to activate in_table */ void channel_setup_in_table(struct channel *c) @@ -537,20 +658,11 @@ channel_setup_out_table(struct channel *c) static void channel_do_start(struct channel *c) { - rt_lock_table(c->table); - add_tail(&c->table->channels, &c->table_node); c->proto->active_channels++; - c->feed_event = ev_new_init(c->proto->pool, channel_feed_loop, c); - - bmap_init(&c->export_map, c->proto->pool, 1024); - memset(&c->stats, 0, sizeof(struct proto_stats)); - - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); - channel_reset_limit(&c->out_limit); - CALL(c->channel->start, c); + + channel_start_import(c); } static void @@ -565,9 +677,31 @@ channel_do_up(struct channel *c) } static void -channel_do_flush(struct channel *c) +channel_do_pause(struct channel *c) +{ + /* Need to abort feeding */ + if (c->reload_event) + { + ev_postpone(c->reload_event); + rt_reload_channel_abort(c); + } + + /* Stop export */ + if (c->out_req.hook) + rt_stop_export(&c->out_req, channel_export_stopped); + + channel_roa_unsubscribe_all(c); + + bmap_free(&c->export_map); + bmap_free(&c->export_reject_map); +} + +static void +channel_do_stop(struct channel *c) { - rt_schedule_prune(c->table); + /* Stop import */ + if (c->in_req.hook) + rt_stop_import(&c->in_req, channel_import_stopped); c->gr_wait = 0; if (c->gr_lock) @@ -576,28 +710,21 @@ channel_do_flush(struct channel *c) CALL(c->channel->shutdown, c); /* This have to be done in here, as channel pool is freed before channel_do_down() */ - bmap_free(&c->export_map); c->in_table = NULL; c->reload_event = NULL; c->out_table = NULL; - - channel_roa_unsubscribe_all(c); } static void channel_do_down(struct channel *c) { - ASSERT(!c->feed_active && !c->reload_active); + ASSERT(!c->reload_active); - rem_node(&c->table_node); - rt_unlock_table(c->table); c->proto->active_channels--; - if ((c->stats.imp_routes + c->stats.filt_routes) != 0) - log(L_ERR "%s: Channel %s is down but still has some routes", c->proto->name, c->name); - // bmap_free(&c->export_map); - memset(&c->stats, 0, sizeof(struct proto_stats)); + memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); + memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); c->in_table = NULL; c->reload_event = NULL; @@ -616,7 +743,6 @@ void channel_set_state(struct channel *c, uint state) { uint cs = c->channel_state; - uint es = c->export_state; DBG("%s reporting channel %s state transition %s -> %s\n", c->proto->name, c->name, c_states[cs], c_states[state]); if (state == cs) @@ -628,20 +754,11 @@ channel_set_state(struct channel *c, uint state) switch (state) { case CS_START: - ASSERT(cs == CS_DOWN || cs == CS_UP); + ASSERT(cs == CS_DOWN || cs == CS_PAUSE); if (cs == CS_DOWN) channel_do_start(c); - if (es != ES_DOWN) - channel_stop_export(c); - - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); - - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); - break; case CS_UP: @@ -656,23 +773,24 @@ channel_set_state(struct channel *c, uint state) channel_do_up(c); break; - case CS_FLUSHING: - ASSERT(cs == CS_START || cs == CS_UP); + case CS_PAUSE: + ASSERT(cs == CS_UP); - if (es != ES_DOWN) - channel_stop_export(c); + if (cs == CS_UP) + channel_do_pause(c); + break; - if (c->in_table && (cs == CS_UP)) - channel_reset_import(c); + case CS_STOP: + ASSERT(cs == CS_UP || cs == CS_START || cs == CS_PAUSE); - if (c->out_table && (cs == CS_UP)) - channel_reset_export(c); + if (cs == CS_UP) + channel_do_pause(c); - channel_do_flush(c); + channel_do_stop(c); break; case CS_DOWN: - ASSERT(cs == CS_FLUSHING); + ASSERT(cs == CS_STOP); channel_do_down(c); break; @@ -697,35 +815,16 @@ channel_set_state(struct channel *c, uint state) void channel_request_feeding(struct channel *c) { - ASSERT(c->channel_state == CS_UP); - - CD(c, "Feeding requested"); - - /* Do nothing if we are still waiting for feeding */ - if (c->export_state == ES_DOWN) - return; - - /* If we are already feeding, we want to restart it */ - if (c->export_state == ES_FEEDING) - { - /* Unless feeding is in initial state */ - if (!c->feed_active) - return; + ASSERT(c->out_req.hook); - rt_feed_channel_abort(c); - } - - /* Track number of exported routes during refeed */ - c->refeed_count = 0; - - channel_schedule_feed(c, 0); /* Sets ES_FEEDING */ - channel_log_state_change(c); + c->refeed_pending = 1; + rt_stop_export(&c->out_req, channel_export_stopped); } static void channel_request_reload(struct channel *c) { - ASSERT(c->channel_state == CS_UP); + ASSERT(c->in_req.hook); ASSERT(channel_reloadable(c)); CD(c, "Reload requested"); @@ -736,8 +835,8 @@ channel_request_reload(struct channel *c) * Should this be done before reload_routes() hook? * Perhaps, but routes are updated asynchronously. */ - channel_reset_limit(&c->rx_limit); - channel_reset_limit(&c->in_limit); + channel_reset_limit(c, &c->rx_limit, PLD_RX); + channel_reset_limit(c, &c->in_limit, PLD_IN); } const struct channel_class channel_basic = { @@ -840,19 +939,19 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) /* Reconfigure channel fields */ c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; - c->rx_limit = cf->rx_limit; - c->in_limit = cf->in_limit; - c->out_limit = cf->out_limit; + + channel_update_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); + channel_update_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); + channel_update_limit(c, &c->out_limit, PLD_OUT, &cf->out_limit); // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; c->preference = cf->preference; c->debug = cf->debug; + c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; c->in_keep_filtered = cf->in_keep_filtered; c->rpki_reload = cf->rpki_reload; - channel_verify_limits(c); - /* Execute channel-specific reconfigure hook */ if (c->channel->reconfigure && !c->channel->reconfigure(c, cf, &import_changed, &export_changed)) return 0; @@ -964,8 +1063,8 @@ proto_event(void *ptr) if (proto_is_done(p)) { - if (p->proto->cleanup) - p->proto->cleanup(p); + rfree(p->pool); + p->pool = NULL; p->active = 0; proto_log_state_change(p); @@ -1517,7 +1616,7 @@ graceful_restart_done(timer *t UNUSED) WALK_LIST(c, p->channels) { /* Resume postponed export of routes */ - if ((c->channel_state == CS_UP) && c->gr_wait && c->proto->rt_notify) + if ((c->channel_state == CS_UP) && c->gr_wait && p->rt_notify) channel_start_export(c); /* Cleanup */ @@ -1607,7 +1706,11 @@ protos_dump_all(void) struct proto *p; WALK_LIST(p, proto_list) { - debug(" protocol %s state %s\n", p->name, p_states[p->proto_state]); +#define DPF(x) (p->x ? " " #x : "") + debug(" protocol %s (%p) state %s with %d active channels flags: %s%s%s%s%s\n", + p->name, p, p_states[p->proto_state], p->active_channels, + DPF(disabled), DPF(active), DPF(do_start), DPF(do_stop), DPF(reconfiguring)); +#undef DPF struct channel *c; WALK_LIST(c, p->channels) @@ -1617,6 +1720,9 @@ protos_dump_all(void) debug("\tInput filter: %s\n", filter_name(c->in_filter)); if (c->out_filter) debug("\tOutput filter: %s\n", filter_name(c->out_filter)); + debug("\tChannel state: %s/%s/%s\n", c_states[c->channel_state], + c->in_req.hook ? rt_import_state_name(rt_import_get_state(c->in_req.hook)) : "-", + c->out_req.hook ? rt_export_state_name(rt_export_get_state(c->out_req.hook)) : "-"); } if (p->proto->dump && (p->proto_state != PS_DOWN)) @@ -1735,88 +1841,104 @@ proto_set_message(struct proto *p, char *msg, int len) } -static const char * -channel_limit_name(struct channel_limit *l) -{ - const char *actions[] = { - [PLA_WARN] = "warn", - [PLA_BLOCK] = "block", - [PLA_RESTART] = "restart", - [PLA_DISABLE] = "disable", - }; +static const char * channel_limit_name[] = { + [PLA_WARN] = "warn", + [PLA_BLOCK] = "block", + [PLA_RESTART] = "restart", + [PLA_DISABLE] = "disable", +}; - return actions[l->action]; -} -/** - * channel_notify_limit: notify about limit hit and take appropriate action - * @c: channel - * @l: limit being hit - * @dir: limit direction (PLD_*) - * @rt_count: the number of routes - * - * The function is called by the route processing core when limit @l - * is breached. It activates the limit and tooks appropriate action - * according to @l->action. - */ -void -channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count) +static void +channel_log_limit(struct channel *c, struct limit *l, int dir) { const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; - const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; - struct proto *p = c->proto; + log(L_WARN "Channel %s.%s hits route %s limit (%d), action: %s", + c->proto->name, c->name, dir_name[dir], l->max, channel_limit_name[c->limit_actions[dir]]); +} - if (l->state == PLS_BLOCKED) +static void +channel_activate_limit(struct channel *c, struct limit *l, int dir) +{ + if (c->limit_active & (1 << dir)) return; - /* For warning action, we want the log message every time we hit the limit */ - if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) - log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir_name[dir], l->limit, channel_limit_name(l)); + c->limit_active |= (1 << dir); + channel_log_limit(c, l, dir); +} - switch (l->action) - { - case PLA_WARN: - l->state = PLS_ACTIVE; - break; +static int +channel_limit_warn(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; - case PLA_BLOCK: - l->state = PLS_BLOCKED; - break; + channel_log_limit(c, l, dir); - case PLA_RESTART: - case PLA_DISABLE: - l->state = PLS_BLOCKED; - if (p->proto_state == PS_UP) - proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); - break; - } + return 0; } -static void -channel_verify_limits(struct channel *c) +static int +channel_limit_block(struct limit *l, void *data) +{ + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + int dir = cld->dir; + + channel_activate_limit(c, l, dir); + + return 1; +} + +static const byte chl_dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; + +static int +channel_limit_down(struct limit *l, void *data) { - struct channel_limit *l; - u32 all_routes = c->stats.imp_routes + c->stats.filt_routes; + struct channel_limit_data *cld = data; + struct channel *c = cld->c; + struct proto *p = c->proto; + int dir = cld->dir; - l = &c->rx_limit; - if (l->action && (all_routes > l->limit)) - channel_notify_limit(c, l, PLD_RX, all_routes); + channel_activate_limit(c, l, dir); - l = &c->in_limit; - if (l->action && (c->stats.imp_routes > l->limit)) - channel_notify_limit(c, l, PLD_IN, c->stats.imp_routes); + if (p->proto_state == PS_UP) + proto_schedule_down(p, c->limit_actions[dir] == PLA_RESTART, chl_dir_down[dir]); - l = &c->out_limit; - if (l->action && (c->stats.exp_routes > l->limit)) - channel_notify_limit(c, l, PLD_OUT, c->stats.exp_routes); + return 1; } -static inline void -channel_reset_limit(struct channel_limit *l) +static int (*channel_limit_action[])(struct limit *, void *) = { + [PLA_NONE] = NULL, + [PLA_WARN] = channel_limit_warn, + [PLA_BLOCK] = channel_limit_block, + [PLA_RESTART] = channel_limit_down, + [PLA_DISABLE] = channel_limit_down, +}; + +static void +channel_update_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + l->action = channel_limit_action[cf->action]; + c->limit_actions[dir] = cf->action; + + struct channel_limit_data cld = { .c = c, .dir = dir }; + limit_update(l, &cld, cf->action ? cf->limit : ~((u32) 0)); +} + +static void +channel_init_limit(struct channel *c, struct limit *l, int dir, struct channel_limit *cf) +{ + channel_reset_limit(c, l, dir); + channel_update_limit(c, l, dir, cf); +} + +static void +channel_reset_limit(struct channel *c, struct limit *l, int dir) { - if (l->action) - l->state = PLS_INITIAL; + limit_reset(l); + c->limit_active &= ~(1 << dir); } static inline void @@ -1868,8 +1990,6 @@ proto_do_down(struct proto *p) { p->down_code = 0; neigh_prune(); - rfree(p->pool); - p->pool = NULL; /* Shutdown is finished in the protocol event */ if (proto_is_done(p)) @@ -1964,38 +2084,58 @@ proto_state_name(struct proto *p) static void channel_show_stats(struct channel *c) { - struct proto_stats *s = &c->stats; + struct channel_import_stats *ch_is = &c->import_stats; + struct channel_export_stats *ch_es = &c->export_stats; + struct rt_import_stats *rt_is = c->in_req.hook ? &c->in_req.hook->stats : NULL; + struct rt_export_stats *rt_es = c->out_req.hook ? &c->out_req.hook->stats : NULL; + +#define SON(ie, item) ((ie) ? (ie)->item : 0) +#define SCI(item) SON(ch_is, item) +#define SCE(item) SON(ch_es, item) +#define SRI(item) SON(rt_is, item) +#define SRE(item) SON(rt_es, item) + + u32 rx_routes = c->rx_limit.count; + u32 in_routes = c->in_limit.count; + u32 out_routes = c->out_limit.count; if (c->in_keep_filtered) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", - s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); + in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", - s->imp_routes, s->exp_routes, s->pref_routes); - - cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); - cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s->imp_updates_received, s->imp_updates_invalid, - s->imp_updates_filtered, s->imp_updates_ignored, - s->imp_updates_accepted); - cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s->imp_withdraws_received, s->imp_withdraws_invalid, - s->imp_withdraws_ignored, s->imp_withdraws_accepted); - cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u", - s->exp_updates_received, s->exp_updates_rejected, - s->exp_updates_filtered, s->exp_updates_accepted); - cli_msg(-1006, " Export withdraws: %10u --- --- --- %10u", - s->exp_withdraws_received, s->exp_withdraws_accepted); + in_routes, out_routes, SRI(pref)); + + cli_msg(-1006, " Route change stats: received rejected filtered ignored RX limit IN limit accepted"); + cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u %10u %10u", + SCI(updates_received), SCI(updates_invalid), + SCI(updates_filtered), SRI(updates_ignored), + SCI(updates_limited_rx), SCI(updates_limited_in), + SRI(updates_accepted)); + cli_msg(-1006, " Import withdraws: %10u %10u --- %10u --- %10u", + SCI(withdraws_received), SCI(withdraws_invalid), + SRI(withdraws_ignored), SRI(withdraws_accepted)); + cli_msg(-1006, " Export updates: %10u %10u %10u --- %10u %10u", + SRE(updates_received), SCE(updates_rejected), + SCE(updates_filtered), SCE(updates_limited), SCE(updates_accepted)); + cli_msg(-1006, " Export withdraws: %10u --- --- --- ---%10u", + SRE(withdraws_received), SCE(withdraws_accepted)); + +#undef SRI +#undef SRE +#undef SCI +#undef SCE +#undef SON } void -channel_show_limit(struct channel_limit *l, const char *dsc) +channel_show_limit(struct limit *l, const char *dsc, int active, int action) { if (!l->action) return; - cli_msg(-1006, " %-16s%d%s", dsc, l->limit, l->state ? " [HIT]" : ""); - cli_msg(-1006, " Action: %s", channel_limit_name(l)); + cli_msg(-1006, " %-16s%d%s", dsc, l->max, active ? " [HIT]" : ""); + cli_msg(-1006, " Action: %s", channel_limit_name[action]); } void @@ -2003,6 +2143,8 @@ channel_show_info(struct channel *c) { cli_msg(-1006, " Channel %s", c->name); cli_msg(-1006, " State: %s", c_states[c->channel_state]); + cli_msg(-1006, " Import state: %s", rt_import_state_name(rt_import_get_state(c->in_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(c->out_req.hook))); cli_msg(-1006, " Table: %s", c->table->name); cli_msg(-1006, " Preference: %d", c->preference); cli_msg(-1006, " Input filter: %s", filter_name(c->in_filter)); @@ -2013,9 +2155,9 @@ channel_show_info(struct channel *c) c->gr_lock ? " pending" : "", c->gr_wait ? " waiting" : ""); - channel_show_limit(&c->rx_limit, "Receive limit:"); - channel_show_limit(&c->in_limit, "Import limit:"); - channel_show_limit(&c->out_limit, "Export limit:"); + channel_show_limit(&c->rx_limit, "Receive limit:", c->limit_active & (1 << PLD_RX), c->limit_actions[PLD_RX]); + channel_show_limit(&c->in_limit, "Import limit:", c->limit_active & (1 << PLD_IN), c->limit_actions[PLD_IN]); + channel_show_limit(&c->out_limit, "Export limit:", c->limit_active & (1 << PLD_OUT), c->limit_actions[PLD_OUT]); if (c->channel_state != CS_DOWN) channel_show_stats(c); diff --git a/nest/protocol.h b/nest/protocol.h index 8f0cc4b4..aeb60ac6 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -13,6 +13,7 @@ #include "lib/resource.h" #include "lib/event.h" #include "nest/rt.h" +#include "nest/limit.h" #include "conf/conf.h" struct iface; @@ -58,7 +59,6 @@ struct protocol { void (*dump)(struct proto *); /* Debugging dump */ int (*start)(struct proto *); /* Start the instance */ int (*shutdown)(struct proto *); /* Stop the instance */ - void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ void (*get_route_info)(struct rte *, byte *buf); /* Get route information (for `show route' command) */ // int (*get_attr)(const struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ @@ -114,31 +114,6 @@ struct proto_config { }; /* Protocol statistics */ -struct proto_stats { - /* Import - from protocol to core */ - u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ - u32 pref_routes; /* Number of routes selected as best in the (adjacent) routing table */ - u32 imp_updates_received; /* Number of route updates received */ - u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ - u32 imp_updates_filtered; /* Number of route updates rejected by filters */ - u32 imp_updates_ignored; /* Number of route updates rejected as already in route table */ - u32 imp_updates_accepted; /* Number of route updates accepted and imported */ - u32 imp_withdraws_received; /* Number of route withdraws received */ - u32 imp_withdraws_invalid; /* Number of route withdraws rejected as invalid */ - u32 imp_withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ - u32 imp_withdraws_accepted; /* Number of route withdraws accepted and processed */ - - /* Export - from core to protocol */ - u32 exp_routes; /* Number of routes successfully exported to the protocol */ - u32 exp_updates_received; /* Number of route updates received */ - u32 exp_updates_rejected; /* Number of route updates rejected by protocol */ - u32 exp_updates_filtered; /* Number of route updates rejected by filters */ - u32 exp_updates_accepted; /* Number of route updates accepted and exported */ - u32 exp_withdraws_received; /* Number of route withdraws received */ - u32 exp_withdraws_accepted; /* Number of route withdraws accepted and processed */ -}; - struct proto { node n; /* Node in global proto_list */ struct protocol *proto; /* Protocol */ @@ -193,9 +168,9 @@ struct proto { void (*if_notify)(struct proto *, unsigned flags, struct iface *i); void (*ifa_notify)(struct proto *, unsigned flags, struct ifa *a); - void (*rt_notify)(struct proto *, struct channel *, struct network *net, struct rte *new, struct rte *old); + void (*rt_notify)(struct proto *, struct channel *, const net_addr *net, struct rte *new, const struct rte *old); void (*neigh_notify)(struct neighbor *neigh); - int (*preexport)(struct proto *, struct rte *rt); + int (*preexport)(struct channel *, struct rte *rt); void (*reload_routes)(struct channel *); void (*feed_begin)(struct channel *, int initial); void (*feed_end)(struct channel *); @@ -214,10 +189,10 @@ struct proto { int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *); int (*rte_better)(struct rte *, struct rte *); int (*rte_mergable)(struct rte *, struct rte *); - struct rte * (*rte_modify)(struct rte *, struct linpool *); + struct rte *(*rte_modify)(struct rte *, struct linpool *); void (*rte_insert)(struct network *, struct rte *); void (*rte_remove)(struct network *, struct rte *); - u32 (*rte_igp_metric)(struct rte *); + u32 (*rte_igp_metric)(const struct rte *); /* Hic sunt protocol-specific data */ }; @@ -257,7 +232,7 @@ void channel_graceful_restart_unlock(struct channel *c); #define DEFAULT_GR_WAIT 240 -void channel_show_limit(struct channel_limit *l, const char *dsc); +void channel_show_limit(struct limit *l, const char *dsc, int active, int action); void channel_show_info(struct channel *c); void channel_cmd_debug(struct channel *c, uint mask); @@ -412,19 +387,30 @@ extern struct proto_config *cf_dev_proto; #define PLA_RESTART 4 /* Force protocol restart */ #define PLA_DISABLE 5 /* Shutdown and disable protocol */ -#define PLS_INITIAL 0 /* Initial limit state after protocol start */ -#define PLS_ACTIVE 1 /* Limit was hit */ -#define PLS_BLOCKED 2 /* Limit is active and blocking new routes */ - struct channel_limit { u32 limit; /* Maximum number of prefixes */ u8 action; /* Action to take (PLA_*) */ - u8 state; /* State of limit (PLS_*) */ }; -void channel_notify_limit(struct channel *c, struct channel_limit *l, int dir, u32 rt_count); +struct channel_limit_data { + struct channel *c; + int dir; +}; + +#define CLP__RX(_c) (&(_c)->rx_limit) +#define CLP__IN(_c) (&(_c)->in_limit) +#define CLP__OUT(_c) (&(_c)->out_limit) +#if 0 +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) log(L_TRACE "%s.%s: %s limit %s %u", (_c)->proto->name, (_c)->name, #_dir, _op, (CLP__##_dir(_c))->count) +#else +#define CHANNEL_LIMIT_LOG(_c, _dir, _op) +#endif + +#define CHANNEL_LIMIT_PUSH(_c, _dir) ({ CHANNEL_LIMIT_LOG(_c, _dir, "push from"); struct channel_limit_data cld = { .c = (_c), .dir = PLD_##_dir }; limit_push(CLP__##_dir(_c), &cld); }) +#define CHANNEL_LIMIT_POP(_c, _dir) ({ limit_pop(CLP__##_dir(_c)); CHANNEL_LIMIT_LOG(_c, _dir, "pop to"); }) + /* * Channels */ @@ -466,6 +452,7 @@ struct channel_config { struct proto_config *parent; /* Where channel is defined (proto or template) */ struct rtable_config *table; /* Table we're attached to */ const struct filter *in_filter, *out_filter; /* Attached filters */ + struct channel_limit rx_limit; /* Limit for receiving routes from protocol (relevant when in_keep_filtered is active) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ @@ -482,7 +469,6 @@ struct channel_config { struct channel { node n; /* Node in proto->channels */ - node table_node; /* Node in table->channels */ const char *name; /* Channel name (may be NULL) */ const struct channel_class *channel; @@ -491,14 +477,39 @@ struct channel { struct rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ - struct bmap export_map; /* Keeps track which routes passed export filter */ - struct channel_limit rx_limit; /* Receive limit (for in_keep_filtered) */ - struct channel_limit in_limit; /* Input limit */ - struct channel_limit out_limit; /* Output limit */ - - struct event *feed_event; /* Event responsible for feeding */ - struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ - struct proto_stats stats; /* Per-channel protocol statistics */ + struct bmap export_map; /* Keeps track which routes were really exported */ + struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ + + struct limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct limit in_limit; /* Input limit */ + struct limit out_limit; /* Output limit */ + + u8 limit_actions[PLD_MAX]; /* Limit actions enum */ + u8 limit_active; /* Flags for active limits */ + + struct channel_import_stats { + /* Import - from protocol to core */ + u32 updates_received; /* Number of route updates received */ + u32 updates_invalid; /* Number of route updates rejected as invalid */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_limited_rx; /* Number of route updates exceeding the rx_limit */ + u32 updates_limited_in; /* Number of route updates exceeding the in_limit */ + u32 withdraws_received; /* Number of route withdraws received */ + u32 withdraws_invalid; /* Number of route withdraws rejected as invalid */ + } import_stats; + + struct channel_export_stats { + /* Export - from core to protocol */ + u32 updates_rejected; /* Number of route updates rejected by protocol */ + u32 updates_filtered; /* Number of route updates rejected by filters */ + u32 updates_accepted; /* Number of route updates accepted and exported */ + u32 updates_limited; /* Number of route updates exceeding the out_limit */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } export_stats; + + struct rt_import_request in_req; /* Table import connection */ + struct rt_export_request out_req; /* Table export connection */ + u32 refeed_count; /* Number of routes exported during refeed regardless of out_limit */ u8 net_type; /* Routing table network type (NET_*), 0 for undefined */ @@ -511,10 +522,7 @@ struct channel { u8 stale; /* Used in reconfiguration */ u8 channel_state; - u8 export_state; /* Route export state (ES_*, see below) */ - u8 feed_active; - u8 flush_active; - u8 refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */ + u8 refeeding; /* Refeeding the channel. */ u8 reloadable; /* Hook reload_routes() is allowed on the channel */ u8 gr_lock; /* Graceful restart mechanism should wait for this channel */ u8 gr_wait; /* Route export to channel is postponed until graceful restart */ @@ -524,7 +532,7 @@ struct channel { struct rtable *in_table; /* Internal table for received routes */ struct event *reload_event; /* Event responsible for reloading from in_table */ struct fib_iterator reload_fit; /* FIB iterator in in_table used during reloading */ - struct rte *reload_next_rte; /* Route iterator in in_table used during reloading */ + struct rte_storage *reload_next_rte; /* Route iterator in in_table used during reloading */ u8 reload_active; /* Iterator reload_fit is linked */ u8 reload_pending; /* Reloading and another reload is scheduled */ @@ -562,34 +570,34 @@ struct channel { * restricted by that and is on volition of the protocol. Generally, channels * are opened in protocols' start() hooks when going to PS_UP. * - * CS_FLUSHING - The transitional state between initialized channel and closed + * CS_STOP - The transitional state between initialized channel and closed * channel. The channel is still initialized, but no route exchange is allowed. * Instead, the associated table is running flush loop to remove routes imported * through the channel. After that, the channel changes state to CS_DOWN and * is detached from the table (the table is unlocked and the channel is unlinked - * from it). Unlike other states, the CS_FLUSHING state is not explicitly + * from it). Unlike other states, the CS_STOP state is not explicitly * entered or left by the protocol. A protocol may request to close a channel * (by calling channel_close()), which causes the channel to change state to - * CS_FLUSHING and later to CS_DOWN. Also note that channels are closed + * CS_STOP and later to CS_DOWN. Also note that channels are closed * automatically by the core when the protocol is going down. * + * CS_PAUSE - Almost the same as CS_STOP, just the table import is kept and + * the table export is stopped before transitioning to CS_START. + * * Allowed transitions: * * CS_DOWN -> CS_START / CS_UP - * CS_START -> CS_UP / CS_FLUSHING - * CS_UP -> CS_START / CS_FLUSHING - * CS_FLUSHING -> CS_DOWN (automatic) + * CS_START -> CS_UP / CS_STOP + * CS_UP -> CS_PAUSE / CS_STOP + * CS_PAUSE -> CS_START (automatic) + * CS_STOP -> CS_DOWN (automatic) */ #define CS_DOWN 0 #define CS_START 1 #define CS_UP 2 -#define CS_FLUSHING 3 - -#define ES_DOWN 0 -#define ES_FEEDING 1 -#define ES_READY 2 - +#define CS_STOP 3 +#define CS_PAUSE 4 struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_type); static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc) @@ -607,25 +615,11 @@ void channel_schedule_reload(struct channel *c); static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } static inline void channel_open(struct channel *c) { channel_set_state(c, CS_UP); } -static inline void channel_close(struct channel *c) { channel_set_state(c, CS_FLUSHING); } +static inline void channel_close(struct channel *c) { channel_set_state(c, CS_STOP); } void channel_request_feeding(struct channel *c); void *channel_config_new(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); void *channel_config_get(const struct channel_class *cc, const char *name, uint net_type, struct proto_config *proto); int channel_reconfigure(struct channel *c, struct channel_config *cf); - -/* Moved from route.h to avoid dependency conflicts */ -static inline void rte_update(struct proto *p, const net_addr *n, rte *new) { rte_update2(p->main_channel, n, new, p->main_source); } - -static inline void -rte_update3(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ - if (c->in_table && !rte_update_in(c, n, new, src)) - return; - - rte_update2(c, n, new, src); -} - - #endif diff --git a/nest/rt-attr.c b/nest/rt-attr.c index b5be936b..f548a575 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -172,6 +172,12 @@ struct ea_class ea_gen_flowspec_valid = { .readonly = 1, }; +const char * flowspec_valid_names[FLOWSPEC__MAX] = { + [FLOWSPEC_UNKNOWN] = "unknown", + [FLOWSPEC_VALID] = "", + [FLOWSPEC_INVALID] = "invalid", +}; + pool *rta_pool; static slab *rta_slab; diff --git a/nest/rt-dev.c b/nest/rt-dev.c index c50e018c..fa224f9a 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -67,13 +67,10 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); - rte_update2(c, net, NULL, src); + rte_update(c, net, NULL, src); } else if (flags & IF_CHANGE_UP) { - rta *a; - rte *e; - DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip); if (cf->check_link && !(ad->iface->flags & IF_LINK_UP)) @@ -92,10 +89,12 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_DEVICE); ea_set_attr_data(&a0.eattrs, &ea_gen_nexthop, 0, nhad.ad.data, nhad.ad.length); - a = rta_lookup(&a0); - e = rte_get_temp(a, src); - e->pflags = 0; - rte_update2(c, net, e, src); + rte e0 = { + .attrs = rta_lookup(&a0), + .src = src, + }; + + rte_update(c, net, &e0, src); } } diff --git a/nest/rt-show.c b/nest/rt-show.c index 0ad8f5c6..cc5a9a10 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -45,9 +45,11 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rta *a = e->attrs; int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); - eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); + eattr *nhea = net_type_match(e->net, NB_DEST) ? + ea_find(a->eattrs, &ea_gen_nexthop) : NULL; struct nexthop_adata *nhad = nhea ? (struct nexthop_adata *) nhea->u.ptr : NULL; - int dest = NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest; + int dest = nhad ? (NEXTHOP_IS_REACHABLE(nhad) ? RTD_UNICAST : nhad->dest) : RTD_NONE; + int flowspec_valid = net_is_flow(e->net) ? rt_get_flowspec_valid(e) : FLOWSPEC_UNKNOWN; tm_format_time(tm, &config->tf_route, e->lastmod); ip_addr a_from = ea_get_ip(a->eattrs, &ea_gen_from, IPA_NONE); @@ -69,8 +71,9 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary if (d->last_table != d->tab) rt_show_table(c, d); - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, rta_dest_name(dest), - e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : rta_dest_name(dest), + e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (dest == RTD_UNICAST) NEXTHOP_WALK(nh, nhad) @@ -102,10 +105,32 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary rta_show(c, a); } +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} + +static void +rte_feed_obtain(net *n, rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + ASSERT_DIE(i == count); +} + static void rt_show_net(struct cli *c, net *n, struct rt_show_data *d) { - rte *e, *ee; byte ia[NET_MAX_TEXT_LENGTH+1]; struct channel *ec = d->tab->export_channel; @@ -117,9 +142,9 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) int first_show = 1; int pass = 0; - for (e = n->routes; e; e = e->next) + for (struct rte_storage *er = n->routes; er; er = er->next) { - if (rte_is_filtered(e) != d->filtered) + if (rte_is_filtered(&er->rte) != d->filtered) continue; d->rt_counter++; @@ -129,15 +154,15 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (pass) continue; - ee = e; + struct rte e = er->rte; /* Export channel is down, do not try to export routes to it */ - if (ec && (ec->export_state == ES_DOWN)) + if (ec && !ec->out_req.hook) goto skip; if (d->export_mode == RSEM_EXPORTED) { - if (!bmap_test(&ec->export_map, ee->id)) + if (!bmap_test(&ec->export_map, e.id)) goto skip; // if (ec->ra_mode != RA_ANY) @@ -146,17 +171,24 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) else if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED)) { /* Special case for merged export */ - rte *rt_free; - e = rt_export_merged(ec, n, &rt_free, c->show_pool, 1); pass = 1; + uint count = rte_feed_count(n); + if (!count) + goto skip; + + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + rte *em = rt_export_merged(ec, feed, count, c->show_pool, 1); - if (!e) - { e = ee; goto skip; } + if (em) + e = *em; + else + goto skip; } else if (d->export_mode) { struct proto *ep = ec->proto; - int ic = ep->preexport ? ep->preexport(ep, e) : 0; + int ic = ep->preexport ? ep->preexport(ec, &e) : 0; if (ec->ra_mode == RA_OPTIMAL || ec->ra_mode == RA_MERGED) pass = 1; @@ -182,7 +214,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) } } - if (d->show_protocol && (d->show_protocol != e->src->proto)) + if (d->show_protocol && (d->show_protocol != e.src->proto)) goto skip; if (f_run(d->filter, &e, 0) > F_ACCEPT) @@ -195,18 +227,13 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) else ia[0] = 0; - rt_show_rte(c, ia, e, d, (e->net->routes == ee)); + rt_show_rte(c, ia, &e, d, (n->routes == er)); first_show = 0; } d->show_counter++; skip: - if (e != ee) - { - rte_free(e); - e = ee; - } lp_flush(c->show_pool); if (d->primary_only) @@ -378,7 +405,7 @@ rt_show_get_default_tables(struct rt_show_data *d) { WALK_LIST(c, d->export_protocol->channels) { - if (c->export_state == ES_DOWN) + if (!c->out_req.hook) continue; tab = rt_show_add_table(d, c->table); diff --git a/nest/rt-table.c b/nest/rt-table.c index 539e04d0..946f4021 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -112,19 +112,56 @@ pool *rt_table_pool; -static slab *rte_slab; static linpool *rte_update_pool; list routing_tables; +list deleted_routing_tables; static void rt_free_hostcache(rtable *tab); static void rt_notify_hostcache(rtable *tab, net *net); static void rt_update_hostcache(rtable *tab); static void rt_next_hop_update(rtable *tab); +static inline void rt_next_hop_resolve_rte(rte *r); +static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); -static inline rte *rt_next_hop_update_rte(rtable *tab, rte *old); +static void rt_feed_channel(void *); + +const char *rt_import_state_name_array[TIS_MAX] = { + [TIS_DOWN] = "DOWN", + [TIS_UP] = "UP", + [TIS_STOP] = "STOP", + [TIS_FLUSHING] = "FLUSHING", + [TIS_WAITING] = "WAITING", + [TIS_CLEARED] = "CLEARED", +}; + +const char *rt_export_state_name_array[TES_MAX] = { + [TES_DOWN] = "DOWN", + [TES_HUNGRY] = "HUNGRY", + [TES_FEEDING] = "FEEDING", + [TES_READY] = "READY", + [TES_STOP] = "STOP" +}; + +const char *rt_import_state_name(u8 state) +{ + if (state >= TIS_MAX) + return "!! INVALID !!"; + else + return rt_import_state_name_array[state]; +} + +const char *rt_export_state_name(u8 state) +{ + if (state >= TES_MAX) + return "!! INVALID !!"; + else + return rt_export_state_name_array[state]; +} + +static inline struct rte_storage *rt_next_hop_update_rte(rtable *tab, net *n, rte *old); static struct hostentry *rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep); static void @@ -395,7 +432,7 @@ net_roa_check_ip4_trie(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -422,7 +459,7 @@ net_roa_check_ip4_fib(rtable *tab, const net_addr_ip4 *px, u32 asn) net_addr_roa4 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa4(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa4(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -455,7 +492,7 @@ net_roa_check_ip6_trie(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &roa0) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &roa0) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -482,7 +519,7 @@ net_roa_check_ip6_fib(rtable *tab, const net_addr_ip6 *px, u32 asn) net_addr_roa6 *roa = (void *) fn->addr; net *r = fib_node_to_user(&tab->fib, fn); - if (net_equal_prefix_roa6(roa, &n) && rte_is_valid(r->routes)) + if (net_equal_prefix_roa6(roa, &n) && r->routes && rte_is_valid(&r->routes->rte)) { anything = 1; if (asn && (roa->asn == asn) && (roa->max_pxlen >= px->pxlen)) @@ -541,105 +578,52 @@ net_roa_check(rtable *tab, const net_addr *n, u32 asn) * @net: network node * @src: route source * - * The rte_find() function returns a route for destination @net - * which is from route source @src. + * The rte_find() function returns a pointer to a route for destination @net + * which is from route source @src. List end pointer is returned if no route is found. */ -rte * +static struct rte_storage ** rte_find(net *net, struct rte_src *src) { - rte *e = net->routes; + struct rte_storage **e = &net->routes; - while (e && e->src != src) - e = e->next; - return e; -} + while ((*e) && (*e)->rte.src != src) + e = &(*e)->next; -/** - * rte_get_temp - get a temporary &rte - * @a: attributes to assign to the new route (a &rta; in case it's - * un-cached, rte_update() will create a cached copy automatically) - * - * Create a temporary &rte and bind it with the attributes @a. - * Also set route preference to the default preference set for - * the protocol. - */ -rte * -rte_get_temp(rta *a, struct rte_src *src) -{ - rte *e = sl_alloc(rte_slab); - - e->attrs = a; - e->id = 0; - e->flags = 0; - rt_lock_source(e->src = src); return e; } -rte * -rte_do_cow(rte *r) + +struct rte_storage * +rte_store(const rte *r, net *net, rtable *tab) { - rte *e = sl_alloc(rte_slab); + struct rte_storage *e = sl_alloc(tab->rte_slab); - memcpy(e, r, sizeof(rte)); + e->rte = *r; + e->rte.net = net->n.addr; - rt_lock_source(e->src); - e->attrs = rta_clone(r->attrs); - e->flags = 0; - return e; -} + rt_lock_source(e->rte.src); -/** - * rte_cow_rta - get a private writable copy of &rte with writable &rta - * @r: a route entry to be copied - * @lp: a linpool from which to allocate &rta - * - * rte_cow_rta() takes a &rte and prepares it and associated &rta for - * modification. There are three possibilities: First, both &rte and &rta are - * private copies, in that case they are returned unchanged. Second, &rte is - * private copy, but &rta is cached, in that case &rta is duplicated using - * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case - * both structures are duplicated by rte_do_cow() and rta_do_cow(). - * - * Note that in the second case, cached &rta loses one reference, while private - * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs, - * nexthops, ...) with it. To work properly, original shared &rta should have - * another reference during the life of created private copy. - * - * Result: a pointer to the new writable &rte with writable &rta. - */ -rte * -rte_cow_rta(rte *r, linpool *lp) -{ - if (!rta_is_cached(r->attrs)) - return r; + if (e->rte.attrs->cached) + e->rte.attrs = rta_clone(e->rte.attrs); + else + e->rte.attrs = rta_lookup(e->rte.attrs); - r = rte_cow(r); - rta *a = rta_do_cow(r->attrs, lp); - rta_free(r->attrs); - r->attrs = a; - return r; + return e; } /** * rte_free - delete a &rte - * @e: &rte to be deleted + * @e: &struct rte_storage to be deleted + * @tab: the table which the rte belongs to * * rte_free() deletes the given &rte from the routing table it's linked to. */ -void -rte_free(rte *e) -{ - rt_unlock_source(e->src); - if (rta_is_cached(e->attrs)) - rta_free(e->attrs); - sl_free(e); -} -static inline void -rte_free_quick(rte *e) +void +rte_free(struct rte_storage *e) { - rt_unlock_source(e->src); - rta_free(e->attrs); + rt_unlock_source(e->rte.src); + rta_free(e->rte.attrs); sl_free(e); } @@ -695,162 +679,187 @@ rte_mergable(rte *pri, rte *sec) } static void -rte_trace(struct channel *c, rte *e, int dir, char *msg) +rte_trace(const char *name, const rte *e, int dir, const char *msg) { - log(L_TRACE "%s.%s %c %s %N %uL %uG %s", - c->proto->name, c->name ?: "?", dir, msg, e->net->n.addr, e->src->private_id, e->src->global_id, + log(L_TRACE "%s %c %s %N %uL %uG %s", + name, dir, msg, e->net, e->src->private_id, e->src->global_id, rta_dest_name(rte_dest(e))); } static inline void -rte_trace_in(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_in(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '>', msg); + rte_trace(c->in_req.name, e, '>', msg); } static inline void -rte_trace_out(uint flag, struct channel *c, rte *e, char *msg) +channel_rte_trace_out(uint flag, struct channel *c, const rte *e, const char *msg) { if ((c->debug & flag) || (c->proto->debug & flag)) - rte_trace(c, e, '<', msg); + rte_trace(c->out_req.name, e, '<', msg); +} + +static inline void +rt_rte_trace_in(uint flag, struct rt_import_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '>', msg); +} + +#if 0 +// seems to be unused at all +static inline void +rt_rte_trace_out(uint flag, struct rt_export_request *req, const rte *e, const char *msg) +{ + if (req->trace_routes & flag) + rte_trace(req->name, e, '<', msg); +} +#endif + +static uint +rte_feed_count(net *n) +{ + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} + +static void +rte_feed_obtain(net *n, struct rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + { + ASSERT_DIE(i < count); + feed[i++] = &e->rte; + } + ASSERT_DIE(i == count); } static rte * -export_filter(struct channel *c, rte *rt0, rte **rt_free, int silent) +export_filter(struct channel *c, rte *rt, int silent) { struct proto *p = c->proto; const struct filter *filter = c->out_filter; - struct proto_stats *stats = &c->stats; - rte *rt; - int v; + struct channel_export_stats *stats = &c->export_stats; - rt = rt0; - *rt_free = NULL; + /* Do nothing if we have already rejected the route */ + if (silent && bmap_test(&c->export_reject_map, rt->id)) + goto reject_noset; - v = p->preexport ? p->preexport(p, rt) : 0; + int v = p->preexport ? p->preexport(c, rt) : 0; if (v < 0) { if (silent) - goto reject; + goto reject_noset; - stats->exp_updates_rejected++; + stats->updates_rejected++; if (v == RIC_REJECT) - rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); - goto reject; + channel_rte_trace_out(D_FILTERS, c, rt, "rejected by protocol"); + goto reject_noset; + } if (v > 0) { if (!silent) - rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); + channel_rte_trace_out(D_FILTERS, c, rt, "forced accept by protocol"); goto accept; } v = filter && ((filter == FILTER_REJECT) || - (f_run(filter, &rt, + (f_run(filter, rt, (silent ? FF_SILENT : 0)) > F_ACCEPT)); if (v) { if (silent) goto reject; - stats->exp_updates_filtered++; - rte_trace_out(D_FILTERS, c, rt, "filtered out"); + stats->updates_filtered++; + channel_rte_trace_out(D_FILTERS, c, rt, "filtered out"); goto reject; } accept: - if (rt != rt0) - *rt_free = rt; + /* We have accepted the route */ + bmap_clear(&c->export_reject_map, rt->id); return rt; reject: + /* We have rejected the route by filter */ + bmap_set(&c->export_reject_map, rt->id); + +reject_noset: /* Discard temporary rte */ - if (rt != rt0) - rte_free(rt); return NULL; } static void -do_rt_notify(struct channel *c, net *net, rte *new, rte *old, int refeed) +do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) { struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; + struct channel_export_stats *stats = &c->export_stats; - if (refeed && new) + if (c->refeeding && new) c->refeed_count++; - /* Apply export limit */ - struct channel_limit *l = &c->out_limit; - if (l->action && !old && new) - { - if (stats->exp_routes >= l->limit) - channel_notify_limit(c, l, PLD_OUT, stats->exp_routes); - - if (l->state == PLS_BLOCKED) + if (!old && new) + if (CHANNEL_LIMIT_PUSH(c, OUT)) { - stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); + stats->updates_rejected++; + channel_rte_trace_out(D_FILTERS, c, new, "rejected [limit]"); return; } - } + + if (!new && old) + CHANNEL_LIMIT_POP(c, OUT); /* Apply export table */ - struct rte *old_exported = NULL; + struct rte_storage *old_exported = NULL; if (c->out_table) { - if (!rte_update_out(c, net->n.addr, new, old, &old_exported, refeed)) + if (!rte_update_out(c, net, new, old, &old_exported)) + { + channel_rte_trace_out(D_ROUTES, c, new, "idempotent"); return; + } } - else if (c->out_filter == FILTER_ACCEPT) - old_exported = old; if (new) - stats->exp_updates_accepted++; + stats->updates_accepted++; else - stats->exp_withdraws_accepted++; + stats->withdraws_accepted++; if (old) - { bmap_clear(&c->export_map, old->id); - stats->exp_routes--; - } if (new) - { bmap_set(&c->export_map, new->id); - stats->exp_routes++; - } if (p->debug & D_ROUTES) { if (new && old) - rte_trace_out(D_ROUTES, c, new, "replaced"); + channel_rte_trace_out(D_ROUTES, c, new, "replaced"); else if (new) - rte_trace_out(D_ROUTES, c, new, "added"); + channel_rte_trace_out(D_ROUTES, c, new, "added"); else if (old) - rte_trace_out(D_ROUTES, c, old, "removed"); + channel_rte_trace_out(D_ROUTES, c, old, "removed"); } - p->rt_notify(p, c, net, new, old); + p->rt_notify(p, c, net, new, old_exported ? &old_exported->rte : old); if (c->out_table && old_exported) - rte_free_quick(old_exported); + rte_free(old_exported); } static void -rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) +rt_notify_basic(struct channel *c, const net_addr *net, rte *new, rte *old) { - // struct proto *p = c->proto; - rte *new_free = NULL; - if (new) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; - - if (new) - new = export_filter(c, new, &new_free, 0); + new = export_filter(c, new, 0); if (old && !bmap_test(&c->export_map, old->id)) old = NULL; @@ -858,133 +867,134 @@ rt_notify_basic(struct channel *c, net *net, rte *new, rte *old, int refeed) if (!new && !old) return; - do_rt_notify(c, net, new, old, refeed); - - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + do_rt_notify(c, net, new, old); } -static void -rt_notify_accepted(struct channel *c, net *net, rte *new_changed, rte *old_changed, int refeed) +void +rt_notify_accepted(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte *new_best = NULL; - rte *old_best = NULL; - rte *new_free = NULL; - int new_first = 0; - - /* - * We assume that there are no changes in net route order except (added) - * new_changed and (removed) old_changed. Therefore, the function is not - * compatible with deterministic_med (where nontrivial reordering can happen - * as a result of a route change) and with recomputation of recursive routes - * due to next hop update (where many routes can be changed in one step). - * - * Note that we need this assumption just for optimizations, we could just - * run full new_best recomputation otherwise. - * - * There are three cases: - * feed or old_best is old_changed -> we need to recompute new_best - * old_best is before new_changed -> new_best is old_best, ignore - * old_best is after new_changed -> try new_changed, otherwise old_best - */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - if (net->routes) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte nb0, *new_best = NULL; + const rte *old_best = NULL; - /* Find old_best - either old_changed, or route for net->routes */ - if (old_changed && bmap_test(&c->export_map, old_changed->id)) - old_best = old_changed; - else + for (uint i = 0; i < count; i++) { - for (rte *r = net->routes; rte_is_valid(r); r = r->next) + if (!rte_is_valid(feed[i])) + continue; + + /* Has been already rejected, won't bother with it */ + if (!c->refeeding && bmap_test(&c->export_reject_map, feed[i]->id)) + continue; + + /* Previously exported */ + if (!old_best && bmap_test(&c->export_map, feed[i]->id)) { - if (bmap_test(&c->export_map, r->id)) + /* is still best */ + if (!new_best) { - old_best = r; - break; + DBG("rt_notify_accepted: idempotent\n"); + goto done; } - /* Note if new_changed found before old_best */ - if (r == new_changed) - new_first = 1; + /* is superseded */ + old_best = feed[i]; + break; + } + + /* Have no new best route yet */ + if (!new_best) + { + /* Try this route not seen before */ + nb0 = *feed[i]; + new_best = export_filter(c, &nb0, 0); + DBG("rt_notify_accepted: checking route id %u: %s\n", feed[i]->id, new_best ? "ok" : "no"); } } - /* Find new_best */ - if ((new_changed == old_changed) || (old_best == old_changed)) - { - /* Feed or old_best changed -> find first accepted by filters */ - for (rte *r = net->routes; rte_is_valid(r); r = r->next) - if (new_best = export_filter(c, r, &new_free, 0)) + /* Check obsolete routes for previously exported */ + if (!old_best) + if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) + old_best = &rpe->old->rte; + +/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + { + if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + { + old_best = &rpe->old.rte; break; - } - else - { - /* Other cases -> either new_changed, or old_best (and nothing changed) */ - if (new_first && (new_changed = export_filter(c, new_changed, &new_free, 0))) - new_best = new_changed; - else - return; - } + } + + if (rpe == rpe_last) + break; + } + */ + /* Nothing to export */ if (!new_best && !old_best) - return; + { + DBG("rt_notify_accepted: nothing to export\n"); + goto done; + } - do_rt_notify(c, net, new_best, old_best, refeed); + do_rt_notify(c, n, new_best, old_best); - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); +done: + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); } rte * -rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent) +rt_export_merged(struct channel *c, struct rte **feed, uint count, linpool *pool, int silent) { + _Thread_local static rte rloc; + // struct proto *p = c->proto; struct nexthop_adata *nhs = NULL; - rte *best0, *best, *rt0, *rt, *tmp; - - best0 = net->routes; - *rt_free = NULL; + rte *best0 = feed[0]; + rte *best = NULL; if (!rte_is_valid(best0)) return NULL; - best = export_filter(c, best0, rt_free, silent); + /* Already rejected, no need to re-run the filter */ + if (!c->refeeding && bmap_test(&c->export_reject_map, best0->id)) + return NULL; + + rloc = *best0; + best = export_filter(c, &rloc, silent); - if (!best || !rte_is_reachable(best)) + if (!best) + /* Best route doesn't pass the filter */ + return NULL; + + if (!rte_is_reachable(best)) + /* Unreachable routes can't be merged */ return best; - for (rt0 = best0->next; rt0; rt0 = rt0->next) + for (uint i = 1; i < count; i++) { - if (!rte_mergable(best0, rt0)) + if (!rte_mergable(best0, feed[i])) continue; - rt = export_filter(c, rt0, &tmp, 1); + rte tmp0 = *feed[i]; + rte *tmp = export_filter(c, &tmp0, 1); - if (!rt) + if (!tmp || !rte_is_reachable(tmp)) continue; - if (rte_is_reachable(rt)) - { - eattr *nhea = ea_find(rt->attrs->eattrs, &ea_gen_nexthop); - ASSERT_DIE(nhea); - - if (nhs) - nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); - else - nhs = (struct nexthop_adata *) nhea->u.ptr; - } + eattr *nhea = ea_find(tmp->attrs->eattrs, &ea_gen_nexthop); + ASSERT_DIE(nhea); - if (tmp) - rte_free(tmp); + if (nhs) + nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); + else + nhs = (struct nexthop_adata *) nhea->u.ptr; } - if (nhs) { eattr *nhea = ea_find(best->attrs->eattrs, &ea_gen_nexthop); @@ -992,66 +1002,115 @@ rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int nhs = nexthop_merge(nhs, (struct nexthop_adata *) nhea->u.ptr, c->merge_limit, pool); - best = rte_cow_rta(best, pool); + best->attrs = rta_cow(best->attrs, pool); ea_set_attr(&best->attrs->eattrs, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, &nhs->ad)); } - if (best != best0) - *rt_free = best; - return best; } - -static void -rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed, - rte *new_best, rte *old_best, int refeed) +void +rt_notify_merged(struct rt_export_request *req, const net_addr *n, struct rt_pending_export *rpe, + struct rte **feed, uint count) { - // struct proto *p = c->proto; - rte *new_free = NULL; - - /* We assume that all rte arguments are either NULL or rte_is_valid() */ + struct channel *c = SKIP_BACK(struct channel, out_req, req); - /* This check should be done by the caller */ - if (!new_best && !old_best) - return; + // struct proto *p = c->proto; +#if 0 /* TODO: Find whether this check is possible when processing multiple changes at once. */ /* Check whether the change is relevant to the merged route */ if ((new_best == old_best) && (new_changed != old_changed) && !rte_mergable(new_best, new_changed) && !rte_mergable(old_best, old_changed)) return; +#endif - if (new_best) - c->stats.exp_updates_received++; - else - c->stats.exp_withdraws_received++; + rte *old_best = NULL; + /* Find old best route */ + for (uint i = 0; i < count; i++) + if (bmap_test(&c->export_map, feed[i]->id)) + { + old_best = feed[i]; + break; + } + + /* Check obsolete routes for previously exported */ + if (!old_best) + if (rpe && rpe->old && bmap_test(&c->export_map, rpe->old->rte.id)) + old_best = &rpe->old->rte; + +/* for (; rpe; rpe = atomic_load_explicit(&rpe->next, memory_order_relaxed)) + { + if (rpe->old && bmap_test(&hook->accept_map, rpe->old->id)) + { + old_best = &rpe->old.rte; + break; + } + + if (rpe == rpe_last) + break; + } + */ /* Prepare new merged route */ - if (new_best) - new_best = rt_export_merged(c, net, &new_free, rte_update_pool, 0); + rte *new_merged = count ? rt_export_merged(c, feed, count, rte_update_pool, 0) : NULL; - /* Check old merged route */ - if (old_best && !bmap_test(&c->export_map, old_best->id)) - old_best = NULL; + if (new_merged || old_best) + do_rt_notify(c, n, new_merged, old_best); - if (!new_best && !old_best) - return; + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe && rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} - do_rt_notify(c, net, new_best, old_best, refeed); +void +rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte n0; - /* Discard temporary rte */ - if (new_free) - rte_free(new_free); + if (rpe->new_best != rpe->old_best) + rt_notify_basic(c, net, RTE_COPY(rpe->new_best, &n0), RTE_OR_NULL(rpe->old_best)); + + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); } +void +rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + rte n0; + + if (rpe->new != rpe->old) + rt_notify_basic(c, net, RTE_COPY(rpe->new, &n0), RTE_OR_NULL(rpe->old)); + + /* Drop the old stored rejection if applicable. + * new->id == old->id happens when updating hostentries. */ + if (rpe->old && (!rpe->new || (rpe->new->rte.id != rpe->old->rte.id))) + bmap_clear(&c->export_reject_map, rpe->old->rte.id); +} + +void +rt_feed_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) +{ + struct channel *c = SKIP_BACK(struct channel, out_req, req); + + for (uint i=0; i<count; i++) + { + rte n0 = *feed[i]; + rt_notify_basic(c, net, &n0, NULL); + } +} /** * rte_announce - announce a routing table change * @tab: table the route has been added to - * @type: type of route announcement (RA_UNDEF or RA_ANY) * @net: network in question * @new: the new or changed route * @old: the previous route replaced by the new one @@ -1067,13 +1126,6 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * and @new_best and @old_best describes best routes. Other routes are not * affected, but in sorted table the order of other routes might change. * - * Second, There is a bulk change of multiple routes in @net, with shared best - * route selection. In such case separate route changes are described using - * @type of %RA_ANY, with @new and @old specifying the changed route, while - * @new_best and @old_best are NULL. After that, another notification is done - * where @new_best and @old_best are filled (may be the same), but @new and @old - * are NULL. - * * The function announces the change to all associated channels. For each * channel, an appropriate preprocessing is done according to channel &ra_mode. * For example, %RA_OPTIMAL channels receive just changes of best routes. @@ -1088,19 +1140,19 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * done outside of scope of rte_announce(). */ static void -rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, + struct rte_storage *new_best, struct rte_storage *old_best) { - if (!rte_is_valid(new)) + if (!rte_is_valid(RTE_OR_NULL(new))) new = NULL; - if (!rte_is_valid(old)) + if (!rte_is_valid(RTE_OR_NULL(old))) old = NULL; - if (!rte_is_valid(new_best)) + if (!rte_is_valid(RTE_OR_NULL(new_best))) new_best = NULL; - if (!rte_is_valid(old_best)) + if (!rte_is_valid(RTE_OR_NULL(old_best))) old_best = NULL; if (!new && !old && !new_best && !old_best) @@ -1109,9 +1161,9 @@ rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, if (new_best != old_best) { if (new_best) - new_best->sender->stats.pref_routes++; + new_best->rte.sender->stats.pref++; if (old_best) - old_best->sender->stats.pref_routes--; + old_best->rte.sender->stats.pref--; if (tab->hostcache) rt_notify_hostcache(tab, net); @@ -1122,76 +1174,82 @@ rte_announce(rtable *tab, uint type, net *net, rte *new, rte *old, rt_schedule_notify(tab); - struct channel *c; node *n; - WALK_LIST2(c, n, tab->channels, table_node) + struct rt_pending_export rpe = { .new = new, .old = old, .new_best = new_best, .old_best = old_best }; + uint count = rte_feed_count(net); + rte **feed = NULL; + if (count) { - if (c->export_state == ES_DOWN) - continue; + feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + } - if (type && (type != c->ra_mode)) + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exports) + { + if (eh->export_state == TES_STOP) continue; - switch (c->ra_mode) - { - case RA_OPTIMAL: - if (new_best != old_best) - rt_notify_basic(c, net, new_best, old_best, 0); - break; - - case RA_ANY: - if (new != old) - rt_notify_basic(c, net, new, old, 0); - break; - - case RA_ACCEPTED: - rt_notify_accepted(c, net, new, old, 0); - break; + if (new) + eh->stats.updates_received++; + else + eh->stats.withdraws_received++; - case RA_MERGED: - rt_notify_merged(c, net, new, old, new_best, old_best, 0); - break; - } + if (eh->req->export_one) + eh->req->export_one(eh->req, net->n.addr, &rpe); + else if (eh->req->export_bulk) + eh->req->export_bulk(eh->req, net->n.addr, &rpe, feed, count); + else + bug("Export request must always provide an export method"); } } static inline int -rte_validate(rte *e) +rte_validate(struct channel *ch, rte *e) { int c; - net *n = e->net; + const net_addr *n = e->net; - if (!net_validate(n->n.addr)) + if (!net_validate(n)) { log(L_WARN "Ignoring bogus prefix %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } /* FIXME: better handling different nettypes */ - c = !net_is_flow(n->n.addr) ? - net_classify(n->n.addr): (IADDR_HOST | SCOPE_UNIVERSE); + c = !net_is_flow(n) ? + net_classify(n): (IADDR_HOST | SCOPE_UNIVERSE); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) { log(L_WARN "Ignoring bogus route %N received via %s", - n->n.addr, e->sender->proto->name); + n, ch->proto->name); return 0; } - eattr *nhea = ea_find(e->attrs->eattrs, &ea_gen_nexthop); - int dest = nhea_dest(nhea); - - if (net_type_match(n->n.addr, NB_DEST) == !dest) + if (net_type_match(n, NB_DEST)) { - log(L_WARN "Ignoring route %N with invalid dest %d received via %s", - n->n.addr, dest, e->sender->proto->name); - return 0; - } + eattr *nhea = ea_find(e->attrs->eattrs, &ea_gen_nexthop); + int dest = nhea_dest(nhea); + + if (dest == RTD_NONE) + { + log(L_WARN "Ignoring route %N with no destination received via %s", + n, ch->proto->name); + return 0; + } - if ((dest == RTD_UNICAST) && - !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + if ((dest == RTD_UNICAST) && + !nexthop_is_sorted((struct nexthop_adata *) nhea->u.ptr)) + { + log(L_WARN "Ignoring unsorted multipath route %N received via %s", + n, ch->proto->name); + return 0; + } + } + else if (ea_find(e->attrs->eattrs, &ea_gen_nexthop)) { - log(L_WARN "Ignoring unsorted multipath route %N received via %s", - n->n.addr, e->sender->proto->name); + log(L_WARN "Ignoring route %N having a nexthop attribute received via %s", + n, ch->proto->name); return 0; } @@ -1212,41 +1270,36 @@ rte_same(rte *x, rte *y) static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); } static void -rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) +rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *src) { - struct proto *p = c->proto; + struct rt_import_request *req = c->req; struct rtable *table = c->table; - struct proto_stats *stats = &c->stats; - static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS; - rte *before_old = NULL; - rte *old_best = net->routes; + struct rt_import_stats *stats = &c->stats; + struct rte_storage *old_best_stored = net->routes, *old_stored = NULL; + rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; - rte **k; - k = &net->routes; /* Find and remove original route from the same protocol */ - while (old = *k) + /* Find and remove original route from the same protocol */ + struct rte_storage **before_old = rte_find(net, src); + + if (*before_old) { - if (old->src == src) + old = &(old_stored = (*before_old))->rte; + + /* If there is the same route in the routing table but from + * a different sender, then there are two paths from the + * source protocol to this routing table through transparent + * pipes, which is not allowed. + * We log that and ignore the route. */ + if (old->sender != c) { - /* If there is the same route in the routing table but from - * a different sender, then there are two paths from the - * source protocol to this routing table through transparent - * pipes, which is not allowed. - * - * We log that and ignore the route. If it is withdraw, we - * ignore it completely (there might be 'spurious withdraws', - * see FIXME in do_rte_announce()) - */ - if (old->sender->proto != p) - { - if (new) - { - log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %N to table %s", - net->n.addr, table->name); - rte_free_quick(new); - } - return; - } + if (!old->generation && !new->generation) + bug("Two protocols claim to author a route with the same rte_src in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + + log_rl(&table->rl_pipe, L_ERR "Route source collision in table %s: %N %s/%u:%u", + c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); + } if (new && rte_same(old, new)) { @@ -1256,123 +1309,56 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) if (!rte_is_filtered(new)) { - stats->imp_updates_ignored++; - rte_trace_in(D_ROUTES, c, new, "ignored"); + stats->updates_ignored++; + rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } + } - rte_free_quick(new); - return; - } - *k = old->next; - table->rt_count--; - break; - } - k = &old->next; - before_old = old; + *before_old = (*before_old)->next; + table->rt_count--; } - /* Save the last accessed position */ - rte **pos = k; - - if (!old) - before_old = NULL; - if (!old && !new) { - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; return; } + if (req->preimport) + new = req->preimport(req, new, old); + int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); - struct channel_limit *l = &c->rx_limit; - if (l->action && !old && new && !c->in_table) - { - u32 all_routes = stats->imp_routes + stats->filt_routes; - - if (all_routes >= l->limit) - channel_notify_limit(c, l, PLD_RX, all_routes); - - if (l->state == PLS_BLOCKED) - { - /* In receive limit the situation is simple, old is NULL so - we just free new and exit like nothing happened */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - rte_free_quick(new); - return; - } - } - - l = &c->in_limit; - if (l->action && !old_ok && new_ok) - { - if (stats->imp_routes >= l->limit) - channel_notify_limit(c, l, PLD_IN, stats->imp_routes); - - if (l->state == PLS_BLOCKED) - { - /* In import limit the situation is more complicated. We - shouldn't just drop the route, we should handle it like - it was filtered. We also have to continue the route - processing if old or new is non-NULL, but we should exit - if both are NULL as this case is probably assumed to be - already handled. */ - - stats->imp_updates_ignored++; - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - - if (c->in_keep_filtered) - new->flags |= REF_FILTERED; - else - { rte_free_quick(new); new = NULL; } - - /* Note that old && !new could be possible when - c->in_keep_filtered changed in the recent past. */ - - if (!old && !new) - return; - - new_ok = 0; - goto skip_stats1; - } - } - if (new_ok) - stats->imp_updates_accepted++; + stats->updates_accepted++; else if (old_ok) - stats->imp_withdraws_accepted++; + stats->withdraws_accepted++; else - stats->imp_withdraws_ignored++; + stats->withdraws_ignored++; if (old_ok || new_ok) table->last_rt_change = current_time(); - skip_stats1: - - if (new) - rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; - if (old) - rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; + struct rte_storage *new_stored = new ? rte_store(new, net, table) : NULL; if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ - if (new) + if (new_stored) { - if (before_old && !rte_better(new, before_old)) - k = &before_old->next; + struct rte_storage **k; + if ((before_old != &net->routes) && !rte_better(new, &SKIP_BACK(struct rte_storage, next, before_old)->rte)) + k = before_old; else k = &net->routes; for (; *k; k=&(*k)->next) - if (rte_better(new, *k)) + if (rte_better(new, &(*k)->rte)) break; - new->next = *k; - *k = new; + new_stored->next = *k; + *k = new_stored; table->rt_count++; } @@ -1382,16 +1368,17 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* If routes are not sorted, find the best route and move it on the first position. There are several optimized cases. */ - if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best)) + if (src->proto->rte_recalculate && + src->proto->rte_recalculate(table, net, new_stored ? &new_stored->rte : NULL, old, old_best)) goto do_recalculate; - if (new && rte_better(new, old_best)) + if (new_stored && rte_better(&new_stored->rte, old_best)) { /* The first case - the new route is cleary optimal, we link it at the first position */ - new->next = net->routes; - net->routes = new; + new_stored->next = net->routes; + net->routes = new_stored; table->rt_count++; } @@ -1405,10 +1392,10 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) do_recalculate: /* Add the new route to the list */ - if (new) + if (new_stored) { - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } @@ -1416,81 +1403,82 @@ rte_recalculate(struct channel *c, net *net, rte *new, struct rte_src *src) /* Find a new optimal route (if there is any) */ if (net->routes) { - rte **bp = &net->routes; - for (k=&(*bp)->next; *k; k=&(*k)->next) - if (rte_better(*k, *bp)) + struct rte_storage **bp = &net->routes; + for (struct rte_storage **k=&(*bp)->next; *k; k=&(*k)->next) + if (rte_better(&(*k)->rte, &(*bp)->rte)) bp = k; /* And relink it */ - rte *best = *bp; + struct rte_storage *best = *bp; *bp = best->next; best->next = net->routes; net->routes = best; } } - else if (new) + else if (new_stored) { /* The third case - the new route is not better than the old best route (therefore old_best != NULL) and the old best route was not removed (therefore old_best == net->routes). We just link the new route to the old/last position. */ - new->next = *pos; - *pos = new; + new_stored->next = *before_old; + *before_old = new_stored; table->rt_count++; } /* The fourth (empty) case - suboptimal route was removed, nothing to do */ } - if (new) + if (new_stored) { - new->lastmod = current_time(); + new_stored->rte.lastmod = current_time(); if (!old) { - new->id = hmap_first_zero(&table->id_map); - hmap_set(&table->id_map, new->id); + new_stored->rte.id = hmap_first_zero(&table->id_map); + hmap_set(&table->id_map, new_stored->rte.id); } else - new->id = old->id; + new_stored->rte.id = old->id; } /* Log the route change */ - if ((c->debug & D_ROUTES) || (p->debug & D_ROUTES)) + if (new_ok) + rt_rte_trace_in(D_ROUTES, req, &new_stored->rte, new_stored == net->routes ? "added [best]" : "added"); + else if (old_ok) { - if (new_ok) - rte_trace(c, new, '>', new == net->routes ? "added [best]" : "added"); - else if (old_ok) - { - if (old != old_best) - rte_trace(c, old, '>', "removed"); - else if (rte_is_ok(net->routes)) - rte_trace(c, old, '>', "removed [replaced]"); - else - rte_trace(c, old, '>', "removed [sole]"); - } + if (old != old_best) + rt_rte_trace_in(D_ROUTES, req, old, "removed"); + else if (net->routes && rte_is_ok(&net->routes->rte)) + rt_rte_trace_in(D_ROUTES, req, old, "removed [replaced]"); + else + rt_rte_trace_in(D_ROUTES, req, old, "removed [sole]"); } /* Propagate the route change */ - rte_announce(table, RA_UNDEF, net, new, old, net->routes, old_best); + rte_announce(table, net, new_stored, old_stored, + net->routes, old_best_stored); if (!net->routes && (table->gc_counter++ >= table->config->gc_max_ops) && (table->gc_time + table->config->gc_min_time <= current_time())) rt_schedule_prune(table); +#if 0 + /* Enable and reimplement these callbacks if anybody wants to use them */ if (old_ok && p->rte_remove) p->rte_remove(net, old); if (new_ok && p->rte_insert) - p->rte_insert(net, new); + p->rte_insert(net, &new_stored->rte); +#endif if (old) { - if (!new) + if (!new_stored) hmap_clear(&table->id_map, old->id); - rte_free_quick(old); + rte_free(old_stored); } } @@ -1509,184 +1497,158 @@ rte_update_unlock(void) lp_flush(rte_update_pool); } -/** - * rte_update - enter a new update to a routing table - * @table: table to be updated - * @c: channel doing the update - * @net: network node - * @p: protocol submitting the update - * @src: protocol originating the update - * @new: a &rte representing the new route or %NULL for route removal. - * - * This function is called by the routing protocols whenever they discover - * a new route or wish to update/remove an existing route. The right announcement - * sequence is to build route attributes first (either un-cached with @aflags set - * to zero or a cached one using rta_lookup(); in this case please note that - * you need to increase the use count of the attributes yourself by calling - * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all - * the appropriate data and finally submit the new &rte by calling rte_update(). - * - * @src specifies the protocol that originally created the route and the meaning - * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the - * same value as @new->attrs->proto. @p specifies the protocol that called - * rte_update(). In most cases it is the same protocol as @src. rte_update() - * stores @p in @new->sender; - * - * When rte_update() gets any route, it automatically validates it (checks, - * whether the network and next hop address are valid IP addresses and also - * whether a normal routing protocol doesn't try to smuggle a host or link - * scope route to the table), converts all protocol dependent attributes stored - * in the &rte to temporary extended attributes, consults import filters of the - * protocol to see if the route should be accepted and/or its attributes modified, - * stores the temporary attributes back to the &rte. - * - * Now, having a "public" version of the route, we - * automatically find any old route defined by the protocol @src - * for network @n, replace it by the new one (or removing it if @new is %NULL), - * recalculate the optimal route for this destination and finally broadcast - * the change (if any) to all routing protocols by calling rte_announce(). - * - * All memory used for attribute lists and other temporary allocations is taken - * from a special linear pool @rte_update_pool and freed when rte_update() - * finishes. - */ +rte * +channel_preimport(struct rt_import_request *req, rte *new, rte *old) +{ + struct channel *c = SKIP_BACK(struct channel, in_req, req); + + if (new && !old) + if (CHANNEL_LIMIT_PUSH(c, RX)) + return NULL; + + if (!new && old) + CHANNEL_LIMIT_POP(c, RX); + + int new_in = new && !rte_is_filtered(new); + int old_in = old && !rte_is_filtered(old); + + if (new_in && !old_in) + if (CHANNEL_LIMIT_PUSH(c, IN)) + if (c->in_keep_filtered) + { + new->flags |= REF_FILTERED; + return new; + } + else + return NULL; + + if (!new_in && old_in) + CHANNEL_LIMIT_POP(c, IN); + + return new; +} + +static void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); void -rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { - // struct proto *p = c->proto; - struct proto_stats *stats = &c->stats; - const struct filter *filter = c->in_filter; - net *nn; + if (!c->in_req.hook) + return; ASSERT(c->channel_state == CS_UP); + if (c->in_table && !rte_update_in(c, n, new, src)) + return; + + return rte_update_direct(c, n, new, src); +} + +static void +rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +{ + const struct filter *filter = c->in_filter; + struct channel_import_stats *stats = &c->import_stats; + rte_update_lock(); if (new) { - /* Create a temporary table node */ - nn = alloca(sizeof(net) + n->length); - memset(nn, 0, sizeof(net) + n->length); - net_copy(nn->n.addr, n); - - new->net = nn; - new->sender = c; + new->net = n; - stats->imp_updates_received++; - if (filter == FILTER_REJECT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); + int fr; - if (! c->in_keep_filtered) - goto drop; - - /* new is a private copy, i could modify it */ - new->flags |= REF_FILTERED; - } - else if (filter) + stats->updates_received++; + if ((filter == FILTER_REJECT) || + ((fr = f_run(filter, new, 0)) > F_ACCEPT)) { - int fr = f_run(filter, &new, 0); - if (fr > F_ACCEPT) - { - stats->imp_updates_filtered++; - rte_trace_in(D_FILTERS, c, new, "filtered out"); - - if (! c->in_keep_filtered) - goto drop; + stats->updates_filtered++; + channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); + if (c->in_keep_filtered) new->flags |= REF_FILTERED; - } + else + new = NULL; } - rte *new_resolved = rt_next_hop_update_rte(c->table, new); - if (new_resolved) - { - rte_free(new); - new = new_resolved; - } + if (new) + if (net_is_flow(n)) + rt_flowspec_resolve_rte(new, c); + else + rt_next_hop_resolve_rte(new); - /* After all checks, updates and filters have been done, - * validate the route */ - if (!rte_validate(new)) + if (new && !rte_validate(c, new)) { - rte_trace_in(D_FILTERS, c, new, "invalid"); - stats->imp_updates_invalid++; - goto drop; + channel_rte_trace_in(D_FILTERS, c, new, "invalid"); + stats->updates_invalid++; + new = NULL; } - if (!rta_is_cached(new->attrs)) /* Need to copy attributes */ - new->attrs = rta_lookup(new->attrs); - new->flags |= REF_COW; - - /* Use the actual struct network, not the dummy one */ - nn = net_get(c->table, n); - new->net = nn; } else - { - stats->imp_withdraws_received++; + stats->withdraws_received++; - if (!(nn = net_find(c->table, n)) || !src) - { - stats->imp_withdraws_ignored++; - rte_update_unlock(); - return; - } - } - - recalc: - /* And recalculate the best route */ - rte_recalculate(c, nn, new, src); + rte_import(&c->in_req, n, new, src); rte_update_unlock(); - return; +} - drop: - rte_free(new); - new = NULL; - if (nn = net_find(c->table, n)) - goto recalc; +void +rte_import(struct rt_import_request *req, const net_addr *n, rte *new, struct rte_src *src) +{ + struct rt_import_hook *hook = req->hook; + if (!hook) + return; - rte_update_unlock(); + net *nn; + if (new) + { + /* Use the actual struct network, not the dummy one */ + nn = net_get(hook->table, n); + new->net = nn->n.addr; + new->sender = hook; + } + else if (!(nn = net_find(hook->table, n))) + { + req->hook->stats.withdraws_ignored++; + return; + } + + /* And recalculate the best route */ + rte_recalculate(hook, nn, new, src); } /* Independent call to rte_announce(), used from next hop recalculation, outside of rte_update(). new must be non-NULL */ static inline void -rte_announce_i(rtable *tab, uint type, net *net, rte *new, rte *old, - rte *new_best, rte *old_best) +rte_announce_i(rtable *tab, net *net, struct rte_storage *new, struct rte_storage *old, + struct rte_storage *new_best, struct rte_storage *old_best) { rte_update_lock(); - rte_announce(tab, type, net, new, old, new_best, old_best); + rte_announce(tab, net, new, old, new_best, old_best); rte_update_unlock(); } static inline void -rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collection */ +rte_discard(net *net, rte *old) /* Non-filtered route deletion, used during garbage collection */ { rte_update_lock(); - rte_recalculate(old->sender, old->net, NULL, old->src); + rte_recalculate(old->sender, net, NULL, old->src); rte_update_unlock(); } /* Modify existing route by protocol hook, used for long-lived graceful restart */ static inline void -rte_modify(rte *old) +rte_modify(net *net, rte *old) { rte_update_lock(); - rte *new = old->sender->proto->rte_modify(old, rte_update_pool); + rte *new = old->sender->req->rte_modify(old, rte_update_pool); if (new != old) { if (new) - { - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - new->flags = (old->flags & ~REF_MODIFY) | REF_COW; - } + new->flags = old->flags & ~REF_MODIFY; - rte_recalculate(old->sender, old->net, new, old->src); + rte_recalculate(old->sender, net, new, old->src); } rte_update_unlock(); @@ -1694,30 +1656,148 @@ rte_modify(rte *old) /* Check rtable for best route to given net whether it would be exported do p */ int -rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter) +rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter) { net *n = net_find(t, a); - rte *rt = n ? n->routes : NULL; - if (!rte_is_valid(rt)) + if (!n || !rte_is_valid(RTE_OR_NULL(n->routes))) return 0; + rte rt = n->routes->rte; + rte_update_lock(); /* Rest is stripped down export_filter() */ - int v = p->preexport ? p->preexport(p, rt) : 0; + int v = c->proto->preexport ? c->proto->preexport(c, &rt) : 0; if (v == RIC_PROCESS) v = (f_run(filter, &rt, FF_SILENT) <= F_ACCEPT); - /* Discard temporary rte */ - if (rt != n->routes) - rte_free(rt); - rte_update_unlock(); return v > 0; } +static void +rt_export_stopped(void *data) +{ + struct rt_export_hook *hook = data; + rtable *tab = hook->table; + + /* Unlist */ + rem_node(&hook->n); + + /* Reporting the channel as stopped. */ + hook->stopped(hook->req); + + /* Freeing the hook together with its coroutine. */ + rfree(hook->pool); + rt_unlock_table(tab); + + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); +} + + +static inline void +rt_set_import_state(struct rt_import_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->import_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +static inline void +rt_set_export_state(struct rt_export_hook *hook, u8 state) +{ + hook->last_state_change = current_time(); + hook->export_state = state; + + if (hook->req->log_state_change) + hook->req->log_state_change(hook->req, state); +} + +void +rt_request_import(rtable *tab, struct rt_import_request *req) +{ + rt_lock_table(tab); + + struct rt_import_hook *hook = req->hook = mb_allocz(tab->rp, sizeof(struct rt_import_hook)); + + DBG("Lock table %s for import %p req=%p uc=%u\n", tab->name, hook, req, tab->use_count); + + hook->req = req; + hook->table = tab; + + rt_set_import_state(hook, TIS_UP); + + hook->n = (node) {}; + add_tail(&tab->imports, &hook->n); +} + +void +rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_import_hook *hook = req->hook; + + rt_schedule_prune(hook->table); + + rt_set_import_state(hook, TIS_STOP); + + hook->stopped = stopped; +} + +void +rt_request_export(rtable *tab, struct rt_export_request *req) +{ + rt_lock_table(tab); + + pool *p = rp_new(tab->rp, "Export hook"); + struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + hook->lp = lp_new_default(p); + + hook->req = req; + hook->table = tab; + + /* stats zeroed by mb_allocz */ + + rt_set_export_state(hook, TES_HUNGRY); + + hook->n = (node) {}; + add_tail(&tab->exports, &hook->n); + + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + + DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); + + rt_set_export_state(hook, TES_FEEDING); + + hook->event = ev_new_init(p, rt_feed_channel, hook); + ev_schedule_work(hook->event); +} + +void +rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) +{ + ASSERT_DIE(req->hook); + struct rt_export_hook *hook = req->hook; + + rtable *tab = hook->table; + + /* Stop feeding */ + ev_postpone(hook->event); + + if (hook->export_state == TES_FEEDING) + fit_get(&tab->fib, &hook->feed_fit); + + hook->event->hook = rt_export_stopped; + hook->stopped = stopped; + + rt_set_export_state(hook, TES_STOP); + ev_schedule(hook->event); +} /** * rt_refresh_begin - start a refresh cycle @@ -1734,14 +1814,13 @@ rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter) * flag in rt_refresh_end() and then removing such routes in the prune loop. */ void -rt_refresh_begin(rtable *t, struct channel *c) +rt_refresh_begin(rtable *t, struct rt_import_request *req) { FIB_WALK(&t->fib, net, n) { - rte *e; - for (e = n->routes; e; e = e->next) - if (e->sender == c) - e->flags |= REF_STALE; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (e->rte.sender == req->hook) + e->rte.flags |= REF_STALE; } FIB_WALK_END; } @@ -1755,17 +1834,16 @@ rt_refresh_begin(rtable *t, struct channel *c) * hook. See rt_refresh_begin() for description of refresh cycles. */ void -rt_refresh_end(rtable *t, struct channel *c) +rt_refresh_end(rtable *t, struct rt_import_request *req) { int prune = 0; FIB_WALK(&t->fib, net, n) { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE)) + for (struct rte_storage *e = n->routes; e; e = e->next) + if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE)) { - e->flags |= REF_DISCARD; + e->rte.flags |= REF_DISCARD; prune = 1; } } @@ -1776,17 +1854,16 @@ rt_refresh_end(rtable *t, struct channel *c) } void -rt_modify_stale(rtable *t, struct channel *c) +rt_modify_stale(rtable *t, struct rt_import_request *req) { int prune = 0; FIB_WALK(&t->fib, net, n) { - rte *e; - for (e = n->routes; e; e = e->next) - if ((e->sender == c) && (e->flags & REF_STALE) && !(e->flags & REF_FILTERED)) + for (struct rte_storage *e = n->routes; e; e = e->next) + if ((e->rte.sender == req->hook) && (e->rte.flags & REF_STALE) && !(e->rte.flags & REF_FILTERED)) { - e->flags |= REF_MODIFY; + e->rte.flags |= REF_MODIFY; prune = 1; } } @@ -1803,12 +1880,11 @@ rt_modify_stale(rtable *t, struct channel *c) * This functions dumps contents of a &rte to debug output. */ void -rte_dump(rte *e) +rte_dump(struct rte_storage *e) { - net *n = e->net; - debug("%-1N ", n->n.addr); - debug("PF=%02x ", e->pflags); - rta_dump(e->attrs); + debug("%-1N ", e->rte.net); + debug("PF=%02x ", e->rte.pflags); + rta_dump(e->rte.attrs); debug("\n"); } @@ -1821,14 +1897,13 @@ rte_dump(rte *e) void rt_dump(rtable *t) { - debug("Dump of routing table <%s>\n", t->name); + debug("Dump of routing table <%s>%s\n", t->name, t->deleted ? " (deleted)" : ""); #ifdef DEBUGGING fib_check(&t->fib); #endif FIB_WALK(&t->fib, net, n) { - rte *e; - for(e=n->routes; e; e=e->next) + for(struct rte_storage *e=n->routes; e; e=e->next) rte_dump(e); } FIB_WALK_END; @@ -1848,6 +1923,54 @@ rt_dump_all(void) WALK_LIST2(t, n, routing_tables, n) rt_dump(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump(t); +} + +void +rt_dump_hooks(rtable *tab) +{ + debug("Dump of hooks in routing table <%s>%s\n", tab->name, tab->deleted ? " (deleted)" : ""); + debug(" nhu_state=%u hcu_scheduled=%u use_count=%d rt_count=%u\n", + tab->nhu_state, tab->hcu_scheduled, tab->use_count, tab->rt_count); + debug(" last_rt_change=%t gc_time=%t gc_counter=%d prune_state=%u\n", + tab->last_rt_change, tab->gc_time, tab->gc_counter, tab->prune_state); + + struct rt_import_hook *ih; + WALK_LIST(ih, tab->imports) + { + ih->req->dump_req(ih->req); + debug(" Import hook %p requested by %p: pref=%u" + " last_state_change=%t import_state=%u stopped=%p\n", + ih, ih->req, ih->stats.pref, + ih->last_state_change, ih->import_state, ih->stopped); + } + + struct rt_export_hook *eh; + WALK_LIST(eh, tab->exports) + { + eh->req->dump_req(eh->req); + debug(" Export hook %p requested by %p:" + " refeed_pending=%u last_state_change=%t export_state=%u stopped=%p\n", + eh, eh->req, eh->refeed_pending, eh->last_state_change, eh->export_state, eh->stopped); + } + debug("\n"); +} + +void +rt_dump_hooks_all(void) +{ + rtable *t; + node *n; + + debug("Dump of all table hooks\n"); + + WALK_LIST2(t, n, routing_tables, n) + rt_dump_hooks(t); + + WALK_LIST2(t, n, deleted_routing_tables, n) + rt_dump_hooks(t); } static inline void @@ -1965,6 +2088,7 @@ rt_subscribe(rtable *tab, struct rt_subscription *s) { s->tab = tab; rt_lock_table(tab); + DBG("rt_subscribe(%s)\n", tab->name); add_tail(&tab->subscribers, &s->n); } @@ -2110,6 +2234,8 @@ rt_setup(pool *pp, struct rtable_config *cf) rtable *t = ralloc(p, &rt_class); t->rp = p; + t->rte_slab = sl_new(p, sizeof(struct rte_storage)); + t->name = cf->name; t->config = cf; t->addr_type = cf->addr_type; @@ -2124,18 +2250,22 @@ rt_setup(pool *pp, struct rtable_config *cf) t->fib.init = net_init_with_trie; } - init_list(&t->channels); init_list(&t->flowspec_links); - init_list(&t->subscribers); if (!(t->internal = cf->internal)) { + init_list(&t->imports); + init_list(&t->exports); hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); + init_list(&t->subscribers); + t->rt_event = ev_new_init(p, rt_event, t); t->last_rt_change = t->gc_time = current_time(); + t->rl_pipe = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + if (rt_is_flow(t)) { t->flowspec_trie = f_new_trie(lp_new_default(p), 0); @@ -2158,8 +2288,8 @@ rt_init(void) rta_init(); rt_table_pool = rp_new(&root_pool, "Routing tables"); rte_update_pool = lp_new_default(rt_table_pool); - rte_slab = sl_new(rt_table_pool, sizeof(rte)); init_list(&routing_tables); + init_list(&deleted_routing_tables); } @@ -2181,9 +2311,9 @@ static void rt_prune_table(rtable *tab) { struct fib_iterator *fit = &tab->prune_fit; - int limit = 512; + int limit = 2000; - struct channel *c; + struct rt_import_hook *ih; node *n, *x; DBG("Pruning route table %s\n", tab->name); @@ -2197,9 +2327,9 @@ rt_prune_table(rtable *tab) if (tab->prune_state == 1) { /* Mark channels to flush */ - WALK_LIST2(c, n, tab->channels, table_node) - if (c->channel_state == CS_FLUSHING) - c->flush_active = 1; + WALK_LIST2(ih, n, tab->imports, n) + if (ih->import_state == TIS_STOP) + rt_set_import_state(ih, TIS_FLUSHING); FIB_ITERATE_INIT(fit, &tab->fib); tab->prune_state = 2; @@ -2215,8 +2345,6 @@ rt_prune_table(rtable *tab) again: FIB_ITERATE_START(&tab->fib, fit, net, n) { - rte *e; - rescan: if (limit <= 0) { @@ -2225,19 +2353,19 @@ again: return; } - for (e=n->routes; e; e=e->next) + for (struct rte_storage *e=n->routes; e; e=e->next) { - if (e->sender->flush_active || (e->flags & REF_DISCARD)) + if ((e->rte.sender->import_state == TIS_FLUSHING) || (e->rte.flags & REF_DISCARD)) { - rte_discard(e); + rte_discard(n, &e->rte); limit--; goto rescan; } - if (e->flags & REF_MODIFY) + if (e->rte.flags & REF_MODIFY) { - rte_modify(e); + rte_modify(n, &e->rte); limit--; goto rescan; @@ -2300,21 +2428,18 @@ again: } } - if (tab->prune_state > 0) - ev_schedule(tab->rt_event); - - /* FIXME: This should be handled in a better way */ rt_prune_sources(); /* Close flushed channels */ - WALK_LIST2_DELSAFE(c, n, x, tab->channels, table_node) - if (c->flush_active) - { - c->flush_active = 0; - channel_set_state(c, CS_DOWN); - } - - return; + WALK_LIST2_DELSAFE(ih, n, x, tab->imports, n) + if (ih->import_state == TIS_FLUSHING) + { + rt_set_import_state(ih, TIS_CLEARED); + ih->stopped(ih->req); + rem_node(&ih->n); + mb_free(ih); + rt_unlock_table(tab); + } } /** @@ -2536,8 +2661,8 @@ rta_next_hop_outdated(rta *a) ? head : NULL; } -static inline rte * -rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) +static inline struct rte_storage * +rt_next_hop_update_rte(rtable *tab, net *n, rte *old) { struct hostentry_adata *head = rta_next_hop_outdated(old->attrs); if (!head) @@ -2547,14 +2672,31 @@ rt_next_hop_update_rte(rtable *tab UNUSED, rte *old) a.cached = 0; rta_apply_hostentry(&a, head); - rte *e = sl_alloc(rte_slab); - memcpy(e, old, sizeof(rte)); - e->attrs = rta_lookup(&a); - rt_lock_source(e->src); + rte e0 = *old; + e0.attrs = &a; - return e; + return rte_store(&e0, n, tab); } +static inline void +rt_next_hop_resolve_rte(rte *r) +{ + eattr *heea = ea_find(r->attrs->eattrs, &ea_gen_hostentry); + if (!heea) + return; + + struct hostentry_adata *head = (struct hostentry_adata *) heea->u.ptr; + + if (r->attrs->cached) + { + rta *a = tmp_alloc(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + r->attrs = a; + } + + rta_apply_hostentry(r->attrs, head); +} #ifdef CONFIG_BGP @@ -2594,7 +2736,7 @@ rta_get_first_asn(rta *a) return (e && as_path_get_first_regular(e->u.ptr, &asn)) ? asn : 0; } -int +static inline enum flowspec_valid rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior) { ASSERT(rt_is_ip(tab_ip)); @@ -2603,11 +2745,11 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* RFC 8955 6. a) Flowspec has defined dst prefix */ if (!net_flow_has_dst_prefix(n)) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.1. Accept AS_PATH is empty (fr */ if (interior && rta_as_path_is_empty(a)) - return 1; + return FLOWSPEC_VALID; /* RFC 8955 6. b) Flowspec and its best-match route have the same originator */ @@ -2621,7 +2763,7 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* Find best-match BGP unicast route for flowspec dst prefix */ net *nb = net_route(tab_ip, &dst); - rte *rb = nb ? nb->routes : NULL; + const rte *rb = nb ? &nb->routes->rte : NULL; /* Register prefix to trie for tracking further changes */ int max_pxlen = (n->type == NET_FLOW4) ? IP4_MAX_PREFIX_LENGTH : IP6_MAX_PREFIX_LENGTH; @@ -2629,7 +2771,7 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i /* No best-match BGP route -> no flowspec */ if (!rb || (rt_get_source_attr(rb) != RTS_BGP)) - return 0; + return FLOWSPEC_INVALID; /* Find ORIGINATOR_ID values */ u32 orig_a = ea_get_int(a->eattrs, "bgp_originator_id", 0); @@ -2640,17 +2782,17 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i ea_get_ip(a->eattrs, &ea_gen_from, IPA_NONE), ea_get_ip(rb->attrs->eattrs, &ea_gen_from, IPA_NONE) ))) - return 0; + return FLOWSPEC_INVALID; /* Find ASN of the best-match route, for use in next checks */ u32 asn_b = rta_get_first_asn(rb->attrs); if (!asn_b) - return 0; + return FLOWSPEC_INVALID; /* RFC 9117 4.2. For EBGP, flowspec and its best-match route are from the same AS */ if (!interior && (rta_get_first_asn(a) != asn_b)) - return 0; + return FLOWSPEC_INVALID; /* RFC 8955 6. c) More-specific routes are from the same AS as the best-match route */ TRIE_WALK(tab_ip->trie, subnet, &dst) @@ -2659,103 +2801,149 @@ rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, i if (!nc) continue; - rte *rc = nc->routes; + const rte *rc = &nc->routes->rte; if (rt_get_source_attr(rc) != RTS_BGP) - return 0; + return FLOWSPEC_INVALID; if (rta_get_first_asn(rc->attrs) != asn_b) - return 0; + return FLOWSPEC_INVALID; } TRIE_WALK_END; - return 1; + return FLOWSPEC_VALID; } #endif /* CONFIG_BGP */ -static rte * -rt_flowspec_update_rte(rtable *tab, rte *r) +static struct rte_storage * +rt_flowspec_update_rte(rtable *tab, net *n, rte *r) { #ifdef CONFIG_BGP if (rt_get_source_attr(r) != RTS_BGP) return NULL; - struct bgp_channel *bc = (struct bgp_channel *) r->sender; + struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); if (!bc->base_table) return NULL; - const net_addr *n = r->net->n.addr; - struct bgp_proto *p = (void *) r->src->proto; - int valid = rt_flowspec_check(bc->base_table, tab, n, r->attrs, p->is_interior); - int old = rt_get_flowspec_valid(r); + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + + enum flowspec_valid old = rt_get_flowspec_valid(r), + valid = rt_flowspec_check(bc->base_table, tab, n->n.addr, r->attrs, p->is_interior); + if (old == valid) return NULL; rta *a = alloca(RTA_MAX_SIZE); - memcpy(a, r->attrs, rta_size(r->attrs)); + *a = *r->attrs; a->cached = 0; ea_set_attr_u32(&a->eattrs, &ea_gen_flowspec_valid, 0, valid); - rte *new = sl_alloc(rte_slab); - memcpy(new, r, sizeof(rte)); - new->attrs = rta_lookup(a); + rte new; + memcpy(&new, r, sizeof(rte)); + new.attrs = a; - return new; + return rte_store(&new, n, tab); #else return NULL; #endif } +static inline void +rt_flowspec_resolve_rte(rte *r, struct channel *c) +{ +#ifdef CONFIG_BGP + enum flowspec_valid valid, old = rt_get_flowspec_valid(r); + struct bgp_channel *bc = (struct bgp_channel *) c; + + if ( (rt_get_source_attr(r) == RTS_BGP) + && (c->channel == &channel_bgp) + && (bc->base_table)) + { + struct bgp_proto *p = SKIP_BACK(struct bgp_proto, p, bc->c.proto); + valid = rt_flowspec_check( + bc->base_table, + c->in_req.hook->table, + r->net, r->attrs, p->is_interior); + } + else + valid = FLOWSPEC_UNKNOWN; + + if (valid == old) + return; + + if (r->attrs->cached) + { + rta *a = tmp_alloc(RTA_MAX_SIZE); + *a = *r->attrs; + a->cached = 0; + r->attrs = a; + } + + if (valid == FLOWSPEC_UNKNOWN) + ea_unset_attr(&r->attrs->eattrs, 0, &ea_gen_flowspec_valid); + else + ea_set_attr_u32(&r->attrs->eattrs, &ea_gen_flowspec_valid, 0, valid); +#endif +} static inline int rt_next_hop_update_net(rtable *tab, net *n) { - rte **k, *e, *new, *old_best, **new_best; + struct rte_storage *new; int count = 0; - int free_old_best = 0; + int is_flow = net_is_flow(n->n.addr); - old_best = n->routes; + struct rte_storage *old_best = n->routes; if (!old_best) return 0; - for (k = &n->routes; e = *k; k = &e->next) - { - if (!net_is_flow(n->n.addr)) - new = rt_next_hop_update_rte(tab, e); - else - new = rt_flowspec_update_rte(tab, e); + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) + count++; - if (new) + if (!count) + return 0; + + struct rte_multiupdate { + struct rte_storage *old, *new; + } *updates = alloca(sizeof(struct rte_multiupdate) * count); + + int pos = 0; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) + if (is_flow || rta_next_hop_outdated(e->rte.attrs)) { - *k = new; + struct rte_storage *new = is_flow + ? rt_flowspec_update_rte(tab, n, &e->rte) + : rt_next_hop_update_rte(tab, n, &e->rte); - rte_trace_in(D_ROUTES, new->sender, new, "updated"); - rte_announce_i(tab, RA_ANY, n, new, e, NULL, NULL); + if (!new) + continue; /* Call a pre-comparison hook */ /* Not really an efficient way to compute this */ - if (e->src->proto->rte_recalculate) - e->src->proto->rte_recalculate(tab, n, new, e, NULL); + if (e->rte.src->proto->rte_recalculate) + e->rte.src->proto->rte_recalculate(tab, n, &new->rte, &e->rte, &old_best->rte); - if (e != old_best) - rte_free_quick(e); - else /* Freeing of the old best rte is postponed */ - free_old_best = 1; + updates[pos++] = (struct rte_multiupdate) { + .old = e, + .new = new, + }; - e = new; - count++; + /* Replace the route in the list */ + new->next = e->next; + *k = e = new; } - } - if (!count) - return 0; + ASSERT_DIE(pos <= count); + count = pos; /* Find the new best route */ - new_best = NULL; - for (k = &n->routes; e = *k; k = &e->next) + struct rte_storage **new_best = NULL; + for (struct rte_storage *e, **k = &n->routes; e = *k; k = &e->next) { - if (!new_best || rte_better(e, *new_best)) + if (!new_best || rte_better(&e->rte, &(*new_best)->rte)) new_best = k; } @@ -2768,15 +2956,17 @@ rt_next_hop_update_net(rtable *tab, net *n) n->routes = new; } - /* Announce the new best route */ - if (new != old_best) - rte_trace_in(D_ROUTES, new->sender, new, "updated [best]"); - - /* Propagate changes */ - rte_announce_i(tab, RA_UNDEF, n, NULL, NULL, n->routes, old_best); + /* Announce the changes */ + for (int i=0; i<count; i++) + { + _Bool nb = (new == updates[i].new), ob = (old_best == updates[i].old); + const char *best_indicator[2][2] = { { "updated", "updated [-best]" }, { "updated [+best]", "updated [best]" } }; + rt_rte_trace_in(D_ROUTES, updates[i].new->rte.sender->req, &updates[i].new->rte, best_indicator[nb][ob]); + rte_announce_i(tab, n, updates[i].new, updates[i].old, new, old_best); + } - if (free_old_best) - rte_free_quick(old_best); + for (int i=0; i<count; i++) + rte_free(updates[i].old); return count; } @@ -2956,19 +3146,6 @@ rt_commit(struct config *new, struct config *old) DBG("\tdone\n"); } -static inline void -do_feed_channel(struct channel *c, net *n, rte *e) -{ - rte_update_lock(); - if (c->ra_mode == RA_ACCEPTED) - rt_notify_accepted(c, n, NULL, NULL, c->refeeding); - else if (c->ra_mode == RA_MERGED) - rt_notify_merged(c, n, NULL, NULL, e, e, c->refeeding); - else /* RA_BASIC */ - rt_notify_basic(c, n, e, e, c->refeeding); - rte_update_unlock(); -} - /** * rt_feed_channel - advertise all routes to a channel * @c: channel to be fed @@ -2978,79 +3155,55 @@ do_feed_channel(struct channel *c, net *n, rte *e) * has something to do. (We avoid transferring all the routes in single pass in * order not to monopolize CPU time.) */ -int -rt_feed_channel(struct channel *c) +static void +rt_feed_channel(void *data) { + struct rt_export_hook *c = data; + struct fib_iterator *fit = &c->feed_fit; int max_feed = 256; - ASSERT(c->export_state == ES_FEEDING); - - if (!c->feed_active) - { - FIB_ITERATE_INIT(fit, &c->table->fib); - c->feed_active = 1; - } + ASSERT(c->export_state == TES_FEEDING); FIB_ITERATE_START(&c->table->fib, fit, net, n) { - rte *e = n->routes; if (max_feed <= 0) { FIB_ITERATE_PUT(fit); - return 0; + ev_schedule_work(c->event); + return; } - if ((c->ra_mode == RA_OPTIMAL) || - (c->ra_mode == RA_ACCEPTED) || - (c->ra_mode == RA_MERGED)) - if (rte_is_valid(e)) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - do_feed_channel(c, n, e); - max_feed--; - } - - if (c->ra_mode == RA_ANY) - for(e = n->routes; e; e = e->next) - { - /* In the meantime, the protocol may fell down */ - if (c->export_state != ES_FEEDING) - goto done; - - if (!rte_is_valid(e)) - continue; + if (c->export_state != TES_FEEDING) + goto done; - do_feed_channel(c, n, e); - max_feed--; - } + if (c->req->export_bulk) + { + uint count = rte_feed_count(n); + if (count) + { + rte_update_lock(); + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + struct rt_pending_export rpe = { .new_best = n->routes }; + c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); + max_feed -= count; + rte_update_unlock(); + } + } + else if (n->routes && rte_is_valid(&n->routes->rte)) + { + rte_update_lock(); + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + max_feed--; + rte_update_unlock(); + } } FIB_ITERATE_END; done: - c->feed_active = 0; - return 1; -} - -/** - * rt_feed_baby_abort - abort protocol feeding - * @c: channel - * - * This function is called by the protocol code when the protocol stops or - * ceases to exist during the feeding. - */ -void -rt_feed_channel_abort(struct channel *c) -{ - if (c->feed_active) - { - /* Unlink the iterator */ - fit_get(&c->table->fib, &c->feed_fit); - c->feed_active = 0; - } + rt_set_export_state(c, TES_READY); } @@ -3062,16 +3215,10 @@ int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { struct rtable *tab = c->in_table; - rte *old, **pos; net *net; if (new) - { net = net_get(tab, n); - - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); - } else { net = net_find(tab, n); @@ -3081,9 +3228,10 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr } /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if (old->src == src) + struct rte_storage **pos = rte_find(net, src); + if (*pos) { + rte *old = &(*pos)->rte; if (new && rte_same(old, new)) { /* Refresh the old rte, continue with update to main rtable */ @@ -3096,60 +3244,52 @@ rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *sr goto drop_update; } + if (!new) + CHANNEL_LIMIT_POP(c, RX); + /* Move iterator if needed */ - if (old == c->reload_next_rte) - c->reload_next_rte = old->next; + if (*pos == c->reload_next_rte) + c->reload_next_rte = (*pos)->next; /* Remove the old rte */ - *pos = old->next; - rte_free_quick(old); + struct rte_storage *del = *pos; + *pos = (*pos)->next; + rte_free(del); tab->rt_count--; + } + else if (new) + { + if (CHANNEL_LIMIT_PUSH(c, RX)) + { + /* Required by rte_trace_in() */ + new->net = n; - break; + channel_rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); + goto drop_update; + } } + else + goto drop_withdraw; if (!new) { - if (!old) - goto drop_withdraw; - if (!net->routes) fib_delete(&tab->fib, net); return 1; } - struct channel_limit *l = &c->rx_limit; - if (l->action && !old) - { - if (tab->rt_count >= l->limit) - channel_notify_limit(c, l, PLD_RX, tab->rt_count); - - if (l->state == PLS_BLOCKED) - { - /* Required by rte_trace_in() */ - new->net = net; - - rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); + struct rte_storage *e = rte_store(new, net, tab); + e->rte.lastmod = current_time(); e->next = *pos; *pos = e; tab->rt_count++; return 1; drop_update: - c->stats.imp_updates_received++; - c->stats.imp_updates_ignored++; - rte_free(new); + c->import_stats.updates_received++; + c->in_req.hook->stats.updates_ignored++; if (!net->routes) fib_delete(&tab->fib, net); @@ -3157,8 +3297,8 @@ drop_update: return 0; drop_withdraw: - c->stats.imp_withdraws_received++; - c->stats.imp_withdraws_ignored++; + c->import_stats.withdraws_received++; + c->in_req.hook->stats.withdraws_ignored++; return 0; } @@ -3178,7 +3318,7 @@ rt_reload_channel(struct channel *c) } do { - for (rte *e = c->reload_next_rte; e; e = e->next) + for (struct rte_storage *e = c->reload_next_rte; e; e = e->next) { if (max_feed-- <= 0) { @@ -3187,7 +3327,8 @@ rt_reload_channel(struct channel *c) return 0; } - rte_update2(c, e->net->n.addr, rte_do_cow(e), e->src); + rte r = e->rte; + rte_update_direct(c, r.net, &r, r.src); } c->reload_next_rte = NULL; @@ -3230,14 +3371,14 @@ rt_prune_sync(rtable *t, int all) again: FIB_ITERATE_START(&t->fib, &fit, net, n) { - rte *e, **ee = &n->routes; + struct rte_storage *e, **ee = &n->routes; while (e = *ee) { - if (all || (e->flags & (REF_STALE | REF_DISCARD))) + if (all || (e->rte.flags & (REF_STALE | REF_DISCARD))) { *ee = e->next; - rte_free_quick(e); + rte_free(e); t->rt_count--; } else @@ -3260,20 +3401,16 @@ again: */ int -rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, rte **old_exported, int refeed) +rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old0, struct rte_storage **old_exported) { struct rtable *tab = c->out_table; struct rte_src *src; - rte *old, **pos; net *net; if (new) { net = net_get(tab, n); src = new->src; - - if (!rta_is_cached(new->attrs)) - new->attrs = rta_lookup(new->attrs); } else { @@ -3281,39 +3418,28 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, rte ** src = old0->src; if (!net) - goto drop_withdraw; + goto drop; } /* Find the old rte */ - for (pos = &net->routes; old = *pos; pos = &old->next) - if ((c->ra_mode != RA_ANY) || (old->src == src)) - { - if (new && rte_same(old, new)) - { - /* REF_STALE / REF_DISCARD not used in export table */ - /* - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - */ - - goto drop_update; - } + struct rte_storage **pos = (c->ra_mode == RA_ANY) ? rte_find(net, src) : &net->routes; + struct rte_storage *old = NULL; - /* Remove the old rte */ - *pos = old->next; - *old_exported = old; - tab->rt_count--; + if (old = *pos) + { + if (new && rte_same(&(*pos)->rte, new)) + goto drop; - break; - } + /* Remove the old rte */ + *pos = old->next; + *old_exported = old; + tab->rt_count--; + } if (!new) { if (!old) - goto drop_withdraw; + goto drop; if (!net->routes) fib_delete(&tab->fib, net); @@ -3322,23 +3448,43 @@ rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old0, rte ** } /* Insert the new rte */ - rte *e = rte_do_cow(new); - e->flags |= REF_COW; - e->net = net; - e->sender = c; - e->lastmod = current_time(); + struct rte_storage *e = rte_store(new, net, tab); + e->rte.lastmod = current_time(); e->next = *pos; *pos = e; tab->rt_count++; return 1; -drop_update: - return refeed; - -drop_withdraw: +drop: return 0; } +void +rt_refeed_channel(struct channel *c) +{ + if (!c->out_table) + { + channel_request_feeding(c); + return; + } + + ASSERT_DIE(c->ra_mode != RA_ANY); + + c->proto->feed_begin(c, 0); + + FIB_WALK(&c->out_table->fib, net, n) + { + if (!n->routes) + continue; + + rte e = n->routes->rte; + c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL); + } + FIB_WALK_END; + + c->proto->feed_end(c); +} + /* * Hostcache @@ -3504,7 +3650,7 @@ if_local_addr(ip_addr a, struct iface *i) } u32 -rt_get_igp_metric(rte *rt) +rt_get_igp_metric(const rte *rt) { eattr *ea = ea_find(rt->attrs->eattrs, "igp_metric"); @@ -3537,8 +3683,8 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) net *n = net_route(tab, &he_addr); if (n) { - rte *e = n->routes; - rta *a = e->attrs; + struct rte_storage *e = n->routes; + rta *a = e->rte.attrs; pxlen = n->n.addr->pxlen; if (ea_find(a->eattrs, &ea_gen_hostentry)) @@ -3570,7 +3716,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he) he->src = rta_clone(a); he->nexthop_linkable = !direct; - he->igp_metric = rt_get_igp_metric(e); + he->igp_metric = rt_get_igp_metric(&e->rte); } done: @@ -2,6 +2,7 @@ * BIRD Internet Routing Daemon -- Routing Table * * (c) 1998--2000 Martin Mares <mj@ucw.cz> + * (c) 2019--2021 Maria Matejka <mq@jmq.cz> * * Can be freely distributed and used under the terms of the GNU GPL. */ @@ -20,6 +21,7 @@ struct ea_list; struct protocol; struct proto; +struct channel; struct rte_src; struct symbol; struct timer; @@ -58,16 +60,16 @@ typedef struct rtable { resource r; node n; /* Node in list of all tables */ pool *rp; /* Resource pool to allocate everything from, including itself */ + struct slab *rte_slab; /* Slab to allocate route objects */ struct fib fib; struct f_trie *trie; /* Trie of prefixes defined in fib */ char *name; /* Name of this table */ - list channels; /* List of attached channels (struct channel) */ uint addr_type; /* Type of address data stored in table (NET_*) */ - int pipe_busy; /* Pipe loop detection */ int use_count; /* Number of protocols using this table */ u32 rt_count; /* Number of routes in the table */ - byte internal; /* Internal table of a protocol */ + list imports; /* Registered route importers */ + list exports; /* Registered route exporters */ struct hmap id_map; struct hostcache *hostcache; @@ -85,12 +87,14 @@ typedef struct rtable { byte prune_trie; /* Prune prefix trie during next table prune */ byte hcu_scheduled; /* Hostcache update is scheduled */ byte nhu_state; /* Next Hop Update state */ + byte internal; /* This table is internal for some other object */ struct fib_iterator prune_fit; /* Rtable prune FIB iterator */ struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */ struct f_trie *trie_new; /* New prefix trie defined during pruning */ struct f_trie *trie_old; /* Old prefix trie waiting to be freed */ u32 trie_lock_count; /* Prefix trie locked by walks */ u32 trie_old_lock_count; /* Old prefix trie locked by walks */ + struct tbf rl_pipe; /* Rate limiting token buffer for pipe collisions */ list subscribers; /* Subscribers for notifications */ struct timer *settle_timer; /* Settle time for notifications */ @@ -118,7 +122,7 @@ struct rt_flowspec_link { #define NHU_DIRTY 3 typedef struct network { - struct rte *routes; /* Available routes for this network */ + struct rte_storage *routes; /* Available routes for this network */ struct fib_node n; /* FIB flags reserved for kernel syncer */ } net; @@ -148,6 +152,129 @@ struct hostentry { u32 igp_metric; /* Chosen route IGP metric */ }; +struct rte_storage { + struct rte_storage *next; /* Next in chain */ + struct rte rte; /* Route data */ +}; + +#define RTE_COPY(r, l) ((r) ? (((*(l)) = (r)->rte), (l)) : NULL) +#define RTE_OR_NULL(r) ((r) ? &((r)->rte) : NULL) + +/* Table-channel connections */ + +struct rt_import_request { + struct rt_import_hook *hook; /* The table part of importer */ + char *name; + u8 trace_routes; + + void (*dump_req)(struct rt_import_request *req); + void (*log_state_change)(struct rt_import_request *req, u8 state); + /* Preimport is called when the @new route is just-to-be inserted, replacing @old. + * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ + struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); + struct rte *(*rte_modify)(struct rte *, struct linpool *); +}; + +struct rt_import_hook { + node n; + rtable *table; /* The connected table */ + struct rt_import_request *req; /* The requestor */ + + struct rt_import_stats { + /* Import - from protocol to core */ + u32 pref; /* Number of routes selected as best in the (adjacent) routing table */ + u32 updates_ignored; /* Number of route updates rejected as already in route table */ + u32 updates_accepted; /* Number of route updates accepted and imported */ + u32 withdraws_ignored; /* Number of route withdraws rejected as already not in route table */ + u32 withdraws_accepted; /* Number of route withdraws accepted and processed */ + } stats; + + btime last_state_change; /* Time of last state transition */ + + u8 import_state; /* IS_* */ + + void (*stopped)(struct rt_import_request *); /* Stored callback when import is stopped */ +}; + +struct rt_pending_export { + struct rte_storage *new, *new_best, *old, *old_best; +}; + +struct rt_export_request { + struct rt_export_hook *hook; /* Table part of the export */ + char *name; + u8 trace_routes; + + /* There are two methods of export. You can either request feeding every single change + * or feeding the whole route feed. In case of regular export, &export_one is preferred. + * Anyway, when feeding, &export_bulk is preferred, falling back to &export_one. + * Thus, for RA_OPTIMAL, &export_one is only set, + * for RA_MERGED and RA_ACCEPTED, &export_bulk is only set + * and for RA_ANY, both are set to accomodate for feeding all routes but receiving single changes + */ + void (*export_one)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); + void (*export_bulk)(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + + void (*dump_req)(struct rt_export_request *req); + void (*log_state_change)(struct rt_export_request *req, u8); +}; + +struct rt_export_hook { + node n; + rtable *table; /* The connected table */ + + pool *pool; + linpool *lp; + + struct rt_export_request *req; /* The requestor */ + + struct rt_export_stats { + /* Export - from core to protocol */ + u32 updates_received; /* Number of route updates received */ + u32 withdraws_received; /* Number of route withdraws received */ + } stats; + + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + + btime last_state_change; /* Time of last state transition */ + + u8 refeed_pending; /* Refeeding and another refeed is scheduled */ + u8 export_state; /* Route export state (TES_*, see below) */ + + struct event *event; /* Event running all the export operations */ + + void (*stopped)(struct rt_export_request *); /* Stored callback when export is stopped */ +}; + +#define TIS_DOWN 0 +#define TIS_UP 1 +#define TIS_STOP 2 +#define TIS_FLUSHING 3 +#define TIS_WAITING 4 +#define TIS_CLEARED 5 +#define TIS_MAX 6 + +#define TES_DOWN 0 +#define TES_HUNGRY 1 +#define TES_FEEDING 2 +#define TES_READY 3 +#define TES_STOP 4 +#define TES_MAX 5 + +void rt_request_import(rtable *tab, struct rt_import_request *req); +void rt_request_export(rtable *tab, struct rt_export_request *req); + +void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); +void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); + +const char *rt_import_state_name(u8 state); +const char *rt_export_state_name(u8 state); + +static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } +static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } + +void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); + /* Types of route announcement, also used as flags */ #define RA_UNDEF 0 /* Undefined RA type */ #define RA_OPTIMAL 1 /* Announcement of optimal route change */ @@ -161,6 +288,41 @@ struct hostentry { #define RIC_REJECT -1 /* Rejected by protocol */ #define RIC_DROP -2 /* Silently dropped by protocol */ +#define rte_update channel_rte_import +/** + * rte_update - enter a new update to a routing table + * @c: channel doing the update + * @net: network address + * @rte: a &rte representing the new route + * @src: old route source identifier + * + * This function imports a new route to the appropriate table (via the channel). + * Table keys are @net (obligatory) and @rte->attrs->src. + * Both the @net and @rte pointers can be local. + * + * The route attributes (@rte->attrs) are obligatory. They can be also allocated + * locally. Anyway, if you use an already-cached attribute object, you shall + * call rta_clone() on that object yourself. (This semantics may change in future.) + * + * If the route attributes are local, you may set @rte->attrs->src to NULL, then + * the protocol's default route source will be supplied. + * + * When rte_update() gets a route, it automatically validates it. This includes + * checking for validity of the given network and next hop addresses and also + * checking for host-scope or link-scope routes. Then the import filters are + * processed and if accepted, the route is passed to route table recalculation. + * + * The accepted routes are then inserted into the table, replacing the old route + * for the same @net identified by @src. Then the route is announced + * to all the channels connected to the table using the standard export mechanism. + * Setting @rte to NULL makes this a withdraw, otherwise @rte->src must be the same + * as @src. + * + * All memory used for temporary allocations is taken from a special linpool + * @rte_update_pool and freed when rte_update() finishes. + */ +void rte_update(struct channel *c, const net_addr *net, struct rte *rte, struct rte_src *src); + extern list routing_tables; struct config; @@ -180,34 +342,29 @@ static inline void rt_shutdown(rtable *r) { rfree(r->rp); } static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } static inline net *net_find_valid(rtable *tab, const net_addr *addr) -{ net *n = net_find(tab, addr); return (n && rte_is_valid(n->routes)) ? n : NULL; } +{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; } static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } net *net_get(rtable *tab, const net_addr *addr); net *net_route(rtable *tab, const net_addr *n); -rte *rte_find(net *net, struct rte_src *src); -rte *rte_get_temp(struct rta *, struct rte_src *src); -void rte_update2(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -/* rte_update() moved to protocol.h to avoid dependency conflicts */ -int rt_examine(rtable *t, net_addr *a, struct proto *p, const struct filter *filter); -rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct channel *c); -void rt_refresh_end(rtable *t, struct channel *c); -void rt_modify_stale(rtable *t, struct channel *c); +int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter); +rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent); +void rt_refresh_begin(rtable *t, struct rt_import_request *); +void rt_refresh_end(rtable *t, struct rt_import_request *); +void rt_modify_stale(rtable *t, struct rt_import_request *); void rt_schedule_prune(rtable *t); -void rte_dump(rte *); -void rte_free(rte *); -rte *rte_do_cow(rte *); -static inline rte * rte_cow(rte *r) { return (r->flags & REF_COW) ? rte_do_cow(r) : r; } -rte *rte_cow_rta(rte *r, linpool *lp); +void rte_dump(struct rte_storage *); +void rte_free(struct rte_storage *); +struct rte_storage *rte_store(const rte *, net *net, rtable *); void rt_dump(rtable *); void rt_dump_all(void); -int rt_feed_channel(struct channel *c); -void rt_feed_channel_abort(struct channel *c); -int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); +void rt_dump_hooks(rtable *); +void rt_dump_hooks_all(void); int rt_reload_channel(struct channel *c); void rt_reload_channel_abort(struct channel *c); +void rt_refeed_channel(struct channel *c); void rt_prune_sync(rtable *t, int all); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, rte *old, rte **old_exported, int refeed); +int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); +int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); static inline int rt_is_ip(rtable *tab) @@ -247,6 +404,7 @@ struct rt_show_data { struct channel *export_channel; struct config *running_on_config; struct krt_proto *kernel; + struct rt_export_hook *kernel_export_hook; int export_mode, addr_mode, primary_only, filtered, stats; int table_open; /* Iteration (fit) is open */ @@ -301,9 +459,6 @@ rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr } */ -int rt_flowspec_check(rtable *tab_ip, rtable *tab_flow, const net_addr *n, rta *a, int interior); - - /* * Default protocol preferences */ diff --git a/proto/babel/babel.c b/proto/babel/babel.c index b90dcd3f..65d2567e 100644 --- a/proto/babel/babel.c +++ b/proto/babel/babel.c @@ -679,11 +679,13 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) rta a0 = { .eattrs = &eattrs.l, }; - rta *a = rta_lookup(&a0); - rte *rte = rte_get_temp(a, p->p.main_source); + rte e0 = { + .attrs = &a0, + .src = p->p.main_source, + }; e->unreachable = 0; - rte_update2(c, e->n.addr, rte, p->p.main_source); + rte_update(c, e->n.addr, &e0, p->p.main_source); } else if (e->valid && (e->router_id != p->router_id)) { @@ -694,18 +696,19 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e) ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_BABEL); ea_set_dest(&a0.eattrs, 0, RTD_UNREACHABLE); - rta *a = rta_lookup(&a0); - rte *rte = rte_get_temp(a, p->p.main_source); - rte->pflags = 0; + rte e0 = { + .attrs = &a0, + .src = p->p.main_source, + }; e->unreachable = 1; - rte_update2(c, e->n.addr, rte, p->p.main_source); + rte_update(c, e->n.addr, &e0, p->p.main_source); } else { /* Retraction */ e->unreachable = 0; - rte_update2(c, e->n.addr, NULL, p->p.main_source); + rte_update(c, e->n.addr, NULL, p->p.main_source); } } @@ -715,7 +718,7 @@ babel_announce_retraction(struct babel_proto *p, struct babel_entry *e) { struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel; e->unreachable = 0; - rte_update2(c, e->n.addr, NULL, p->p.main_source); + rte_update(c, e->n.addr, NULL, p->p.main_source); } @@ -856,14 +859,14 @@ babel_send_ihus(struct babel_iface *ifa) } static void -babel_send_hello(struct babel_iface *ifa) +babel_send_hello(struct babel_iface *ifa, uint interval) { struct babel_proto *p = ifa->proto; union babel_msg msg = {}; msg.type = BABEL_TLV_HELLO; msg.hello.seqno = ifa->hello_seqno++; - msg.hello.interval = ifa->cf->hello_interval; + msg.hello.interval = interval ?: ifa->cf->hello_interval; TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %t", ifa->ifname, msg.hello.seqno, (btime) msg.hello.interval); @@ -1571,7 +1574,7 @@ babel_iface_timer(timer *t) if (now_ >= ifa->next_hello) { - babel_send_hello(ifa); + babel_send_hello(ifa, 0); ifa->next_hello += hello_period * (1 + (now_ - ifa->next_hello) / hello_period); } @@ -1618,7 +1621,7 @@ babel_iface_start(struct babel_iface *ifa) tm_start(ifa->timer, 100 MS); ifa->up = 1; - babel_send_hello(ifa); + babel_send_hello(ifa, 0); babel_send_wildcard_retraction(ifa); babel_send_wildcard_request(ifa); babel_send_update(ifa, 0); /* Full update */ @@ -2257,9 +2260,9 @@ babel_kick_timer(struct babel_proto *p) static int -babel_preexport(struct proto *P, struct rte *new) +babel_preexport(struct channel *c, struct rte *new) { - if (new->src->proto != P) + if (new->src->proto != c->proto) return 0; /* Reject our own unreachable routes */ @@ -2276,8 +2279,8 @@ babel_preexport(struct proto *P, struct rte *new) * so store it into our data structures. */ static void -babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, - struct rte *new, struct rte *old UNUSED) +babel_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net, + struct rte *new, const struct rte *old UNUSED) { struct babel_proto *p = (void *) P; struct babel_entry *e; @@ -2305,11 +2308,11 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, if (rt_metric > BABEL_INFINITY) { log(L_WARN "%s: Invalid babel_metric value %u for route %N", - p->p.name, rt_metric, net->n.addr); + p->p.name, rt_metric, net); rt_metric = BABEL_INFINITY; } - e = babel_get_entry(p, net->n.addr); + e = babel_get_entry(p, net); /* Activate triggered updates */ if ((e->valid != BABEL_ENTRY_VALID) || @@ -2327,7 +2330,7 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net, else { /* Withdraw */ - e = babel_find_entry(p, net->n.addr); + e = babel_find_entry(p, net); if (!e || e->valid != BABEL_ENTRY_VALID) return; @@ -2350,7 +2353,7 @@ babel_rte_better(struct rte *new, struct rte *old) } static u32 -babel_rte_igp_metric(struct rte *rt) +babel_rte_igp_metric(const rte *rt) { return ea_get_int(rt->attrs->eattrs, &ea_babel_metric, BABEL_INFINITY); } @@ -2436,6 +2439,11 @@ babel_iface_shutdown(struct babel_iface *ifa) { if (ifa->sk) { + /* + * Retract all our routes and lower the hello interval so peers' neighbour + * state expires quickly + */ + babel_send_hello(ifa, BABEL_MIN_INTERVAL); babel_send_wildcard_retraction(ifa); babel_send_queue(ifa); } diff --git a/proto/babel/packets.c b/proto/babel/packets.c index 2a6d443d..d4acc170 100644 --- a/proto/babel/packets.c +++ b/proto/babel/packets.c @@ -2010,7 +2010,7 @@ babel_auth_sign(struct babel_iface *ifa, ip_addr dest) } DBG("Added MAC signatures (%d bytes) on ifa %s for dest %I\n", - tot_len, ifa->ifname, dest); + pos - (pkt + len), ifa->ifname, dest); return pos - (pkt + len); } diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 1a2104ad..871ecf69 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -582,6 +582,9 @@ bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) ifa->sk = bfd_open_tx_sk(p, local, iface); ifa->uc = 1; + if (cf->strict_bind) + ifa->rx = bfd_open_rx_sk_bound(p, local, iface); + add_tail(&p->iface_list, &ifa->n); return ifa; @@ -599,6 +602,12 @@ bfd_free_iface(struct bfd_iface *ifa) rfree(ifa->sk); } + if (ifa->rx) + { + sk_stop(ifa->rx); + rfree(ifa->rx); + } + rem_node(&ifa->n); mb_free(ifa); } @@ -1031,17 +1040,20 @@ bfd_start(struct proto *P) birdloop_enter(p->loop); - if (cf->accept_ipv4 && cf->accept_direct) - p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); + if (!cf->strict_bind) + { + if (cf->accept_ipv4 && cf->accept_direct) + p->rx4_1 = bfd_open_rx_sk(p, 0, SK_IPV4); - if (cf->accept_ipv4 && cf->accept_multihop) - p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); + if (cf->accept_ipv4 && cf->accept_multihop) + p->rx4_m = bfd_open_rx_sk(p, 1, SK_IPV4); - if (cf->accept_ipv6 && cf->accept_direct) - p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_direct) + p->rx6_1 = bfd_open_rx_sk(p, 0, SK_IPV6); - if (cf->accept_ipv6 && cf->accept_multihop) - p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + if (cf->accept_ipv6 && cf->accept_multihop) + p->rx6_m = bfd_open_rx_sk(p, 1, SK_IPV6); + } birdloop_leave(p->loop); @@ -1095,7 +1107,8 @@ bfd_reconfigure(struct proto *P, struct proto_config *c) if ((new->accept_ipv4 != old->accept_ipv4) || (new->accept_ipv6 != old->accept_ipv6) || (new->accept_direct != old->accept_direct) || - (new->accept_multihop != old->accept_multihop)) + (new->accept_multihop != old->accept_multihop) || + (new->strict_bind != old->strict_bind)) return 0; birdloop_mask_wakeups(p->loop); diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index bbccd0b8..60b7916c 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -47,6 +47,7 @@ struct bfd_config u8 accept_ipv6; u8 accept_direct; u8 accept_multihop; + u8 strict_bind; }; struct bfd_iface_config @@ -116,6 +117,7 @@ struct bfd_iface struct bfd_proto *bfd; sock *sk; + sock *rx; u32 uc; u8 changed; }; @@ -221,6 +223,7 @@ void bfd_show_sessions(struct proto *P); /* packets.c */ void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final); sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop, int inet_version); +sock * bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa); sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index df1cba42..70461872 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -23,7 +23,8 @@ CF_DECLS CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE, INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL, AUTHENTICATION, - NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT) + NONE, SIMPLE, METICULOUS, KEYED, MD5, SHA1, IPV4, IPV6, DIRECT, + STRICT, BIND) %type <iface> bfd_neigh_iface %type <a> bfd_neigh_local @@ -48,6 +49,7 @@ bfd_proto_item: | INTERFACE bfd_iface | MULTIHOP bfd_multihop | NEIGHBOR bfd_neighbor + | STRICT BIND bool { BFD_CFG->strict_bind = $3; } ; bfd_proto_opts: diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 7618e20f..5f10734c 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -366,7 +366,9 @@ bfd_rx_hook(sock *sk, uint len) if (ps > BFD_STATE_DOWN) DROP("invalid init state", ps); - uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? sk->lifindex : 0; + uint ifindex = (sk->sport == BFD_CONTROL_PORT) ? + (sk->iface ? sk->iface->index : sk->lifindex) : + 0; s = bfd_find_session_by_addr(p, sk->faddr, ifindex); /* FIXME: better session matching and message */ @@ -439,6 +441,38 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop, int af) } sock * +bfd_open_rx_sk_bound(struct bfd_proto *p, ip_addr local, struct iface *ifa) +{ + sock *sk = sk_new(p->tpool); + sk->type = SK_UDP; + sk->saddr = local; + sk->sport = ifa ? BFD_CONTROL_PORT : BFD_MULTI_CTL_PORT; + sk->iface = ifa; + sk->vrf = p->p.vrf; + sk->data = p; + + sk->rbsize = BFD_MAX_LEN; + sk->rx_hook = bfd_rx_hook; + sk->err_hook = bfd_err_hook; + + /* TODO: configurable ToS and priority */ + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->priority = sk_priority_control; + sk->flags = SKF_THREAD | SKF_BIND | (ifa ? SKF_TTL_RX : 0); + + if (sk_open(sk) < 0) + goto err; + + sk_start(sk); + return sk; + + err: + sk_log_error(sk, p->p.name); + rfree(sk); + return NULL; +} + +sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) { sock *sk = sk_new(p->tpool); diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 6a9e4026..0b715eaa 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -372,26 +372,28 @@ bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric) } int -bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad) +bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad) { - eattr *a = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_AIGP)); - if (!a) + rta *a = e->attrs; + + eattr *ea = ea_find(a->eattrs, BGP_EA_ID(BA_AIGP)); + if (!ea) return 0; - const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC); + const byte *b = bgp_aigp_get_tlv(ea->u.ptr, BGP_AIGP_METRIC); if (!b) return 0; u64 aigp = get_u64(b + 3); u64 step = rt_get_igp_metric(e); - if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN)) + if (!rta_resolvable(a) || (step >= IGP_METRIC_UNKNOWN)) step = BGP_AIGP_MAX; if (!step) step = 1; - *ad = a->u.ptr; + *ad = ea->u.ptr; *metric = aigp + step; if (*metric < aigp) *metric = BGP_AIGP_MAX; @@ -411,7 +413,7 @@ bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad) } u32 -bgp_rte_igp_metric(struct rte *rt) +bgp_rte_igp_metric(const rte *rt) { u64 metric = bgp_total_aigp_metric(rt); return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN); @@ -942,7 +944,7 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla static void bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a) { - net_addr *n = s->route->net->n.addr; + const net_addr *n = s->route->net; u32 *labels = (u32 *) a->u.ptr->data; uint lnum = a->u.ptr->length / 4; @@ -1513,7 +1515,7 @@ bgp_finish_attrs(struct bgp_parse_state *s, rta *a) #define RBH_FN(a,h) h #define RBH_REHASH bgp_rbh_rehash -#define RBH_PARAMS /8, *2, 2, 2, 8, 20 +#define RBH_PARAMS /8, *2, 2, 2, 12, 20 HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket) @@ -1629,7 +1631,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash -#define PXH_PARAMS /8, *2, 2, 2, 8, 24 +#define PXH_PARAMS /8, *2, 2, 2, 12, 24 HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix) @@ -1653,9 +1655,10 @@ bgp_free_prefix_table(struct bgp_channel *c) } static struct bgp_prefix * -bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id) +bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) { - u32 hash = net_hash(net) ^ u32_hash(path_id); + /* We must use a different hash function than the rtable */ + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id)); struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); if (px) @@ -1697,10 +1700,10 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) */ int -bgp_preexport(struct proto *P, rte *e) +bgp_preexport(struct channel *c, rte *e) { struct proto *SRC = e->src->proto; - struct bgp_proto *p = (struct bgp_proto *) P; + struct bgp_proto *p = (struct bgp_proto *) (c->proto); struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL; /* Reject our routes */ @@ -1711,8 +1714,8 @@ bgp_preexport(struct proto *P, rte *e) if (src == NULL) return 0; - /* Reject flowspec that failed or are pending validation */ - if (net_is_flow(e->net->n.addr)) + /* Reject flowspec that failed validation */ + if (net_is_flow(e->net)) switch (rt_get_flowspec_valid(e)) { case FLOWSPEC_VALID: @@ -1720,10 +1723,11 @@ bgp_preexport(struct proto *P, rte *e) case FLOWSPEC_INVALID: return -1; case FLOWSPEC_UNKNOWN: - if ((rt_get_source_attr(e) == RTS_BGP) && - ((struct bgp_channel *) e->sender)->base_table) - return -1; + ASSUME((rt_get_source_attr(e) != RTS_BGP) || + !((struct bgp_channel *) SKIP_BACK(struct channel, in_req, e->sender->req))->base_table); break; + case FLOWSPEC__MAX: + bug("This never happens."); } /* IBGP route reflection, RFC 4456 */ @@ -1740,11 +1744,11 @@ bgp_preexport(struct proto *P, rte *e) } /* Handle well-known communities, RFC 1997 */ - struct eattr *c; + struct eattr *com; if (p->cf->interpret_communities && - (c = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)))) + (com = ea_find(e->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)))) { - const struct adata *d = c->u.ptr; + const struct adata *d = com->u.ptr; /* Do not export anywhere */ if (int_set_contains(d, BGP_COMM_NO_ADVERTISE)) @@ -1883,7 +1887,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at } void -bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old) +bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old) { struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; @@ -1897,7 +1901,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old) /* Error during attribute processing */ if (!attrs) - log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n->n.addr); + log(L_ERR "%s: Invalid route %N withdrawn", p->p.name, n); /* If attributes are invalid, we fail back to withdraw */ buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); @@ -1909,7 +1913,7 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old) path = old->src->global_id; } - px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0); + px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0); add_tail(&buck->prefixes, &px->buck_node); bgp_schedule_packet(p->conn, c, PKT_UPDATE); @@ -1970,8 +1974,8 @@ bgp_rte_better(rte *new, rte *old) return 1; /* RFC 4271 9.1.2.1. Route resolvability test */ - n = rte_resolvable(new); - o = rte_resolvable(old); + n = rta_resolvable(new->attrs); + o = rta_resolvable(old->attrs); if (n > o) return 1; if (n < o) @@ -2111,7 +2115,7 @@ bgp_rte_mergable(rte *pri, rte *sec) return 0; /* RFC 4271 9.1.2.1. Route resolvability test */ - if (rte_resolvable(pri) != rte_resolvable(sec)) + if (rta_resolvable(pri->attrs) != rta_resolvable(sec->attrs)) return 0; /* LLGR draft - depreference stale routes */ @@ -2184,16 +2188,15 @@ same_group(rte *r, u32 lpref, u32 lasn) } static inline int -use_deterministic_med(rte *r) +use_deterministic_med(struct rte_storage *r) { - struct proto *P = r->src->proto; + struct proto *P = r->rte.src->proto; return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med; } int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) { - rte *r, *s; rte *key = new ? new : old; u32 lpref = rt_get_preference(key); u32 lasn = bgp_get_neighbor(key); @@ -2261,13 +2264,13 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) } /* The default case - find a new best-in-group route */ - r = new; /* new may not be in the list */ - for (s=net->routes; rte_is_valid(s); s=s->next) - if (use_deterministic_med(s) && same_group(s, lpref, lasn)) + rte *r = new; /* new may not be in the list */ + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) + if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) { - s->pflags |= BGP_REF_SUPPRESSED; - if (!r || bgp_rte_better(s, r)) - r = s; + s->rte.pflags |= BGP_REF_SUPPRESSED; + if (!r || bgp_rte_better(&s->rte, r)) + r = &s->rte; } /* Simple case - the last route in group disappears */ @@ -2279,10 +2282,10 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) new->pflags &= ~BGP_REF_SUPPRESSED; /* Found all existing routes mergable with best-in-group */ - for (s=net->routes; rte_is_valid(s); s=s->next) - if (use_deterministic_med(s) && same_group(s, lpref, lasn)) - if ((s != r) && bgp_rte_mergable(r, s)) - s->pflags &= ~BGP_REF_SUPPRESSED; + for (struct rte_storage *s = net->routes; rte_is_valid(RTE_OR_NULL(s)); s = s->next) + if (use_deterministic_med(s) && same_group(&s->rte, lpref, lasn)) + if ((&s->rte != r) && bgp_rte_mergable(r, &s->rte)) + s->rte.pflags &= ~BGP_REF_SUPPRESSED; /* Found best-in-group */ r->pflags &= ~BGP_REF_SUPPRESSED; @@ -2317,12 +2320,12 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best) return !old_suppressed; } -struct rte * +rte * bgp_rte_modify_stale(struct rte *r, struct linpool *pool) { - eattr *a = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)); - const struct adata *ad = a ? a->u.ptr : NULL; - uint flags = a ? a->flags : BAF_PARTIAL; + eattr *ea = ea_find(r->attrs->eattrs, BGP_EA_ID(BA_COMMUNITY)); + const struct adata *ad = ea ? ea->u.ptr : NULL; + uint flags = ea ? ea->flags : BAF_PARTIAL; if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR)) return NULL; @@ -2330,12 +2333,17 @@ bgp_rte_modify_stale(struct rte *r, struct linpool *pool) if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE)) return r; - r = rte_cow_rta(r, pool); - bgp_set_attr_ptr(&(r->attrs->eattrs), BA_COMMUNITY, flags, + rta *a = rta_do_cow(r->attrs, pool); + + _Thread_local static rte e0; + e0 = *r; + e0.attrs = a; + + bgp_set_attr_ptr(&(a->eattrs), BA_COMMUNITY, flags, int_set_add(pool, ad, BGP_COMM_LLGR_STALE)); - r->pflags |= BGP_REF_STALE; + e0.pflags |= BGP_REF_STALE; - return r; + return &e0; } @@ -2394,25 +2402,28 @@ bgp_get_route_info(rte *e, byte *buf) buf += bsprintf(buf, " (%d", rt_get_preference(e)); - if (e->pflags & BGP_REF_SUPPRESSED) - buf += bsprintf(buf, "-"); + if (!net_is_flow(e->net)) + { + if (e->pflags & BGP_REF_SUPPRESSED) + buf += bsprintf(buf, "-"); - if (rte_stale(e)) - buf += bsprintf(buf, "s"); + if (rte_stale(e)) + buf += bsprintf(buf, "s"); - u64 metric = bgp_total_aigp_metric(e); - if (metric < BGP_AIGP_MAX) - { - buf += bsprintf(buf, "/%lu", metric); - } - else if (metric = rt_get_igp_metric(e)) - { - if (!rte_resolvable(e)) - buf += bsprintf(buf, "/-"); - else if (metric >= IGP_METRIC_UNKNOWN) - buf += bsprintf(buf, "/?"); - else - buf += bsprintf(buf, "/%d", metric); + u64 metric = bgp_total_aigp_metric(e); + if (metric < BGP_AIGP_MAX) + { + buf += bsprintf(buf, "/%lu", metric); + } + else if (metric = rt_get_igp_metric(e)) + { + if (!rta_resolvable(e->attrs)) + buf += bsprintf(buf, "/-"); + else if (metric >= IGP_METRIC_UNKNOWN) + buf += bsprintf(buf, "/?"); + else + buf += bsprintf(buf, "/%d", metric); + } } buf += bsprintf(buf, ") ["); diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index bd4c68b7..84430287 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -760,25 +760,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p) { case BGP_GRS_NONE: c->gr_active = BGP_GRS_ACTIVE; - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); break; case BGP_GRS_ACTIVE: - rt_refresh_end(c->c.table, &c->c); - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); + rt_refresh_begin(c->c.table, &c->c.in_req); break; case BGP_GRS_LLGR: - rt_refresh_begin(c->c.table, &c->c); - rt_modify_stale(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); + rt_modify_stale(c->c.table, &c->c.in_req); break; } } else { /* Just flush the routes */ - rt_refresh_begin(c->c.table, &c->c); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); + rt_refresh_end(c->c.table, &c->c.in_req); } /* Reset bucket and prefix tables */ @@ -819,7 +819,7 @@ bgp_graceful_restart_done(struct bgp_channel *c) BGP_TRACE(D_EVENTS, "Neighbor graceful restart done"); tm_stop(c->stale_timer); - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); } /** @@ -861,7 +861,7 @@ bgp_graceful_restart_timeout(timer *t) /* Channel is in GR, and supports LLGR -> start LLGR */ c->gr_active = BGP_GRS_LLGR; tm_start(c->stale_timer, c->stale_time S); - rt_modify_stale(c->c.table, &c->c); + rt_modify_stale(c->c.table, &c->c.in_req); } } else @@ -899,10 +899,10 @@ bgp_refresh_begin(struct bgp_channel *c) { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; } c->load_state = BFS_REFRESHING; - rt_refresh_begin(c->c.table, &c->c); + rt_refresh_begin(c->c.table, &c->c.in_req); if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c); + rt_refresh_begin(c->c.in_table, &c->c.in_req); } /** @@ -923,7 +923,7 @@ bgp_refresh_end(struct bgp_channel *c) { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; } c->load_state = BFS_NONE; - rt_refresh_end(c->c.table, &c->c); + rt_refresh_end(c->c.table, &c->c.in_req); if (c->c.in_table) rt_prune_sync(c->c.in_table, 0); @@ -2467,6 +2467,9 @@ bgp_show_proto_info(struct proto *P) else cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface); + if ((p->conn == &p->outgoing_conn) && (p->cf->remote_port != BGP_PORT)) + cli_msg(-1006, " Neighbor port: %u", p->cf->remote_port); + cli_msg(-1006, " Neighbor AS: %u", p->remote_as); cli_msg(-1006, " Local AS: %u", p->cf->local_as); diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 2f98dc1b..662d9d48 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -517,9 +517,9 @@ struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id); struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id); static inline int -rte_resolvable(rte *rt) +rta_resolvable(rta *a) { - eattr *nhea = ea_find(rt->attrs->eattrs, &ea_gen_nexthop); + eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); struct nexthop_adata *nhad = (void *) nhea->u.ptr; return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } @@ -567,22 +567,22 @@ int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best); struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool); -u32 bgp_rte_igp_metric(struct rte *); -void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old); -int bgp_preexport(struct proto *, struct rte *); -void bgp_get_route_info(struct rte *, byte *buf); -int bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad); +u32 bgp_rte_igp_metric(const rte *); +void bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, const rte *old); +int bgp_preexport(struct channel *, struct rte *); +void bgp_get_route_info(struct rte *, byte *); +int bgp_total_aigp_metric_(const rte *e, u64 *metric, const struct adata **ad); #define BGP_AIGP_METRIC 1 #define BGP_AIGP_MAX U64(0xffffffffffffffff) static inline u64 -bgp_total_aigp_metric(rte *r) +bgp_total_aigp_metric(const rte *e) { u64 metric = BGP_AIGP_MAX; const struct adata *ad; - bgp_total_aigp_metric_(r, &metric, &ad); + bgp_total_aigp_metric_(e, &metric, &ad); return metric; } diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 4c46c60e..9911738d 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -1033,27 +1033,6 @@ bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 lnum, u32 labels[ln } } -static void -bgp_apply_flow_validation(struct bgp_parse_state *s, const net_addr *n, rta *a) -{ - struct bgp_channel *c = s->channel; - uint valid = rt_flowspec_check(c->base_table, c->c.table, n, a, s->proto->is_interior); - - /* Invalidate cached rta */ - if (s->cached_rta) - { - /* Has't changed */ - if (valid == ea_get_int(s->cached_rta->eattrs, &ea_gen_flowspec_valid, FLOWSPEC_UNKNOWN)) - return; - - rta_free(s->cached_rta); - s->cached_rta = NULL; - } - - /* Set the value */ - ea_set_attr_u32(&a->eattrs, &ea_gen_flowspec_valid, 0, valid); -} - static int bgp_match_src(struct bgp_export_state *s, int mode) { @@ -1407,7 +1386,7 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a REPORT("Invalid route %N withdrawn", n); /* Route withdraw */ - rte_update3(&s->channel->c, n, NULL, s->last_src); + rte_update(&s->channel->c, n, NULL, s->last_src); return; } @@ -1420,11 +1399,12 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, rta *a a0->eattrs = ea; } - rta *a = rta_clone(s->cached_rta); - rte *e = rte_get_temp(a, s->last_src); + rte e0 = { + .attrs = s->cached_rta, + .src = s->last_src, + }; - e->pflags = 0; - rte_update3(&s->channel->c, n, e, s->last_src); + rte_update(&s->channel->c, n, &e0, s->last_src); } static void @@ -1928,10 +1908,6 @@ bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a) net_fill_flow4(n, px, pxlen, pos, flen); ADVANCE(pos, len, flen); - /* Apply validation procedure per RFC 8955 (6) */ - if (a && s->channel->cf->validate) - bgp_apply_flow_validation(s, n, a); - bgp_rte_update(s, n, path_id, a); } } @@ -2020,10 +1996,6 @@ bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a) net_fill_flow6(n, px, pxlen, pos, flen); ADVANCE(pos, len, flen); - /* Apply validation procedure per RFC 8955 (6) */ - if (a && s->channel->cf->validate) - bgp_apply_flow_validation(s, n, a); - bgp_rte_update(s, n, path_id, a); } } @@ -2762,7 +2734,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { case BGP_RR_REQUEST: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - channel_request_feeding(&c->c); + rt_refeed_channel(&c->c); break; case BGP_RR_BEGIN: diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index e45f7cb7..0f53dbe7 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -113,13 +113,13 @@ mrt_buffer_flush(buffer *b) } #define MRT_DEFINE_TYPE(S, T) \ - static inline void mrt_put_##S##_(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S##_(buffer *b, T x) \ { \ put_##S(b->pos, x); \ b->pos += sizeof(T); \ } \ \ - static inline void mrt_put_##S(buffer *b, T x) \ + UNUSED static inline void mrt_put_##S(buffer *b, T x) \ { \ mrt_buffer_need(b, sizeof(T)); \ put_##S(b->pos, x); \ @@ -460,7 +460,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) return; fail: - mrt_log(s, "Attribute list too long for %N", r->net->n.addr); + mrt_log(s, "Attribute list too long for %N", r->net); } #endif @@ -512,24 +512,21 @@ mrt_rib_table_dump(struct mrt_table_dump_state *s, net *n, int add_path) mrt_init_message(&s->buf, MRT_TABLE_DUMP_V2, subtype); mrt_rib_table_header(s, n->n.addr); - rte *rt, *rt0; - for (rt0 = n->routes; rt = rt0; rt0 = rt0->next) + for (struct rte_storage *rt, *rt0 = n->routes; rt = rt0; rt0 = rt0->next) { - if (rte_is_filtered(rt)) + if (rte_is_filtered(&rt->rte)) continue; /* Skip routes that should be reported in the other phase */ - if (!s->always_add_path && (!rt->src->private_id != !s->add_path)) + if (!s->always_add_path && (!rt->rte.src->private_id != !s->add_path)) { s->want_add_path = 1; continue; } - if (f_run(s->filter, &rt, 0) <= F_ACCEPT) - mrt_rib_table_entry(s, rt); - - if (rt != rt0) - rte_free(rt); + rte e = rt->rte; + if (f_run(s->filter, &e, 0) <= F_ACCEPT) + mrt_rib_table_entry(s, &e); lp_flush(s->linpool); } diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 731adaa5..8fe64d95 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -108,10 +108,10 @@ #include "ospf.h" #include "lib/macro.h" -static int ospf_preexport(struct proto *P, rte *new); +static int ospf_preexport(struct channel *C, rte *new); static void ospf_reload_routes(struct channel *C); static int ospf_rte_better(struct rte *new, struct rte *old); -static u32 ospf_rte_igp_metric(struct rte *rt); +static u32 ospf_rte_igp_metric(const rte *rt); static void ospf_disp(timer *timer); @@ -414,7 +414,7 @@ ospf_rte_better(struct rte *new, struct rte *old) } static u32 -ospf_rte_igp_metric(struct rte *rt) +ospf_rte_igp_metric(const rte *rt) { if (rt_get_source_attr(rt) == RTS_OSPF_EXT2) return IGP_METRIC_UNKNOWN; @@ -486,13 +486,13 @@ ospf_disp(timer * timer) * import to the filters. */ static int -ospf_preexport(struct proto *P, rte *e) +ospf_preexport(struct channel *c, rte *e) { - struct ospf_proto *p = (struct ospf_proto *) P; + struct ospf_proto *p = (struct ospf_proto *) c->proto; struct ospf_area *oa = ospf_main_area(p); /* Reject our own routes */ - if (e->src->proto == P) + if (e->src->proto == c->proto) return -1; /* Do not export routes to stub areas */ diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index afe4a01f..1c76aee7 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -2090,15 +2090,18 @@ again1: ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count); a0.eattrs = &eattrs.l; - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a, p->p.main_source); - rta_free(nf->old_rta); - nf->old_rta = rta_clone(a); + nf->old_rta = rta_lookup(&a0); + + rte e0 = { + .attrs = nf->old_rta, + .src = p->p.main_source, + }; DBG("Mod rte type %d - %N via %I on iface %s, met %d\n", a0.source, nf->fn.addr, a0.gw, a0.iface ? a0.iface->name : "(none)", nf->n.metric1); - rte_update(&p->p, nf->fn.addr, e); + + rte_update(p->p.main_channel, nf->fn.addr, &e0, p->p.main_source); } } else if (nf->old_rta) @@ -2107,7 +2110,7 @@ again1: rta_free(nf->old_rta); nf->old_rta = NULL; - rte_update(&p->p, nf->fn.addr, NULL); + rte_update(p->p.main_channel, nf->fn.addr, NULL, p->p.main_source); } /* Remove unused rt entry, some special entries are persistent */ @@ -2123,7 +2126,6 @@ again1: } FIB_ITERATE_END; - WALK_LIST(oa, p->area_list) { /* Cleanup ASBR hash tables */ diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 09ec9f28..6ff6a745 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -1300,7 +1300,7 @@ find_surrogate_fwaddr(struct ospf_proto *p, struct ospf_area *oa) } void -ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED) +ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED) { struct ospf_proto *p = (struct ospf_proto *) P; struct ospf_area *oa = NULL; /* non-NULL for NSSA-LSA */ @@ -1319,7 +1319,7 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte if (!new) { - nf = fib_find(&p->rtf, n->n.addr); + nf = fib_find(&p->rtf, n); if (!nf || !nf->external_rte) return; @@ -1346,14 +1346,14 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte if (m1 > LSINFINITY) { log(L_WARN "%s: Invalid ospf_metric1 value %u for route %N", - p->p.name, m1, n->n.addr); + p->p.name, m1, n); m1 = LSINFINITY; } if (m2 > LSINFINITY) { log(L_WARN "%s: Invalid ospf_metric2 value %u for route %N", - p->p.name, m2, n->n.addr); + p->p.name, m2, n); m2 = LSINFINITY; } @@ -1367,12 +1367,10 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte ip_addr fwd = IPA_NONE; eattr *nhea = ea_find(a->eattrs, &ea_gen_nexthop); - if (nhea) - { - struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + struct nexthop_adata *nhad = (struct nexthop_adata *) nhea->u.ptr; + if (NEXTHOP_IS_REACHABLE(nhad)) if (use_gw_for_fwaddr(p, nhad->nh.gw, nhad->nh.iface)) fwd = nhad->nh.gw; - } /* NSSA-LSA with P-bit set must have non-zero forwarding address */ if (oa && ipa_zero(fwd)) @@ -1382,12 +1380,12 @@ ospf_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte if (ipa_zero(fwd)) { log(L_ERR "%s: Cannot find forwarding address for NSSA-LSA %N", - p->p.name, n->n.addr); + p->p.name, n); return; } } - nf = fib_get(&p->rtf, n->n.addr); + nf = fib_get(&p->rtf, n); ospf_originate_ext_lsa(p, oa, nf, LSA_M_EXPORT, metric, ebit, fwd, tag, 1, p->vpn_pe); nf->external_rte = 1; } diff --git a/proto/ospf/topology.h b/proto/ospf/topology.h index fe102f77..3c92b431 100644 --- a/proto/ospf/topology.h +++ b/proto/ospf/topology.h @@ -198,7 +198,7 @@ void ospf_originate_sum_rt_lsa(struct ospf_proto *p, struct ospf_area *oa, u32 d void ospf_originate_ext_lsa(struct ospf_proto *p, struct ospf_area *oa, ort *nf, u8 mode, u32 metric, u32 ebit, ip_addr fwaddr, u32 tag, int pbit, int dn); void ospf_originate_gr_lsa(struct ospf_proto *p, struct ospf_iface *ifa); -void ospf_rt_notify(struct proto *P, struct channel *ch, net *n, rte *new, rte *old); +void ospf_rt_notify(struct proto *P, struct channel *ch, const net_addr *n, rte *new, const rte *old); void ospf_update_topology(struct ospf_proto *p); struct top_hash_entry *ospf_hash_find(struct top_graph *, u32 domain, u32 lsa, u32 rtr, u32 type); diff --git a/proto/perf/perf.c b/proto/perf/perf.c index d1ff6adf..8a883dd7 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -164,18 +164,17 @@ perf_loop(void *data) clock_gettime(CLOCK_MONOTONIC, &ts_generated); - for (uint i=0; i<N; i++) { - rte *e = rte_get_temp(p->data[i].a, p->p.main_source); - e->pflags = 0; - - rte_update(P, &(p->data[i].net), e); + for (uint i=0; i<N; i++) + { + rte e0 = { .attrs = p->data[i].a, .src = P->main_source, }; + rte_update(P->main_channel, &(p->data[i].net), &e0, P->main_source); } clock_gettime(CLOCK_MONOTONIC, &ts_update); if (!p->keep) for (uint i=0; i<N; i++) - rte_update(P, &(p->data[i].net), NULL); + rte_update(P->main_channel, &(p->data[i].net), NULL, P->main_source); clock_gettime(CLOCK_MONOTONIC, &ts_withdraw); @@ -208,7 +207,7 @@ perf_loop(void *data) } static void -perf_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net UNUSED, struct rte *new UNUSED, struct rte *old UNUSED) +perf_rt_notify(struct proto *P, struct channel *c UNUSED, const net_addr *net UNUSED, struct rte *new UNUSED, const struct rte *old UNUSED) { struct perf_proto *p = (struct perf_proto *) P; p->exp++; diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index 1202c169..c869de9f 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -16,7 +16,7 @@ CF_DEFINES CF_DECLS -CF_KEYWORDS(PIPE, PEER, TABLE) +CF_KEYWORDS(PIPE, PEER, TABLE, MAX, GENERATION) CF_GRAMMAR @@ -25,6 +25,7 @@ proto: pipe_proto '}' { this_channel = NULL; } ; pipe_proto_start: proto_start PIPE { this_proto = proto_config_new(&proto_pipe, $1); + PIPE_CFG->max_generation = 16; } proto_name { @@ -41,6 +42,10 @@ pipe_proto: | pipe_proto proto_item ';' | pipe_proto channel_item_ ';' | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } + | pipe_proto MAX GENERATION expr ';' { + if (($4 < 1) || ($4 > 254)) cf_error("Max generation must be in range 1..254, got %u", $4); + PIPE_CFG->max_generation = $4; + } ; CF_CODE diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index e458a238..e122d771 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -48,62 +48,54 @@ #endif static void -pipe_rt_notify(struct proto *P, struct channel *src_ch, net *n, rte *new, rte *old) +pipe_rt_notify(struct proto *P, struct channel *src_ch, const net_addr *n, rte *new, const rte *old) { struct pipe_proto *p = (void *) P; struct channel *dst = (src_ch == p->pri) ? p->sec : p->pri; - struct rte_src *src; - - rte *e; - rta *a; if (!new && !old) return; - if (dst->table->pipe_busy) - { - log(L_ERR "Pipe loop detected when sending %N to table %s", - n->n.addr, dst->table->name); - return; - } - if (new) { - src = new->src; - - a = alloca(rta_size(new->attrs)); + rta *a = alloca(rta_size(new->attrs)); memcpy(a, new->attrs, rta_size(new->attrs)); a->cached = 0; ea_unset_attr(&a->eattrs, 0, &ea_gen_hostentry); - e = rte_get_temp(a, src); - e->pflags = new->pflags; -#ifdef CONFIG_BGP - /* Hack to cleanup cached value */ - if (e->src->proto->proto == &proto_bgp) - e->pflags &= ~(BGP_REF_STALE | BGP_REF_NOT_STALE); -#endif + rte e0 = { + .attrs = a, + .src = new->src, + .generation = new->generation + 1, + }; + + rte_update(dst, n, &e0, new->src); } else - { - e = NULL; - src = old->src; - } - - src_ch->table->pipe_busy = 1; - rte_update2(dst, n->n.addr, e, src); - src_ch->table->pipe_busy = 0; + rte_update(dst, n, NULL, old->src); } static int -pipe_preexport(struct proto *P, rte *e) +pipe_preexport(struct channel *c, rte *e) { - struct proto *pp = e->sender->proto; + struct pipe_proto *p = (void *) c->proto; + + /* Avoid direct loopbacks */ + if (e->sender == c->in_req.hook) + return -1; + + /* Indirection check */ + uint max_generation = ((struct pipe_config *) p->p.cf)->max_generation; + if (e->generation >= max_generation) + { + log_rl(&p->rl_gen, L_ERR "Route overpiped (%u hops of %u configured in %s) in table %s: %N %s/%u:%u", + e->generation, max_generation, c->proto->name, + c->table->name, e->net, e->src->proto->name, e->src->private_id, e->src->global_id); - if (pp == P) - return -1; /* Avoid local loops automatically */ + return -1; + } return 0; } @@ -188,6 +180,8 @@ pipe_init(struct proto_config *CF) P->preexport = pipe_preexport; P->reload_routes = pipe_reload_routes; + p->rl_gen = (struct tbf) TBF_DEFAULT_LOG_LIMITS; + pipe_configure_channels(p, cf); return P; @@ -219,8 +213,18 @@ pipe_get_status(struct proto *P, byte *buf) static void pipe_show_stats(struct pipe_proto *p) { - struct proto_stats *s1 = &p->pri->stats; - struct proto_stats *s2 = &p->sec->stats; + struct channel_import_stats *s1i = &p->pri->import_stats; + struct channel_export_stats *s1e = &p->pri->export_stats; + struct channel_import_stats *s2i = &p->sec->import_stats; + struct channel_export_stats *s2e = &p->sec->export_stats; + + struct rt_import_stats *rs1i = p->pri->in_req.hook ? &p->pri->in_req.hook->stats : NULL; + struct rt_export_stats *rs1e = p->pri->out_req.hook ? &p->pri->out_req.hook->stats : NULL; + struct rt_import_stats *rs2i = p->sec->in_req.hook ? &p->sec->in_req.hook->stats : NULL; + struct rt_export_stats *rs2e = p->sec->out_req.hook ? &p->sec->out_req.hook->stats : NULL; + + u32 pri_routes = p->pri->in_limit.count; + u32 sec_routes = p->sec->in_limit.count; /* * Pipe stats (as anything related to pipes) are a bit tricky. There @@ -244,24 +248,22 @@ pipe_show_stats(struct pipe_proto *p) */ cli_msg(-1006, " Routes: %u imported, %u exported", - s1->imp_routes, s2->imp_routes); + pri_routes, sec_routes); cli_msg(-1006, " Route change stats: received rejected filtered ignored accepted"); cli_msg(-1006, " Import updates: %10u %10u %10u %10u %10u", - s2->exp_updates_received, s2->exp_updates_rejected + s1->imp_updates_invalid, - s2->exp_updates_filtered, s1->imp_updates_ignored, s1->imp_updates_accepted); + rs2e->updates_received, s2e->updates_rejected + s1i->updates_invalid, + s2e->updates_filtered, rs1i->updates_ignored, rs1i->updates_accepted); cli_msg(-1006, " Import withdraws: %10u %10u --- %10u %10u", - s2->exp_withdraws_received, s1->imp_withdraws_invalid, - s1->imp_withdraws_ignored, s1->imp_withdraws_accepted); + rs2e->withdraws_received, s1i->withdraws_invalid, + rs1i->withdraws_ignored, rs1i->withdraws_accepted); cli_msg(-1006, " Export updates: %10u %10u %10u %10u %10u", - s1->exp_updates_received, s1->exp_updates_rejected + s2->imp_updates_invalid, - s1->exp_updates_filtered, s2->imp_updates_ignored, s2->imp_updates_accepted); + rs1e->updates_received, s1e->updates_rejected + s2i->updates_invalid, + s1e->updates_filtered, rs2i->updates_ignored, rs2i->updates_accepted); cli_msg(-1006, " Export withdraws: %10u %10u --- %10u %10u", - s1->exp_withdraws_received, s2->imp_withdraws_invalid, - s2->imp_withdraws_ignored, s2->imp_withdraws_accepted); + rs1e->withdraws_received, s2i->withdraws_invalid, + rs2i->withdraws_ignored, rs2i->withdraws_accepted); } -static const char *pipe_feed_state[] = { [ES_DOWN] = "down", [ES_FEEDING] = "feed", [ES_READY] = "up" }; - static void pipe_show_proto_info(struct proto *P) { @@ -270,13 +272,17 @@ pipe_show_proto_info(struct proto *P) cli_msg(-1006, " Channel %s", "main"); cli_msg(-1006, " Table: %s", p->pri->table->name); cli_msg(-1006, " Peer table: %s", p->sec->table->name); - cli_msg(-1006, " Import state: %s", pipe_feed_state[p->sec->export_state]); - cli_msg(-1006, " Export state: %s", pipe_feed_state[p->pri->export_state]); + cli_msg(-1006, " Import state: %s", rt_export_state_name(rt_export_get_state(p->sec->out_req.hook))); + cli_msg(-1006, " Export state: %s", rt_export_state_name(rt_export_get_state(p->pri->out_req.hook))); cli_msg(-1006, " Import filter: %s", filter_name(p->sec->out_filter)); cli_msg(-1006, " Export filter: %s", filter_name(p->pri->out_filter)); - channel_show_limit(&p->pri->in_limit, "Import limit:"); - channel_show_limit(&p->sec->in_limit, "Export limit:"); + + + channel_show_limit(&p->pri->in_limit, "Import limit:", + (p->pri->limit_active & (1 << PLD_IN)), p->pri->limit_actions[PLD_IN]); + channel_show_limit(&p->sec->in_limit, "Export limit:", + (p->sec->limit_active & (1 << PLD_IN)), p->sec->limit_actions[PLD_IN]); if (P->proto_state != PS_DOWN) pipe_show_stats(p); diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 038c6666..60c857eb 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -12,12 +12,14 @@ struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ + u8 max_generation; }; struct pipe_proto { struct proto p; struct channel *pri; struct channel *sec; + struct tbf rl_gen; }; #endif diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 5734dbc9..1f75b7c2 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -388,16 +388,16 @@ radv_trigger_valid(struct radv_config *cf) } static inline int -radv_net_match_trigger(struct radv_config *cf, net *n) +radv_net_match_trigger(struct radv_config *cf, const net_addr *n) { - return radv_trigger_valid(cf) && net_equal(n->n.addr, &cf->trigger); + return radv_trigger_valid(cf) && net_equal(n, &cf->trigger); } int -radv_preexport(struct proto *P, rte *new) +radv_preexport(struct channel *c, rte *new) { // struct radv_proto *p = (struct radv_proto *) P; - struct radv_config *cf = (struct radv_config *) (P->cf); + struct radv_config *cf = (struct radv_config *) (c->proto->cf); if (radv_net_match_trigger(cf, new->net)) return RIC_PROCESS; @@ -409,7 +409,7 @@ radv_preexport(struct proto *P, rte *new) } static void -radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte *old UNUSED) +radv_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *n, rte *new, const rte *old UNUSED) { struct radv_proto *p = (struct radv_proto *) P; struct radv_config *cf = (struct radv_config *) (P->cf); @@ -460,14 +460,14 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte (preference != RA_PREF_HIGH)) { log(L_WARN "%s: Invalid ra_preference value %u on route %N", - p->p.name, preference, n->n.addr); + p->p.name, preference, n); preference = RA_PREF_MEDIUM; preference_set = 1; lifetime = 0; lifetime_set = 1; } - rt = fib_get(&p->routes, n->n.addr); + rt = fib_get(&p->routes, n); /* Ignore update if nothing changed */ if (rt->valid && @@ -490,7 +490,7 @@ radv_rt_notify(struct proto *P, struct channel *ch UNUSED, net *n, rte *new, rte else { /* Withdraw */ - rt = fib_find(&p->routes, n->n.addr); + rt = fib_find(&p->routes, n); if (!rt || !rt->valid) return; @@ -558,7 +558,7 @@ radv_check_active(struct radv_proto *p) return 1; struct channel *c = p->p.main_channel; - return rt_examine(c->table, &cf->trigger, &p->p, c->out_filter); + return rt_examine(c->table, &cf->trigger, c, c->out_filter); } static void diff --git a/proto/rip/rip.c b/proto/rip/rip.c index a0f2fdc0..cc8b57eb 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -231,16 +231,15 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en) ea_set_attr(&a0.eattrs, EA_LITERAL_DIRECT_ADATA(&ea_rip_from, 0, &riad.ad)); - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a, p->p.main_source); + rte e0 = { + .attrs = &a0, + .src = p->p.main_source, + }; - rte_update(&p->p, en->n.addr, e); + rte_update(p->p.main_channel, en->n.addr, &e0, p->p.main_source); } else - { - /* Withdraw */ - rte_update(&p->p, en->n.addr, NULL); - } + rte_update(p->p.main_channel, en->n.addr, NULL, p->p.main_source); } /** @@ -335,8 +334,8 @@ rip_withdraw_rte(struct rip_proto *p, net_addr *n, struct rip_neighbor *from) * it into our data structures. */ static void -rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, struct rte *new, - struct rte *old UNUSED) +rip_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, struct rte *new, + const struct rte *old UNUSED) { struct rip_proto *p = (struct rip_proto *) P; struct rip_entry *en; @@ -353,14 +352,14 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s if (rt_metric > p->infinity) { log(L_WARN "%s: Invalid rip_metric value %u for route %N", - p->p.name, rt_metric, net->n.addr); + p->p.name, rt_metric, net); rt_metric = p->infinity; } if (rt_tag > 0xffff) { log(L_WARN "%s: Invalid rip_tag value %u for route %N", - p->p.name, rt_tag, net->n.addr); + p->p.name, rt_tag, net); rt_metric = p->infinity; rt_tag = 0; } @@ -372,7 +371,7 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s * collection. */ - en = fib_get(&p->rtable, net->n.addr); + en = fib_get(&p->rtable, net); old_metric = en->valid ? en->metric : -1; @@ -392,7 +391,7 @@ rip_rt_notify(struct proto *P, struct channel *ch UNUSED, struct network *net, s else { /* Withdraw */ - en = fib_find(&p->rtable, net->n.addr); + en = fib_find(&p->rtable, net); if (!en || en->valid != RIP_ENTRY_VALID) return; @@ -1127,7 +1126,7 @@ rip_rte_better(struct rte *new, struct rte *old) } static u32 -rip_rte_igp_metric(struct rte *rt) +rip_rte_igp_metric(const rte *rt) { return ea_get_int(rt->attrs->eattrs, &ea_rip_metric, IGP_METRIC_UNKNOWN); } diff --git a/proto/rpki/packets.c b/proto/rpki/packets.c index d246dd50..4a52b54b 100644 --- a/proto/rpki/packets.c +++ b/proto/rpki/packets.c @@ -661,9 +661,9 @@ rpki_handle_cache_response_pdu(struct rpki_cache *cache, const struct pdu_cache_ * a refresh cycle. */ if (cache->p->roa4_channel) - rt_refresh_begin(cache->p->roa4_channel->table, cache->p->roa4_channel); + rt_refresh_begin(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_begin(cache->p->roa6_channel->table, cache->p->roa6_channel); + rt_refresh_begin(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); cache->p->refresh_channels = 1; } @@ -846,9 +846,9 @@ rpki_handle_end_of_data_pdu(struct rpki_cache *cache, const struct pdu_end_of_da { cache->p->refresh_channels = 0; if (cache->p->roa4_channel) - rt_refresh_end(cache->p->roa4_channel->table, cache->p->roa4_channel); + rt_refresh_end(cache->p->roa4_channel->table, &cache->p->roa4_channel->in_req); if (cache->p->roa6_channel) - rt_refresh_end(cache->p->roa6_channel->table, cache->p->roa6_channel); + rt_refresh_end(cache->p->roa6_channel->table, &cache->p->roa6_channel->in_req); } cache->last_update = current_time(); @@ -924,6 +924,9 @@ rpki_rx_hook(struct birdsock *sk, uint size) struct rpki_cache *cache = sk->data; struct rpki_proto *p = cache->p; + if ((p->p.proto_state == PS_DOWN) || (p->cache != cache)) + return 0; + byte *pkt_start = sk->rbuf; byte *end = pkt_start + size; @@ -980,6 +983,8 @@ rpki_err_hook(struct birdsock *sk, int error_num) CACHE_TRACE(D_EVENTS, cache, "The other side closed a connection"); } + if (cache->p->cache != cache) + return; rpki_cache_change_state(cache, RPKI_CS_ERROR_TRANSPORT); } @@ -999,6 +1004,9 @@ rpki_tx_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + while (rpki_fire_tx(cache) > 0) ; } @@ -1008,6 +1016,9 @@ rpki_connected_hook(sock *sk) { struct rpki_cache *cache = sk->data; + if (cache->p->cache != cache) + return; + CACHE_TRACE(D_EVENTS, cache, "Connected"); proto_notify_state(&cache->p->p, PS_UP); diff --git a/proto/rpki/rpki.c b/proto/rpki/rpki.c index 4318cbb7..c8b0ff67 100644 --- a/proto/rpki/rpki.c +++ b/proto/rpki/rpki.c @@ -125,19 +125,16 @@ rpki_table_add_roa(struct rpki_cache *cache, struct channel *channel, const net_ ea_set_attr_u32(&a0.eattrs, &ea_gen_preference, 0, channel->preference); ea_set_attr_u32(&a0.eattrs, &ea_gen_source, 0, RTS_RPKI); - rta *a = rta_lookup(&a0); - rte *e = rte_get_temp(a, p->p.main_source); + rte e0 = { .attrs = &a0, .src = p->p.main_source, }; - e->pflags = 0; - - rte_update2(channel, &pfxr->n, e, e->src); + rte_update(channel, &pfxr->n, &e0, p->p.main_source); } void rpki_table_remove_roa(struct rpki_cache *cache, struct channel *channel, const net_addr_union *pfxr) { struct rpki_proto *p = cache->p; - rte_update2(channel, &pfxr->n, NULL, p->p.main_source); + rte_update(channel, &pfxr->n, NULL, p->p.main_source); } @@ -385,6 +382,9 @@ rpki_refresh_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -431,6 +431,9 @@ rpki_retry_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + CACHE_DBG(cache, "%s", rpki_cache_state_to_str(cache->state)); switch (cache->state) @@ -476,6 +479,9 @@ rpki_expire_hook(timer *tm) { struct rpki_cache *cache = tm->data; + if (cache->p->cache != cache) + return; + if (!cache->last_update) return; @@ -826,16 +832,27 @@ rpki_show_proto_info(struct proto *P) if (cache) { const char *transport_name = "---"; + uint default_port = 0; switch (cf->tr_config.type) { #if HAVE_LIBSSH - case RPKI_TR_SSH: transport_name = "SSHv2"; break; + case RPKI_TR_SSH: + transport_name = "SSHv2"; + default_port = RPKI_SSH_PORT; + break; #endif - case RPKI_TR_TCP: transport_name = "Unprotected over TCP"; break; + case RPKI_TR_TCP: + transport_name = "Unprotected over TCP"; + default_port = RPKI_TCP_PORT; + break; }; cli_msg(-1006, " Cache server: %s", cf->hostname); + + if (cf->port != default_port) + cli_msg(-1006, " Cache port: %u", cf->port); + cli_msg(-1006, " Status: %s", rpki_cache_state_to_str(cache->state)); cli_msg(-1006, " Transport: %s", transport_name); cli_msg(-1006, " Protocol version: %u", cache->version); diff --git a/proto/rpki/transport.c b/proto/rpki/transport.c index a1ac7587..81bd6dd8 100644 --- a/proto/rpki/transport.c +++ b/proto/rpki/transport.c @@ -85,6 +85,7 @@ rpki_tr_open(struct rpki_tr_sock *tr) sk->rbsize = RPKI_RX_BUFFER_SIZE; sk->tbsize = RPKI_TX_BUFFER_SIZE; sk->tos = IP_PREC_INTERNET_CONTROL; + sk->vrf = cache->p->p.vrf; if (ipa_zero(sk->daddr) && sk->host) { diff --git a/proto/static/static.c b/proto/static/static.c index 806849c4..6369fea5 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -114,24 +114,13 @@ static_announce_rte(struct static_proto *p, struct static_route *r) return; /* We skip rta_lookup() here */ - rte *e = rte_get_temp(a, src); - e->pflags = 0; + rte e0 = { .attrs = a, .src = src, .net = r->net, }, *e = &e0; + /* Evaluate the filter */ if (r->cmds) - { - /* Create a temporary table node */ - e->net = alloca(sizeof(net) + r->net->length); - memset(e->net, 0, sizeof(net) + r->net->length); - net_copy(e->net->n.addr, r->net); - - /* Evaluate the filter */ - f_eval_rte(r->cmds, &e); - - /* Remove the temporary node */ - e->net = NULL; - } + f_eval_rte(r->cmds, e); - rte_update2(p->p.main_channel, r->net, e, src); + rte_update(p->p.main_channel, r->net, e, src); r->state = SRS_CLEAN; return; @@ -139,7 +128,7 @@ withdraw: if (r->state == SRS_DOWN) return; - rte_update2(p->p.main_channel, r->net, NULL, src); + rte_update(p->p.main_channel, r->net, NULL, src); r->state = SRS_DOWN; } @@ -305,7 +294,7 @@ static void static_remove_rte(struct static_proto *p, struct static_route *r) { if (r->state) - rte_update2(p->p.main_channel, r->net, NULL, static_get_source(p, r->index)); + rte_update(p->p.main_channel, r->net, NULL, static_get_source(p, r->index)); static_reset_rte(p, r); } @@ -508,19 +497,13 @@ static_shutdown(struct proto *P) WALK_LIST(r, cf->routes) static_reset_rte(p, r); - return PS_DOWN; -} - -static void -static_cleanup(struct proto *P) -{ - struct static_proto *p = (void *) P; - if (p->igp_table_ip4) rt_unlock_table(p->igp_table_ip4); if (p->igp_table_ip6) rt_unlock_table(p->igp_table_ip6); + + return PS_DOWN; } static void @@ -778,7 +761,6 @@ struct protocol proto_static = { .dump = static_dump, .start = static_start, .shutdown = static_shutdown, - .cleanup = static_cleanup, .reconfigure = static_reconfigure, .copy_config = static_copy_config, .get_route_info = static_get_route_info, diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 61ac7a5c..1c1bd50c 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -398,7 +398,6 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) /* p is NULL iff KRT_SHARED_SOCKET and !scan */ int ipv6; - rte *e; net *net; sockaddr dst, gate, mask; ip_addr idst, igate, imask; @@ -519,7 +518,6 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) net = net_get(p->p.main_channel->table, &ndst); rta a = { - .src = p->p.main_source, }; ea_set_attr_u32(&a->eattrs, &ea_gen_source, 0, RTS_INHERIT); @@ -579,17 +577,16 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) } } - done: - e = rte_get_temp(&a); - e->net = net; + done:; + rte e0 = { .attrs = &a, .net = net, }; - ea_set_attr(e->attrs->eattrs, + ea_set_attr(e0.attrs->eattrs, EA_LITERAL_EMBEDDED(EA_KRT_SOURCE, T_INT, 0, src2)); if (scan) - krt_got_route(p, e, src); + krt_got_route(p, &e0, src); else - krt_got_route_async(p, e, new, src); + krt_got_route_async(p, &e0, new, src); } static void diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 40f6212e..a4172a6d 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -110,7 +110,7 @@ struct nl_parse_state int scan; int merge; - net *net; + net_addr *net; rta *attrs; struct krt_proto *proto; s8 new; @@ -1437,10 +1437,9 @@ nh_bufsize(struct nexthop_adata *nhad) } static int -nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop_adata *nh) +nl_send_route(struct krt_proto *p, const rte *e, int op, int dest, struct nexthop_adata *nh) { eattr *ea; - net *net = e->net; rta *a = e->attrs; ea_list *eattrs = a->eattrs; @@ -1456,7 +1455,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop_adat int rsize = sizeof(*r) + bufsize; r = alloca(rsize); - DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); + DBG("nl_send_route(%N,op=%x)\n", e->net, op); bzero(&r->h, sizeof(r->h)); bzero(&r->r, sizeof(r->r)); @@ -1465,7 +1464,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop_adat r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; r->r.rtm_family = p->af; - r->r.rtm_dst_len = net_pxlen(net->n.addr); + r->r.rtm_dst_len = net_pxlen(e->net); r->r.rtm_protocol = RTPROT_BIRD; r->r.rtm_scope = RT_SCOPE_NOWHERE; #ifdef HAVE_MPLS_KERNEL @@ -1477,7 +1476,7 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop_adat * 2) Never use RTA_PRIORITY */ - u32 label = net_mpls(net->n.addr); + u32 label = net_mpls(e->net); nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label); r->r.rtm_scope = RT_SCOPE_UNIVERSE; r->r.rtm_type = RTN_UNICAST; @@ -1485,12 +1484,12 @@ nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop_adat else #endif { - nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); + nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(e->net)); /* Add source address for IPv6 SADR routes */ - if (net->n.addr->type == NET_IP6_SADR) + if (e->net->type == NET_IP6_SADR) { - net_addr_ip6_sadr *a = (void *) &net->n.addr; + net_addr_ip6_sadr *a = (void *) &e->net; nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix); r->r.rtm_src_len = a->src_pxlen; } @@ -1626,7 +1625,7 @@ nl_add_rte(struct krt_proto *p, rte *e) } static inline int -nl_delete_rte(struct krt_proto *p, rte *e) +nl_delete_rte(struct krt_proto *p, const rte *e) { int err = 0; @@ -1650,7 +1649,7 @@ nl_replace_rte(struct krt_proto *p, rte *e) void -krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) +krt_replace_rte(struct krt_proto *p, const net_addr *n UNUSED, rte *new, const rte *old) { int err = 0; @@ -1689,7 +1688,7 @@ krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) } static int -nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) +nl_mergable_route(struct nl_parse_state *s, const net_addr *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) { /* Route merging is used for IPv6 scans */ if (!s->scan || (rtm_family != AF_INET6)) @@ -1709,23 +1708,25 @@ nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint static void nl_announce_route(struct nl_parse_state *s) { - rte *e = rte_get_temp(s->attrs, s->proto->p.main_source); - e->net = s->net; + rte e0 = { + .attrs = s->attrs, + .net = s->net, + }; EA_LOCAL_LIST(2) ea = { - .l = { .count = 2, .next = e->attrs->eattrs }, + .l = { .count = 2, .next = e0.attrs->eattrs }, .a = { EA_LITERAL_EMBEDDED(&ea_krt_source, 0, s->krt_proto), EA_LITERAL_EMBEDDED(&ea_krt_metric, 0, s->krt_metric), }, }; - e->attrs->eattrs = &ea.l; + e0.attrs->eattrs = &ea.l; if (s->scan) - krt_got_route(s->proto, e, s->krt_src); + krt_got_route(s->proto, &e0, s->krt_src); else - krt_got_route_async(s->proto, e, s->new, s->krt_src); + krt_got_route_async(s->proto, &e0, s->new, s->krt_src); s->net = NULL; s->attrs = NULL; @@ -1875,16 +1876,14 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) krt_src = KRT_SRC_ALIEN; } - net_addr *n = &dst; + net_addr *net = &dst; if (p->p.net_type == NET_IP6_SADR) { - n = alloca(sizeof(net_addr_ip6_sadr)); - net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst), + net = alloca(sizeof(net_addr_ip6_sadr)); + net_fill_ip6_sadr(net, net6_prefix(&dst), net6_pxlen(&dst), net6_prefix(&src), net6_pxlen(&src)); } - net *net = net_get(p->p.main_channel->table, n); - if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family)) nl_announce_route(s); @@ -1910,7 +1909,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) case RTN_UNICAST: if (a[RTA_MULTIPATH]) { - struct nexthop_adata *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src); + struct nexthop_adata *nh = nl_parse_multipath(s, p, net, a[RTA_MULTIPATH], i->rtm_family, krt_src); if (!nh) SKIP("strange RTA_MULTIPATH\n"); @@ -1925,7 +1924,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) nhad.nh.iface = if_find_by_index(oif); if (!nhad.nh.iface) { - log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", net, oif); return; } @@ -1952,7 +1951,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) (nhad.nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0); if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, + log(L_ERR "KRT: Received route %N with strange next-hop %I", net, nhad.nh.gw); return; } @@ -2021,7 +2020,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) u32 metrics[KRT_METRICS_MAX]; if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) { - log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr); + log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net); return; } @@ -2042,7 +2041,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (!s->net) { /* Store the new route */ - s->net = net; + s->net = lp_alloc(s->pool, net->length); + net_copy(s->net, net); + s->attrs = ra; ea_set_attr_data(&ra->eattrs, &ea_gen_nexthop, 0, diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index a0789936..6e55d8f5 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -257,14 +257,14 @@ static inline void krt_trace_in(struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log(L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); + log(L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } static inline void krt_trace_in_rl(struct tbf *f, struct krt_proto *p, rte *e, char *msg) { if (p->p.debug & D_PACKETS) - log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net->n.addr, msg); + log_rl(f, L_TRACE "%s: %N: %s", p->p.name, e->net, msg); } /* @@ -305,38 +305,39 @@ krt_uptodate(rte *a, rte *b) static void krt_learn_announce_update(struct krt_proto *p, rte *e) { - net *n = e->net; - rta *aa = rta_clone(e->attrs); - rte *ee = rte_get_temp(aa, p->p.main_source); - rte_update(&p->p, n->n.addr, ee); + rte e0 = { + .attrs = rta_clone(e->attrs), + .src = p->p.main_source, + }; + + rte_update(p->p.main_channel, e->net, &e0, p->p.main_source); } static void -krt_learn_announce_delete(struct krt_proto *p, net *n) +krt_learn_announce_delete(struct krt_proto *p, net_addr *n) { - rte_update(&p->p, n->n.addr, NULL); + rte_update(p->p.main_channel, n, NULL, p->p.main_source); } /* Called when alien route is discovered during scan */ static void krt_learn_scan(struct krt_proto *p, rte *e) { - net *n0 = e->net; - net *n = net_get(p->krt_table, n0->n.addr); - rte *m, **mm; + net *n = net_get(p->krt_table, e->net); + struct rte_storage *m, **mm; - e->attrs = rta_lookup(e->attrs); + struct rte_storage *ee = rte_store(e, n, p->krt_table); - for(mm=&n->routes; m = *mm; mm=&m->next) - if (krt_same_key(m, e)) + for(mm = &n->routes; m = *mm; mm = &m->next) + if (krt_same_key(&m->rte, e)) break; if (m) { - if (krt_uptodate(m, e)) + if (krt_uptodate(&m->rte, e)) { krt_trace_in_rl(&rl_alien, p, e, "[alien] seen"); - rte_free(e); - m->pflags |= KRT_REF_SEEN; + rte_free(ee); + m->rte.pflags |= KRT_REF_SEEN; } else { @@ -348,11 +349,12 @@ krt_learn_scan(struct krt_proto *p, rte *e) } else krt_trace_in(p, e, "[alien] created"); + if (!m) { - e->next = n->routes; - n->routes = e; - e->pflags |= KRT_REF_SEEN; + ee->next = n->routes; + n->routes = ee; + ee->rte.pflags |= KRT_REF_SEEN; } } @@ -368,7 +370,7 @@ krt_learn_prune(struct krt_proto *p) again: FIB_ITERATE_START(fib, &fit, net, n) { - rte *e, **ee, *best, **pbest, *old_best; + struct rte_storage *e, **ee, *best, **pbest, *old_best; /* * Note that old_best may be NULL even if there was an old best route in @@ -382,48 +384,48 @@ again: ee = &n->routes; while (e = *ee) { - if (e->pflags & KRT_REF_BEST) + if (e->rte.pflags & KRT_REF_BEST) old_best = e; - if (!(e->pflags & KRT_REF_SEEN)) + if (!(e->rte.pflags & KRT_REF_SEEN)) { *ee = e->next; rte_free(e); continue; } - if (!best || krt_metric(best) > krt_metric(e)) + if (!best || krt_metric(&best->rte) > krt_metric(&e->rte)) { best = e; pbest = ee; } - e->pflags &= ~(KRT_REF_SEEN | KRT_REF_BEST); + e->rte.pflags &= ~(KRT_REF_SEEN | KRT_REF_BEST); ee = &e->next; } if (!n->routes) { DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen); if (old_best) - krt_learn_announce_delete(p, n); + krt_learn_announce_delete(p, n->n.addr); FIB_ITERATE_PUT(&fit); fib_delete(fib, n); goto again; } - best->pflags |= KRT_REF_BEST; + best->rte.pflags |= KRT_REF_BEST; *pbest = best->next; best->next = n->routes; n->routes = best; if ((best != old_best) || p->reload) { - DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(best)); - krt_learn_announce_update(p, best); + DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(&best->rte)); + krt_learn_announce_update(p, &best->rte); } else - DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(best)); + DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(&best->rte)); } FIB_ITERATE_END; @@ -433,27 +435,26 @@ again: static void krt_learn_async(struct krt_proto *p, rte *e, int new) { - net *n0 = e->net; - net *n = net_get(p->krt_table, n0->n.addr); - rte *g, **gg, *best, **bestp, *old_best; + net *n = net_get(p->krt_table, e->net); + struct rte_storage *g, **gg, *best, **bestp, *old_best; ASSERT(!e->attrs->cached); ea_set_attr_u32(&e->attrs->eattrs, &ea_gen_preference, 0, p->p.main_channel->preference); - e->attrs = rta_lookup(e->attrs); + struct rte_storage *ee = rte_store(e, n, p->krt_table); old_best = n->routes; for(gg=&n->routes; g = *gg; gg = &g->next) - if (krt_same_key(g, e)) + if (krt_same_key(&g->rte, e)) break; if (new) { if (g) { - if (krt_uptodate(g, e)) + if (krt_uptodate(&g->rte, e)) { krt_trace_in(p, e, "[alien async] same"); - rte_free(e); + rte_free(ee); return; } krt_trace_in(p, e, "[alien async] updated"); @@ -463,38 +464,38 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) else krt_trace_in(p, e, "[alien async] created"); - e->next = n->routes; - n->routes = e; + ee->next = n->routes; + n->routes = ee; } else if (!g) { krt_trace_in(p, e, "[alien async] delete failed"); - rte_free(e); + rte_free(ee); return; } else { krt_trace_in(p, e, "[alien async] removed"); *gg = g->next; - rte_free(e); + rte_free(ee); rte_free(g); } best = n->routes; bestp = &n->routes; for(gg=&n->routes; g=*gg; gg=&g->next) { - if (krt_metric(best) > krt_metric(g)) + if (krt_metric(&best->rte) > krt_metric(&g->rte)) { best = g; bestp = gg; } - g->pflags &= ~KRT_REF_BEST; + g->rte.pflags &= ~KRT_REF_BEST; } if (best) { - best->pflags |= KRT_REF_BEST; + best->rte.pflags |= KRT_REF_BEST; *bestp = best->next; best->next = n->routes; n->routes = best; @@ -504,9 +505,9 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) { DBG("krt_learn_async: distributing change\n"); if (best) - krt_learn_announce_update(p, best); + krt_learn_announce_update(p, &best->rte); else - krt_learn_announce_delete(p, n); + krt_learn_announce_delete(p, n->n.addr); } } @@ -544,40 +545,53 @@ krt_dump(struct proto *P) static inline int krt_is_installed(struct krt_proto *p, net *n) { - return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->id); + return n->routes && bmap_test(&p->p.main_channel->export_map, n->routes->rte.id); } -static void -krt_flush_routes(struct krt_proto *p) +static uint +rte_feed_count(net *n) { - struct rtable *t = p->p.main_channel->table; + uint count = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) + count++; + return count; +} - KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); - FIB_WALK(&t->fib, net, n) +static void +rte_feed_obtain(net *n, rte **feed, uint count) +{ + uint i = 0; + for (struct rte_storage *e = n->routes; e; e = e->next) + if (rte_is_valid(RTE_OR_NULL(e))) { - if (krt_is_installed(p, n)) - { - /* FIXME: this does not work if gw is changed in export filter */ - krt_replace_rte(p, n, NULL, n->routes); - } + ASSERT_DIE(i < count); + feed[i++] = &e->rte; } - FIB_WALK_END; + ASSERT_DIE(i == count); } static struct rte * -krt_export_net(struct krt_proto *p, net *net, rte **rt_free) +krt_export_net(struct krt_proto *p, net *net) { struct channel *c = p->p.main_channel; const struct filter *filter = c->out_filter; - rte *rt; if (c->ra_mode == RA_MERGED) - return rt_export_merged(c, net, rt_free, krt_filter_lp, 1); + { + uint count = rte_feed_count(net); + if (!count) + return NULL; - rt = net->routes; - *rt_free = NULL; + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(net, feed, count); + return rt_export_merged(c, feed, count, krt_filter_lp, 1); + } - if (!rte_is_valid(rt)) + static _Thread_local rte rt; + rt = net->routes->rte; + + if (!rte_is_valid(&rt)) return NULL; if (filter == FILTER_REJECT) @@ -593,13 +607,9 @@ krt_export_net(struct krt_proto *p, net *net, rte **rt_free) accept: - if (rt != net->routes) - *rt_free = rt; - return rt; + return &rt; reject: - if (rt != net->routes) - rte_free(rt); return NULL; } @@ -622,8 +632,7 @@ krt_same_dest(rte *k, rte *e) void krt_got_route(struct krt_proto *p, rte *e, s8 src) { - rte *new = NULL, *rt_free = NULL; - net *n = e->net; + rte *new = NULL; e->pflags = 0; #ifdef KRT_ALLOW_LEARN @@ -639,24 +648,25 @@ krt_got_route(struct krt_proto *p, rte *e, s8 src) if (KRT_CF->learn) krt_learn_scan(p, e); else - { - krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); - rte_free(e); - } + krt_trace_in_rl(&rl_alien, p, e, "[alien] ignored"); return; } #endif /* The rest is for KRT_SRC_BIRD (or KRT_SRC_UNKNOWN) */ + /* Deleting all routes if flush is requested */ + if (p->flush_routes) + goto delete; /* We wait for the initial feed to have correct installed state */ if (!p->ready) goto ignore; - if (!krt_is_installed(p, n)) + net *net = net_find(p->p.main_channel->table, e->net); + if (!net || !krt_is_installed(p, net)) goto delete; - new = krt_export_net(p, n, &rt_free); + new = krt_export_net(p, net); /* Rejected by filters */ if (!new) @@ -689,20 +699,15 @@ ignore: update: krt_trace_in(p, new, "updating"); - krt_replace_rte(p, n, new, e); + krt_replace_rte(p, e->net, new, e); goto done; delete: krt_trace_in(p, e, "deleting"); - krt_replace_rte(p, n, NULL, e); + krt_replace_rte(p, e->net, NULL, e); goto done; done: - rte_free(e); - - if (rt_free) - rte_free(rt_free); - lp_flush(krt_filter_lp); } @@ -720,20 +725,16 @@ krt_prune(struct krt_proto *p) KRT_TRACE(p, D_EVENTS, "Pruning table %s", t->name); FIB_WALK(&t->fib, net, n) { - if (p->ready && krt_is_installed(p, n) && !bmap_test(&p->seen_map, n->routes->id)) + if (p->ready && krt_is_installed(p, n) && !bmap_test(&p->seen_map, n->routes->rte.id)) { - rte *rt_free = NULL; - rte *new = krt_export_net(p, n, &rt_free); + rte *new = krt_export_net(p, n); if (new) { krt_trace_in(p, new, "installing"); - krt_replace_rte(p, n, new, NULL); + krt_replace_rte(p, n->n.addr, new, NULL); } - if (rt_free) - rte_free(rt_free); - lp_flush(krt_filter_lp); } } @@ -748,10 +749,20 @@ krt_prune(struct krt_proto *p) p->initialized = 1; } +static void +krt_flush_routes(struct krt_proto *p) +{ + KRT_TRACE(p, D_EVENTS, "Flushing kernel routes"); + p->flush_routes = 1; + krt_init_scan(p); + krt_do_scan(p); + /* No prune! */ + p->flush_routes = 0; +} + void krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) { - net *net = e->net; e->pflags = 0; switch (src) @@ -764,7 +775,7 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) if (new) { krt_trace_in(p, e, "[redirect] deleting"); - krt_replace_rte(p, net, NULL, e); + krt_replace_rte(p, e->net, NULL, e); } /* If !new, it is probably echo of our deletion */ break; @@ -778,7 +789,6 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new, s8 src) } #endif } - rte_free(e); } /* @@ -885,10 +895,9 @@ krt_scan_timer_kick(struct krt_proto *p) */ static int -krt_preexport(struct proto *P, rte *e) +krt_preexport(struct channel *c, rte *e) { - // struct krt_proto *p = (struct krt_proto *) P; - if (e->src->proto == P) + if (e->src->proto == c->proto) return -1; if (!krt_capable(e)) @@ -898,8 +907,8 @@ krt_preexport(struct proto *P, rte *e) } static void -krt_rt_notify(struct proto *P, struct channel *ch UNUSED, net *net, - rte *new, rte *old) +krt_rt_notify(struct proto *P, struct channel *ch UNUSED, const net_addr *net, + rte *new, const rte *old) { struct krt_proto *p = (struct krt_proto *) P; diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 04b04162..69fc9aa1 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -70,6 +70,7 @@ struct krt_proto { byte ready; /* Initial feed has been finished */ byte initialized; /* First scan has been finished */ byte reload; /* Next scan is doing reload */ + byte flush_routes; /* Scanning to flush */ }; extern pool *krt_pool; @@ -147,7 +148,7 @@ void krt_sys_copy_config(struct krt_config *, struct krt_config *); int krt_capable(rte *e); void krt_do_scan(struct krt_proto *); -void krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old); +void krt_replace_rte(struct krt_proto *p, const net_addr *n, rte *new, const rte *old); int krt_sys_get_attr(const eattr *a, byte *buf, int buflen); |