diff options
-rw-r--r-- | conf/confbase.Y | 2 | ||||
-rw-r--r-- | doc/bird.sgml | 8 | ||||
-rw-r--r-- | filter/config.Y | 11 | ||||
-rw-r--r-- | filter/f-inst.c | 12 | ||||
-rw-r--r-- | filter/test.conf | 6 | ||||
-rw-r--r-- | lib/attrs.h | 2 | ||||
-rw-r--r-- | lib/hash.h | 40 | ||||
-rw-r--r-- | lib/hash_test.c | 41 | ||||
-rw-r--r-- | lib/route.h | 37 | ||||
-rw-r--r-- | nest/cli.c | 8 | ||||
-rw-r--r-- | nest/cli.h | 4 | ||||
-rw-r--r-- | nest/config.Y | 28 | ||||
-rw-r--r-- | nest/proto.c | 102 | ||||
-rw-r--r-- | nest/protocol.h | 21 | ||||
-rw-r--r-- | nest/rt-attr.c | 255 | ||||
-rw-r--r-- | nest/rt-show.c | 362 | ||||
-rw-r--r-- | nest/rt-table.c | 600 | ||||
-rw-r--r-- | nest/rt.h | 73 | ||||
-rw-r--r-- | proto/bgp/attrs.c | 274 | ||||
-rw-r--r-- | proto/bgp/bgp.c | 19 | ||||
-rw-r--r-- | proto/bgp/bgp.h | 22 | ||||
-rw-r--r-- | proto/bgp/packets.c | 34 | ||||
-rw-r--r-- | proto/mrt/mrt.c | 9 | ||||
-rw-r--r-- | proto/ospf/rt.c | 2 | ||||
-rw-r--r-- | proto/perf/perf.c | 2 | ||||
-rw-r--r-- | proto/pipe/config.Y | 6 | ||||
-rw-r--r-- | proto/pipe/pipe.c | 10 | ||||
-rw-r--r-- | proto/pipe/pipe.h | 1 | ||||
-rw-r--r-- | proto/rip/rip.c | 2 | ||||
-rw-r--r-- | proto/static/static.c | 2 | ||||
-rw-r--r-- | sysdep/unix/krt.c | 253 | ||||
-rw-r--r-- | sysdep/unix/krt.h | 4 |
32 files changed, 1198 insertions, 1054 deletions
diff --git a/conf/confbase.Y b/conf/confbase.Y index 9a83083c..58890cd6 100644 --- a/conf/confbase.Y +++ b/conf/confbase.Y @@ -173,7 +173,7 @@ expr_us: | expr US { $$ = $1 US_; } ; -toksym: FROM ; +toksym: FROM | PREFERENCE ; symbol: CF_SYM_UNDEFINED | CF_SYM_KNOWN | toksym ; symbol_known: CF_SYM_KNOWN | toksym ; diff --git a/doc/bird.sgml b/doc/bird.sgml index d17de23f..c9ce670b 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -914,10 +914,12 @@ inherited from templates can be updated by new definitions. <cf/none/ is for dropping all routes. Default: <cf/all/ (except for EBGP). - <tag><label id="proto-export">export <m/filter/</tag> + <tag><label id="proto-export">export [ in <m/prefix/ ] <m/filter/</tag> This is similar to the <cf>import</cf> keyword, except that it works in - the direction from the routing table to the protocol. Default: <cf/none/ - (except for EBGP). + the direction from the routing table to the protocol. If <cf/in/ keyword is used, + only routes inside the given prefix are exported. Other routes are completely + ignored (e.g. no logging and no statistics). + Default: <cf/none/ (except for EBGP). <tag><label id="proto-import-keep-filtered">import keep filtered <m/switch/</tag> Usually, if an import filter rejects a route, the route is forgotten. diff --git a/filter/config.Y b/filter/config.Y index ca792593..8cecf936 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -27,6 +27,9 @@ static inline u32 pair_b(u32 p) { return p & 0xFFFF; } cf_error("Can't empty %s: not an attribute", sym->name); \ f_generate_complex(fi_code, sym->attribute, arg); \ }) + +#define f_generate_complex_default(fi_code, da, arg, def) \ + f_new_inst(FI_EA_SET, f_new_inst(fi_code, f_new_inst(FI_DEFAULT, f_new_inst(FI_EA_GET, da), f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = def })), arg), da) /* @@ -902,10 +905,10 @@ cmd: } | attr_bit '=' term ';' { $$ = f_new_inst(FI_CONDITION, $3, - f_generate_complex(FI_BITOR, $1.class, - f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << $1.bit)})), - f_generate_complex(FI_BITAND, $1.class, - f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = ~(1U << $1.bit)})) + f_generate_complex_default(FI_BITOR, $1.class, + f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = (1U << $1.bit)}), 0), + f_generate_complex_default(FI_BITAND, $1.class, + f_new_inst(FI_CONSTANT, (struct f_val) { .type = T_INT, .val.i = ~(1U << $1.bit)}), 0) ); } | break_command print_list ';' { diff --git a/filter/f-inst.c b/filter/f-inst.c index 9ff5f79a..e7b642ab 100644 --- a/filter/f-inst.c +++ b/filter/f-inst.c @@ -768,6 +768,18 @@ ea_unset_attr(fs->eattrs, 1, da); } + INST(FI_DEFAULT, 2, 1) { + ARG_ANY(1); + ARG_ANY(2); + + log(L_INFO "Type of arg 1 is: %d", v1.type); + + if (v1.type == T_VOID) + RESULT_VAL(v2); + else + RESULT_VAL(v1); + } + INST(FI_LENGTH, 1, 1) { /* Get length of */ ARG_ANY(1); switch(v1.type) { diff --git a/filter/test.conf b/filter/test.conf index 21e7330f..cd2fa8db 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -1445,6 +1445,8 @@ bool t; rip_metric = 14; unset(rip_metric); + preference = 1234; + test_ca_int1 = 42; test_ca_ip2 = 1.3.5.7; test_ca_quad3 = 2.4.6.8; @@ -1468,11 +1470,7 @@ bool t; bgp_community = -empty-; bgp_originator_id = 9.7.5.3; bgp_cluster_list = -empty-; - t = defined(bgp_mp_reach_nlri); - t = defined(bgp_mp_unreach_nlri); bgp_ext_community = --empty--; - bgp_as4_path = +empty+; - t = defined(bgp_as4_aggregator); t = defined(bgp_aigp); bgp_large_community = ---empty---; t = defined(bgp_mpls_label_stack); diff --git a/lib/attrs.h b/lib/attrs.h index 79b7b14a..a75abcd3 100644 --- a/lib/attrs.h +++ b/lib/attrs.h @@ -42,7 +42,7 @@ lp_store_adata(struct linpool *pool, const void *buf, uint len) #define tmp_copy_adata(ad) tmp_store_adata((ad)->data, (ad)->length) static inline int adata_same(const struct adata *a, const struct adata *b) -{ return (a->length == b->length && !memcmp(a->data, b->data, a->length)); } +{ return (!a && !b) || (a->length == b->length && !memcmp(a->data, b->data, a->length)); } @@ -10,7 +10,7 @@ #ifndef _BIRD_HASH_H_ #define _BIRD_HASH_H_ -#define HASH(type) struct { type **data; uint count, order; } +#define HASH(type) struct { type **data; uint count; u16 iterators; u8 order; u8 down_requested:1; } #define HASH_TYPE(v) typeof(** (v).data) #define HASH_SIZE(v) (1U << (v).order) @@ -125,20 +125,26 @@ #define HASH_MAY_STEP_DOWN_(v,pool,rehash_fn,args) \ ({ \ - if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \ - ((v).order > (REHASH_LO_BOUND(args)))) \ + if ((v).iterators) \ + (v).down_requested = 1; \ + else if (((v).count < (HASH_SIZE(v) REHASH_LO_MARK(args))) && \ + ((v).order > (REHASH_LO_BOUND(args)))) \ rehash_fn(&(v), pool, -(REHASH_LO_STEP(args))); \ }) #define HASH_MAY_RESIZE_DOWN_(v,pool,rehash_fn,args) \ ({ \ - uint _o = (v).order; \ - while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \ - (_o > (REHASH_LO_BOUND(args)))) \ - _o -= (REHASH_LO_STEP(args)); \ - if (_o < (v).order) \ - rehash_fn(&(v), pool, _o - (v).order); \ - }) + if ((v).iterators) \ + (v).down_requested = 1; \ + else { \ + uint _o = (v).order; \ + while (((v).count < ((1U << _o) REHASH_LO_MARK(args))) && \ + (_o > (REHASH_LO_BOUND(args)))) \ + _o -= (REHASH_LO_STEP(args)); \ + if (_o < (v).order) \ + rehash_fn(&(v), pool, _o - (v).order); \ + } \ + }) #define HASH_INSERT2(v,id,pool,node) \ @@ -195,6 +201,20 @@ #define HASH_WALK_FILTER_END } while (0) +#define HASH_WALK_ITER(v, id, n, iter) \ + do { \ + uint _hash_walk_iter_put = 0; \ + uint _shift = 32 - (v).order; \ + for ( ; !_hash_walk_iter_put; (iter) += (1U << _shift)) { \ + _hash_walk_iter_put = ((iter) + (1U << _shift) == 0); \ + for (HASH_TYPE(v) *n = (v).data[(iter) >> _shift]; n; n = id##_NEXT((n)))\ + if (HASH_FN(v, id, id##_KEY(n)) >= ((iter) >> _shift)) \ + +#define HASH_WALK_ITER_PUT (_hash_walk_iter_put = 1) + +#define HASH_WALK_ITER_END } } while (0) + + static inline void mem_hash_init(u64 *h) { diff --git a/lib/hash_test.c b/lib/hash_test.c index 4bce7017..ecfcdd66 100644 --- a/lib/hash_test.c +++ b/lib/hash_test.c @@ -285,6 +285,46 @@ t_walk_filter(void) return 1; } +static int +t_walk_iter(void) +{ + init_hash(); + fill_hash(); + + u32 hit = 0; + + u32 prev_hash = ~0; + for (uint cnt = 0; cnt < MAX_NUM; ) + { + u32 last_hash = ~0; +// printf("PUT!\n"); + HASH_WALK_ITER(hash, TEST, n, hit) + { + cnt++; + u32 cur_hash = HASH_FN(hash, TEST, n->key); + /* + printf("C%08x L%08x P%08x K%08x H%08x N%p S%d I%ld\n", + cur_hash, last_hash, prev_hash, n->key, hit, n, _shift, n - &nodes[0]); + */ + + if (last_hash == ~0U) + { + if (prev_hash != ~0U) + bt_assert(prev_hash < cur_hash); + last_hash = prev_hash = cur_hash; + } + else + bt_assert(last_hash == cur_hash); + + if (cnt < MAX_NUM) + HASH_WALK_ITER_PUT; + } + HASH_WALK_ITER_END; + } + + return 1; +} + int main(int argc, char *argv[]) { @@ -299,6 +339,7 @@ main(int argc, char *argv[]) bt_test_suite(t_walk_delsafe_remove, "HASH_WALK_DELSAFE and HASH_REMOVE"); bt_test_suite(t_walk_delsafe_remove2, "HASH_WALK_DELSAFE and HASH_REMOVE2. HASH_REMOVE2 is HASH_REMOVE and smart auto-resize function"); bt_test_suite(t_walk_filter, "HASH_WALK_FILTER"); + bt_test_suite(t_walk_iter, "HASH_WALK_ITER"); return bt_exit_value(); } diff --git a/lib/route.h b/lib/route.h index 1b2f4de6..68596316 100644 --- a/lib/route.h +++ b/lib/route.h @@ -35,6 +35,7 @@ typedef struct rte { #define REF_STALE 4 /* Route is stale in a refresh cycle */ #define REF_DISCARD 8 /* Route is scheduled for discard */ #define REF_MODIFY 16 /* Route is scheduled for modify */ +#define REF_PENDING 32 /* Route has not propagated completely yet */ /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } @@ -53,6 +54,7 @@ struct rte_src { struct rte_src *rt_find_source(struct proto *p, u32 id); struct rte_src *rt_get_source(struct proto *p, u32 id); +struct rte_src *rt_find_source_global(u32 id); static inline void rt_lock_source(struct rte_src *src) { src->uc++; } static inline void rt_unlock_source(struct rte_src *src) { src->uc--; } void rt_prune_sources(void); @@ -147,10 +149,6 @@ typedef struct eattr { #define EA_BIT_GET(ea) ((ea) >> 24) typedef struct ea_list { - struct ea_list *next_hash; /* Next in hash chain */ - struct ea_list **pprev_hash; /* Previous in hash chain */ - u32 uc; /* Use count */ - u32 hash_key; /* List hash */ struct ea_list *next; /* In case we have an override list */ byte flags; /* Flags: EALF_... */ byte rfu; @@ -158,6 +156,14 @@ typedef struct ea_list { eattr attrs[0]; /* Attribute definitions themselves */ } ea_list; +struct ea_storage { + struct ea_storage *next_hash; /* Next in hash chain */ + struct ea_storage **pprev_hash; /* Previous in hash chain */ + u32 uc; /* Use count */ + u32 hash_key; /* List hash */ + ea_list l[0]; /* The list itself */ +}; + #define EALF_SORTED 1 /* Attributes are sorted by code */ #define EALF_BISECT 2 /* Use interval bisection for searching */ #define EALF_CACHED 4 /* List is cached */ @@ -171,6 +177,7 @@ struct ea_class { btype type; /* Data type ID */ \ uint readonly:1; /* This attribute can't be changed by filters */ \ uint conf:1; /* Requested by config */ \ + uint hidden:1; /* Technical attribute, do not show, do not expose to filters */ \ void (*format)(const eattr *ea, byte *buf, uint size); \ void (*stored)(const eattr *ea); /* When stored into global hash */ \ void (*freed)(const eattr *ea); /* When released from global hash */ \ @@ -237,7 +244,7 @@ ea_list *ea_append(ea_list *to, ea_list *what); void ea_format_bitfield(const struct eattr *a, byte *buf, int bufsize, const char **names, int min, int max); /* Normalize ea_list; allocates the result from tmp_linpool */ -ea_list *ea_normalize(const ea_list *e); +ea_list *ea_normalize(ea_list *e, int overlay); uint ea_list_size(ea_list *); void ea_list_copy(ea_list *dest, ea_list *src, uint size); @@ -413,11 +420,21 @@ static inline int rte_dest(const rte *r) } void rta_init(void); -ea_list *ea_lookup(ea_list *); /* Get a cached (and normalized) variant of this attribute list */ -static inline int ea_is_cached(ea_list *r) { return r->flags & EALF_CACHED; } -static inline ea_list *ea_clone(ea_list *r) { r->uc++; return r; } -void ea__free(ea_list *r); -static inline void ea_free(ea_list *r) { if (r && !--r->uc) ea__free(r); } +ea_list *ea_lookup(ea_list *, int overlay); /* Get a cached (and normalized) variant of this attribute list */ +static inline int ea_is_cached(const ea_list *r) { return r->flags & EALF_CACHED; } +static inline struct ea_storage *ea_get_storage(ea_list *r) +{ + ASSERT_DIE(ea_is_cached(r)); + return SKIP_BACK(struct ea_storage, l, r); +} + +static inline ea_list *ea_clone(ea_list *r) { ea_get_storage(r)->uc++; return r; } +void ea__free(struct ea_storage *r); +static inline void ea_free(ea_list *l) { + if (!l) return; + struct ea_storage *r = ea_get_storage(l); + if (!--r->uc) ea__free(r); +} void ea_dump(ea_list *); void ea_dump_all(void); @@ -319,7 +319,6 @@ cli_new(void *priv) c->event->data = c; c->cont = cli_hello; c->parser_pool = lp_new_default(c->pool); - c->show_pool = lp_new_default(c->pool); c->rx_buf = mb_alloc(c->pool, CLI_RX_BUF_SIZE); ev_schedule(c->event); return c; @@ -409,11 +408,14 @@ void cli_free(cli *c) { cli_set_log_echo(c, 0, 0); + int defer = 0; if (c->cleanup) - c->cleanup(c); + defer = c->cleanup(c); if (c == cmd_reconfig_stored_cli) cmd_reconfig_stored_cli = NULL; - rfree(c->pool); + + if (!defer) + rfree(c->pool); } /** @@ -33,12 +33,12 @@ typedef struct cli { struct cli_out *tx_buf, *tx_pos, *tx_write; event *event; void (*cont)(struct cli *c); - void (*cleanup)(struct cli *c); + int (*cleanup)(struct cli *c); /* Return 0 if finished and cli may be freed immediately. + Otherwise return 1 and call rfree(c->pool) when appropriate. */ void *rover; /* Private to continuation routine */ int last_reply; int restricted; /* CLI is restricted to read-only commands */ struct linpool *parser_pool; /* Pool used during parsing */ - struct linpool *show_pool; /* Pool used during route show */ byte *ring_buf; /* Ring buffer for asynchronous messages */ byte *ring_end, *ring_read, *ring_write; /* Pointers to the ring buffer */ uint ring_overflow; /* Counter of ring overflows */ diff --git a/nest/config.Y b/nest/config.Y index 2d73b4c7..edddfc2b 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -306,12 +306,25 @@ channel_item_: this_channel->table = $2; } | IMPORT imexport { this_channel->in_filter = $2; } + | EXPORT IN net_any imexport { + if (this_channel->net_type && ($3->type != this_channel->net_type)) + cf_error("Incompatible export prefilter type"); + this_channel->out_subprefix = $3; + this_channel->out_filter = $4; + } | EXPORT imexport { this_channel->out_filter = $2; } | RECEIVE LIMIT limit_spec { this_channel->rx_limit = $3; } | IMPORT LIMIT limit_spec { this_channel->in_limit = $3; } | EXPORT LIMIT limit_spec { this_channel->out_limit = $3; } | PREFERENCE expr { this_channel->preference = $2; check_u16($2); } - | IMPORT KEEP FILTERED bool { this_channel->in_keep_filtered = $4; } + | IMPORT KEEP FILTERED bool { + if ($4) + this_channel->in_keep |= RIK_REJECTED; + else if ((this_channel->in_keep & RIK_PREFILTER) == RIK_PREFILTER) + cf_error("Import keep filtered is implied by the import table."); + else + this_channel->in_keep &= ~RIK_REJECTED; + } | RPKI RELOAD bool { this_channel->rpki_reload = $3; } ; @@ -640,25 +653,26 @@ r_args: init_list(&($$->tables)); $$->filter = FILTER_ACCEPT; $$->running_on_config = config; + $$->cli = this_cli; } | r_args net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $2; - $$->addr_mode = RSD_ADDR_EQUAL; + $$->addr_mode = TE_ADDR_EQUAL; } | r_args FOR r_args_for { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); $$->addr = $3; - $$->addr_mode = RSD_ADDR_FOR; + $$->addr_mode = TE_ADDR_FOR; } | r_args IN net_any { $$ = $1; if ($$->addr) cf_error("Only one prefix expected"); if (!net_type_match($3, NB_IP)) cf_error("Only IP networks accepted for 'in' argument"); $$->addr = $3; - $$->addr_mode = RSD_ADDR_IN; + $$->addr_mode = TE_ADDR_IN; } | r_args TABLE symbol_known { cf_assert_symbol($3, SYM_TABLE); @@ -674,13 +688,13 @@ r_args: $$->tables_defined_by = RSD_TDB_ALL; } | r_args IMPORT TABLE channel_arg { - if (!$4->in_table) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->in_table); + if (!($4->in_keep & RIK_PREFILTER)) cf_error("No import table in channel %s.%s", $4->proto->name, $4->name); + rt_show_add_exporter($$, &$4->table->exporter, "import")->prefilter = $4; $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args EXPORT TABLE channel_arg { if (!$4->out_table) cf_error("No export table in channel %s.%s", $4->proto->name, $4->name); - rt_show_add_table($$, $4->out_table); + rt_show_add_exporter($$, $4->out_table, "export"); $$->tables_defined_by = RSD_TDB_DIRECT; } | r_args FILTER filter { diff --git a/nest/proto.c b/nest/proto.c index 77817888..061205c1 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -177,7 +177,7 @@ proto_find_channel_by_name(struct proto *p, const char *n) return NULL; } -rte * channel_preimport(struct rt_import_request *req, rte *new, rte *old); +int channel_preimport(struct rt_import_request *req, rte *new, rte *old); void rt_notify_optimal(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); void rt_notify_any(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe); @@ -214,6 +214,7 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->in_filter = cf->in_filter; c->out_filter = cf->out_filter; + c->out_subprefix = cf->out_subprefix; channel_init_limit(c, &c->rx_limit, PLD_RX, &cf->rx_limit); channel_init_limit(c, &c->in_limit, PLD_IN, &cf->in_limit); @@ -224,7 +225,7 @@ proto_add_channel(struct proto *p, struct channel_config *cf) c->preference = cf->preference; c->debug = cf->debug; c->merge_limit = cf->merge_limit; - c->in_keep_filtered = cf->in_keep_filtered; + c->in_keep = cf->in_keep; c->rpki_reload = cf->rpki_reload; c->channel_state = CS_DOWN; @@ -294,7 +295,7 @@ static void channel_roa_in_changed(struct rt_subscription *s) { struct channel *c = s->data; - int active = c->reload_event && ev_active(c->reload_event); + int active = !!c->reload_req.hook; CD(c, "Reload triggered by RPKI change%s", active ? " - already active" : ""); @@ -379,7 +380,7 @@ channel_roa_subscribe_filter(struct channel *c, int dir) #ifdef CONFIG_BGP /* No automatic reload for BGP channels without in_table / out_table */ if (c->channel == &channel_bgp) - valid = dir ? !!c->in_table : !!c->out_table; + valid = dir ? ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) : !!c->out_table; #endif struct filter_iterator fit; @@ -467,6 +468,8 @@ channel_start_export(struct channel *c) c->out_req = (struct rt_export_request) { .name = rn, + .addr = c->out_subprefix, + .addr_mode = c->out_subprefix ? TE_ADDR_IN : TE_ADDR_NONE, .trace_routes = c->debug | c->proto->debug, .dump_req = channel_dump_export_req, .log_state_change = channel_export_log_state_change, @@ -498,7 +501,7 @@ channel_start_export(struct channel *c) } DBG("%s.%s: Channel start export req=%p\n", c->proto->name, c->name, &c->out_req); - rt_request_export(c->table, &c->out_req); + rt_request_export(&c->table->exporter, &c->out_req); } static void @@ -534,9 +537,6 @@ channel_import_stopped(struct rt_import_request *req) req->hook = NULL; - if (c->in_table) - rt_prune_sync(c->in_table, 1); - mb_free(c->in_req.name); c->in_req.name = NULL; @@ -555,14 +555,10 @@ channel_export_stopped(struct rt_export_request *req) { c->refeeding = 1; c->refeed_pending = 0; - rt_request_export(c->table, req); + rt_request_export(&c->table->exporter, req); return; } - /* Free the routes from out_table */ - if (c->out_table) - rt_prune_sync(c->out_table, 1); - mb_free(c->out_req.name); c->out_req.name = NULL; @@ -603,55 +599,48 @@ channel_schedule_reload(struct channel *c) { ASSERT(c->in_req.hook); - rt_reload_channel_abort(c); - ev_schedule_work(c->reload_event); + rt_request_export(&c->table->exporter, &c->reload_req); } static void -channel_reload_loop(void *ptr) +channel_reload_stopped(struct rt_export_request *req) { - struct channel *c = ptr; - - /* Start reload */ - if (!c->reload_active) - c->reload_pending = 0; - - if (!rt_reload_channel(c)) - { - ev_schedule_work(c->reload_event); - return; - } + struct channel *c = SKIP_BACK(struct channel, reload_req, req); /* Restart reload */ if (c->reload_pending) channel_request_reload(c); } -/* Called by protocol to activate in_table */ -void -channel_setup_in_table(struct channel *c) +static void +channel_reload_log_state_change(struct rt_export_request *req, u8 state) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); - - cf->name = "import"; - cf->addr_type = c->net_type; - cf->internal = 1; - - c->in_table = rt_setup(c->proto->pool, cf); + if (state == TES_READY) + rt_stop_export(req, channel_reload_stopped); +} - c->reload_event = ev_new_init(c->proto->pool, channel_reload_loop, c); +static void +channel_reload_dump_req(struct rt_export_request *req) +{ + struct channel *c = SKIP_BACK(struct channel, reload_req, req); + debug(" Channel %s.%s import reload request %p\n", c->proto->name, c->name, req); } -/* Called by protocol to activate out_table */ +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe, rte **feed, uint count); + +/* Called by protocol to activate in_table */ void -channel_setup_out_table(struct channel *c) +channel_setup_in_table(struct channel *c) { - struct rtable_config *cf = mb_allocz(c->proto->pool, sizeof(struct rtable_config)); - cf->name = "export"; - cf->addr_type = c->net_type; - cf->internal = 1; + c->reload_req = (struct rt_export_request) { + .name = mb_sprintf(c->proto->pool, "%s.%s.import", c->proto->name, c->name), + .trace_routes = c->debug | c->proto->debug, + .export_bulk = channel_reload_export_bulk, + .dump_req = channel_reload_dump_req, + .log_state_change = channel_reload_log_state_change, + }; - c->out_table = rt_setup(c->proto->pool, cf); + c->in_keep |= RIK_PREFILTER; } @@ -680,10 +669,10 @@ static void channel_do_pause(struct channel *c) { /* Need to abort feeding */ - if (c->reload_event) + if (c->reload_req.hook) { - ev_postpone(c->reload_event); - rt_reload_channel_abort(c); + c->reload_pending = 0; + rt_stop_export(&c->reload_req, channel_reload_stopped); } /* Stop export */ @@ -710,15 +699,13 @@ channel_do_stop(struct channel *c) CALL(c->channel->shutdown, c); /* This have to be done in here, as channel pool is freed before channel_do_down() */ - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; } static void channel_do_down(struct channel *c) { - ASSERT(!c->reload_active); + ASSERT(!c->reload_req.hook); c->proto->active_channels--; @@ -726,8 +713,6 @@ channel_do_down(struct channel *c) memset(&c->import_stats, 0, sizeof(struct channel_import_stats)); memset(&c->export_stats, 0, sizeof(struct channel_export_stats)); - c->in_table = NULL; - c->reload_event = NULL; c->out_table = NULL; /* The in_table and out_table are going to be freed by freeing their resource pools. */ @@ -922,7 +907,12 @@ int channel_reconfigure(struct channel *c, struct channel_config *cf) { /* FIXME: better handle these changes, also handle in_keep_filtered */ - if ((c->table != cf->table->table) || (cf->ra_mode && (c->ra_mode != cf->ra_mode))) + if ((c->table != cf->table->table) || + (cf->ra_mode && (c->ra_mode != cf->ra_mode)) || + (cf->in_keep != c->in_keep) || + cf->out_subprefix && c->out_subprefix && + !net_equal(cf->out_subprefix, c->out_subprefix) || + (!cf->out_subprefix != !c->out_subprefix)) return 0; /* Note that filter_same() requires arguments in (new, old) order */ @@ -947,9 +937,9 @@ channel_reconfigure(struct channel *c, struct channel_config *cf) // c->ra_mode = cf->ra_mode; c->merge_limit = cf->merge_limit; c->preference = cf->preference; + c->out_req.addr = c->out_subprefix = cf->out_subprefix; c->debug = cf->debug; c->in_req.trace_routes = c->out_req.trace_routes = c->debug | c->proto->debug; - c->in_keep_filtered = cf->in_keep_filtered; c->rpki_reload = cf->rpki_reload; /* Execute channel-specific reconfigure hook */ @@ -2099,7 +2089,7 @@ channel_show_stats(struct channel *c) u32 in_routes = c->in_limit.count; u32 out_routes = c->out_limit.count; - if (c->in_keep_filtered) + if (c->in_keep) cli_msg(-1006, " Routes: %u imported, %u filtered, %u exported, %u preferred", in_routes, (rx_routes - in_routes), out_routes, SRI(pref)); else diff --git a/nest/protocol.h b/nest/protocol.h index aeb60ac6..3ccd364a 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -452,9 +452,10 @@ struct channel_config { struct proto_config *parent; /* Where channel is defined (proto or template) */ struct rtable_config *table; /* Table we're attached to */ const struct filter *in_filter, *out_filter; /* Attached filters */ + const net_addr *out_subprefix; /* Export only subprefixes of this net */ struct channel_limit rx_limit; /* Limit for receiving routes from protocol - (relevant when in_keep_filtered is active) */ + (relevant when in_keep & RIK_REJECTED) */ struct channel_limit in_limit; /* Limit for importing routes from protocol */ struct channel_limit out_limit; /* Limit for exporting routes to protocol */ @@ -463,7 +464,7 @@ struct channel_config { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 rpki_reload; /* RPKI changes trigger channel reload */ }; @@ -477,10 +478,11 @@ struct channel { struct rtable *table; const struct filter *in_filter; /* Input filter */ const struct filter *out_filter; /* Output filter */ + const net_addr *out_subprefix; /* Export only subprefixes of this net */ struct bmap export_map; /* Keeps track which routes were really exported */ struct bmap export_reject_map; /* Keeps track which routes were rejected by export filter */ - struct limit rx_limit; /* Receive limit (for in_keep_filtered) */ + struct limit rx_limit; /* Receive limit (for in_keep & RIK_REJECTED) */ struct limit in_limit; /* Input limit */ struct limit out_limit; /* Output limit */ @@ -517,7 +519,7 @@ struct channel { u16 preference; /* Default route preference */ u32 debug; /* Debugging flags (D_*) */ u8 merge_limit; /* Maximal number of nexthops for RA_MERGED */ - u8 in_keep_filtered; /* Routes rejected in import filter are kept */ + u8 in_keep; /* Which states of routes to keep (RIK_*) */ u8 disabled; u8 stale; /* Used in reconfiguration */ @@ -529,21 +531,19 @@ struct channel { btime last_state_change; /* Time of last state transition */ - struct rtable *in_table; /* Internal table for received routes */ - struct event *reload_event; /* Event responsible for reloading from in_table */ - struct fib_iterator reload_fit; /* FIB iterator in in_table used during reloading */ - struct rte_storage *reload_next_rte; /* Route iterator in in_table used during reloading */ - u8 reload_active; /* Iterator reload_fit is linked */ + struct rt_export_request reload_req; /* Feeder for import reload */ u8 reload_pending; /* Reloading and another reload is scheduled */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ u8 rpki_reload; /* RPKI changes trigger channel reload */ - struct rtable *out_table; /* Internal table for exported routes */ + struct rt_exporter *out_table; /* Internal table for exported routes */ list roa_subscriptions; /* List of active ROA table subscriptions based on filters roa_check() */ }; +#define RIK_REJECTED 1 /* Routes rejected in import filter are kept */ +#define RIK_PREFILTER (2 | RIK_REJECTED) /* All routes' attribute state before import filter is kept */ /* * Channel states @@ -610,7 +610,6 @@ int proto_configure_channel(struct proto *p, struct channel **c, struct channel_ void channel_set_state(struct channel *c, uint state); void channel_setup_in_table(struct channel *c); -void channel_setup_out_table(struct channel *c); void channel_schedule_reload(struct channel *c); static inline void channel_init(struct channel *c) { channel_set_state(c, CS_START); } diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 31e2057e..b31bc5cc 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -197,11 +197,14 @@ static struct idm src_ids; #define RSH_INIT_ORDER 6 static HASH(struct rte_src) src_hash; +static struct rte_src **rte_src_global; +static uint rte_src_global_max = SRC_ID_INIT_SIZE; static void rte_src_init(void) { rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src)); + rte_src_global = mb_allocz(rta_pool, sizeof(struct rte_src *) * rte_src_global_max); idm_init(&src_ids, rta_pool, SRC_ID_INIT_SIZE); @@ -232,10 +235,27 @@ rt_get_source(struct proto *p, u32 id) src->uc = 0; HASH_INSERT2(src_hash, RSH, rta_pool, src); + if (src->global_id >= rte_src_global_max) + { + rte_src_global = mb_realloc(rte_src_global, sizeof(struct rte_src *) * (rte_src_global_max *= 2)); + memset(&rte_src_global[rte_src_global_max / 2], 0, + sizeof(struct rte_src *) * (rte_src_global_max / 2)); + } + + rte_src_global[src->global_id] = src; return src; } +struct rte_src * +rt_find_source_global(u32 id) +{ + if (id >= rte_src_global_max) + return NULL; + else + return rte_src_global[id]; +} + void rt_prune_sources(void) { @@ -436,7 +456,8 @@ ea_class_free(struct ea_class *cl) /* No more ea class references. Unregister the attribute. */ idm_free(&ea_class_idm, cl->id); ea_class_global[cl->id] = NULL; - ea_lex_unregister(cl); + if (!cl->hidden) + ea_lex_unregister(cl); } static void @@ -492,7 +513,8 @@ ea_register(pool *p, struct ea_class *def) ASSERT_DIE(def->id < ea_class_max); ea_class_global[def->id] = def; - ea_lex_register(def); + if (!def->hidden) + ea_lex_register(def); return ea_ref_class(p, def); } @@ -726,8 +748,8 @@ ea_do_prune(ea_list *e) s++; /* Now s0 is the most recent version, s[-1] the oldest one */ - /* Drop undefs */ - if (s0->undef) + /* Drop undefs unless this is a true overlay */ + if (s0->undef && (s[-1].undef || !e->next)) continue; /* Copy the newest version to destination */ @@ -760,18 +782,15 @@ ea_do_prune(ea_list *e) static void ea_sort(ea_list *e) { - while (e) - { - if (!(e->flags & EALF_SORTED)) - { - ea_do_sort(e); - ea_do_prune(e); - e->flags |= EALF_SORTED; - } - if (e->count > 5) - e->flags |= EALF_BISECT; - e = e->next; - } + if (!(e->flags & EALF_SORTED)) + { + ea_do_sort(e); + ea_do_prune(e); + e->flags |= EALF_SORTED; + } + + if (e->count > 5) + e->flags |= EALF_BISECT; } /** @@ -782,7 +801,7 @@ ea_sort(ea_list *e) * a given &ea_list after merging with ea_merge(). */ static unsigned -ea_scan(const ea_list *e) +ea_scan(const ea_list *e, int overlay) { unsigned cnt = 0; @@ -790,6 +809,8 @@ ea_scan(const ea_list *e) { cnt += e->count; e = e->next; + if (e && overlay && ea_is_cached(e)) + break; } return sizeof(ea_list) + sizeof(eattr)*cnt; } @@ -809,30 +830,35 @@ ea_scan(const ea_list *e) * by calling ea_sort(). */ static void -ea_merge(const ea_list *e, ea_list *t) +ea_merge(ea_list *e, ea_list *t, int overlay) { eattr *d = t->attrs; t->flags = 0; t->count = 0; - t->next = NULL; + while (e) { memcpy(d, e->attrs, sizeof(eattr)*e->count); t->count += e->count; d += e->count; e = e->next; + + if (e && overlay && ea_is_cached(e)) + break; } + + t->next = e; } ea_list * -ea_normalize(const ea_list *e) +ea_normalize(ea_list *e, int overlay) { - ea_list *t = tmp_alloc(ea_scan(e)); - ea_merge(e, t); + ea_list *t = tmp_alloc(ea_scan(e, overlay)); + ea_merge(e, t, overlay); ea_sort(t); - return t->count ? t : NULL; + return t->count ? t : t->next; } /** @@ -850,7 +876,8 @@ ea_same(ea_list *x, ea_list *y) if (!x || !y) return x == y; - ASSERT(!x->next && !y->next); + if (x->next != y->next) + return 0; if (x->count != y->count) return 0; for(c=0; c<x->count; c++) @@ -876,13 +903,12 @@ ea_list_size(ea_list *o) unsigned i, elen; ASSERT_DIE(o); - ASSERT_DIE(!o->next); elen = BIRD_CPU_ALIGN(sizeof(ea_list) + sizeof(eattr) * o->count); for(i=0; i<o->count; i++) { eattr *a = &o->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) elen += ADATA_SIZE(a->u.ptr->length); } @@ -899,7 +925,7 @@ ea_list_copy(ea_list *n, ea_list *o, uint elen) for(uint i=0; i<o->count; i++) { eattr *a = &n->attrs[i]; - if (!(a->type & EAF_EMBEDDED)) + if (!a->undef && !(a->type & EAF_EMBEDDED)) { unsigned size = ADATA_SIZE(a->u.ptr->length); ASSERT_DIE(adpos + size <= elen); @@ -923,12 +949,21 @@ ea_list_ref(ea_list *l) eattr *a = &l->attrs[i]; ASSERT_DIE(a->id < ea_class_max); + if (a->undef) + continue; + struct ea_class *cl = ea_class_global[a->id]; ASSERT_DIE(cl && cl->uc); CALL(cl->stored, a); cl->uc++; } + + if (l->next) + { + ASSERT_DIE(ea_is_cached(l->next)); + ea_clone(l->next); + } } static void @@ -939,6 +974,9 @@ ea_list_unref(ea_list *l) eattr *a = &l->attrs[i]; ASSERT_DIE(a->id < ea_class_max); + if (a->undef) + continue; + struct ea_class *cl = ea_class_global[a->id]; ASSERT_DIE(cl && cl->uc); @@ -946,6 +984,9 @@ ea_list_unref(ea_list *l) if (!--cl->uc) ea_class_free(cl); } + + if (l->next) + ea_free(l->next); } void @@ -998,41 +1039,90 @@ opaque_format(const struct adata *ad, byte *buf, uint size) } static inline void -ea_show_int_set(struct cli *c, const struct adata *ad, int way, byte *pos, byte *buf, byte *end) +ea_show_int_set(struct cli *c, const char *name, const struct adata *ad, int way, byte *buf) { - int i = int_set_format(ad, way, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = int_set_format(ad, way, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = int_set_format(ad, way, i, buf, end - buf - 1); + i = int_set_format(ad, way, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_ec_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_ec_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = ec_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = ec_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = ec_set_format(ad, i, buf, end - buf - 1); + i = ec_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } static inline void -ea_show_lc_set(struct cli *c, const struct adata *ad, byte *pos, byte *buf, byte *end) +ea_show_lc_set(struct cli *c, const char *name, const struct adata *ad, byte *buf) { - int i = lc_set_format(ad, 0, pos, end - pos); - cli_printf(c, -1012, "\t%s", buf); + int nlen = strlen(name); + int i = lc_set_format(ad, 0, buf, CLI_MSG_SIZE - nlen - 3); + cli_printf(c, -1012, "\t%s: %s", name, buf); while (i) { - i = lc_set_format(ad, i, buf, end - buf - 1); + i = lc_set_format(ad, i, buf, CLI_MSG_SIZE - 1); cli_printf(c, -1012, "\t\t%s", buf); } } +void +ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad) +{ + if (!NEXTHOP_IS_REACHABLE(nhad)) + return; + + NEXTHOP_WALK(nh, nhad) + { + char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; + char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; + char weight[16] = ""; + + if (nh->labels) + { + lsp += bsprintf(lsp, " mpls %d", nh->label[0]); + for (int i=1;i<nh->labels; i++) + lsp += bsprintf(lsp, "/%d", nh->label[i]); + } + *lsp = '\0'; + + if (!NEXTHOP_ONE(nhad)) + bsprintf(weight, " weight %d", nh->weight + 1); + + if (ipa_nonzero(nh->gw)) + if (nh->iface) + cli_printf(c, -1007, "\tvia %I on %s%s%s%s", + nh->gw, nh->iface->name, mpls, onlink, weight); + else + cli_printf(c, -1007, "\tvia %I", nh->gw); + else + cli_printf(c, -1007, "\tdev %s%s%s", + nh->iface->name, mpls, onlink, weight); + } +} + +void +ea_show_hostentry(const struct adata *ad, byte *buf, uint size) +{ + const struct hostentry_adata *had = (const struct hostentry_adata *) ad; + + if (ipa_nonzero(had->he->link) && !ipa_equal(had->he->link, had->he->addr)) + bsnprintf(buf, size, "via %I %I table %s", had->he->addr, had->he->link, had->he->tab->name); + else + bsnprintf(buf, size, "via %I table %s", had->he->addr, had->he->tab->name); +} + /** * ea_show - print an &eattr to CLI * @c: destination CLI @@ -1056,19 +1146,17 @@ ea_show(struct cli *c, const eattr *e) struct ea_class *cls = ea_class_global[e->id]; ASSERT_DIE(cls); - pos += bsprintf(pos, "%s", cls->name); - - *pos++ = ':'; - *pos++ = ' '; - - if (e->undef) - bsprintf(pos, "undefined (should not happen)"); + if (e->undef || cls->hidden) + return; else if (cls->format) cls->format(e, buf, end - buf); else switch (e->type) { case T_INT: + if ((cls == &ea_gen_igp_metric) && e->u.data >= IGP_METRIC_UNKNOWN) + return; + bsprintf(pos, "%u", e->u.data); break; case T_OPAQUE: @@ -1084,19 +1172,25 @@ ea_show(struct cli *c, const eattr *e) as_path_format(ad, pos, end - pos); break; case T_CLIST: - ea_show_int_set(c, ad, 1, pos, buf, end); + ea_show_int_set(c, cls->name, ad, 1, buf); return; case T_ECLIST: - ea_show_ec_set(c, ad, pos, buf, end); + ea_show_ec_set(c, cls->name, ad, buf); return; case T_LCLIST: - ea_show_lc_set(c, ad, pos, buf, end); + ea_show_lc_set(c, cls->name, ad, buf); return; + case T_NEXTHOP_LIST: + ea_show_nexthop_list(c, (struct nexthop_adata *) e->u.ptr); + return; + case T_HOSTENTRY: + ea_show_hostentry(ad, pos, end - pos); + break; default: bsprintf(pos, "<type %02x>", e->type); } - cli_printf(c, -1012, "\t%s", buf); + cli_printf(c, -1012, "\t%s: %s", cls->name, buf); } static void @@ -1135,11 +1229,12 @@ ea_dump(ea_list *e) } while (e) { + struct ea_storage *s = ea_is_cached(e) ? ea_get_storage(e) : NULL; debug("[%c%c%c] uc=%d h=%08x", (e->flags & EALF_SORTED) ? 'S' : 's', (e->flags & EALF_BISECT) ? 'B' : 'b', (e->flags & EALF_CACHED) ? 'C' : 'c', - e->uc, e->hash_key); + s ? s->uc : 0, s ? s->hash_key : 0); for(i=0; i<e->count; i++) { eattr *a = &e->attrs[i]; @@ -1183,11 +1278,13 @@ ea_hash(ea_list *e) if (e) /* Assuming chain of length 1 */ { - ASSERT_DIE(!e->next); + h ^= mem_hash(&e->next, sizeof(e->next)); for(i=0; i<e->count; i++) { struct eattr *a = &e->attrs[i]; h ^= a->id; h *= mul; + if (a->undef) + continue; if (a->type & EAF_EMBEDDED) h ^= a->u.data; else @@ -1231,12 +1328,12 @@ static uint rta_cache_count; static uint rta_cache_size = 32; static uint rta_cache_limit; static uint rta_cache_mask; -static ea_list **rta_hash_table; +static struct ea_storage **rta_hash_table; static void rta_alloc_hash(void) { - rta_hash_table = mb_allocz(rta_pool, sizeof(ea_list *) * rta_cache_size); + rta_hash_table = mb_allocz(rta_pool, sizeof(struct ea_storage *) * rta_cache_size); if (rta_cache_size < 32768) rta_cache_limit = rta_cache_size * 2; else @@ -1245,7 +1342,7 @@ rta_alloc_hash(void) } static inline void -rta_insert(ea_list *r) +rta_insert(struct ea_storage *r) { uint h = r->hash_key & rta_cache_mask; r->next_hash = rta_hash_table[h]; @@ -1260,8 +1357,8 @@ rta_rehash(void) { uint ohs = rta_cache_size; uint h; - ea_list *r, *n; - ea_list **oht = rta_hash_table; + struct ea_storage *r, *n; + struct ea_storage **oht = rta_hash_table; rta_cache_size = 2*rta_cache_size; DBG("Rehashing rta cache from %d to %d entries.\n", ohs, rta_cache_size); @@ -1289,25 +1386,25 @@ rta_rehash(void) * converted to the normalized form. */ ea_list * -ea_lookup(ea_list *o) +ea_lookup(ea_list *o, int overlay) { - ea_list *r; + struct ea_storage *r; uint h; ASSERT(!ea_is_cached(o)); - o = ea_normalize(o); + o = ea_normalize(o, overlay); h = ea_hash(o); for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next_hash) - if (r->hash_key == h && ea_same(r, o)) - return ea_clone(r); + if (r->hash_key == h && ea_same(r->l, o)) + return ea_clone(r->l); uint elen = ea_list_size(o); - r = mb_alloc(rta_pool, elen); - ea_list_copy(r, o, elen); - ea_list_ref(r); + r = mb_alloc(rta_pool, elen + sizeof(struct ea_storage)); + ea_list_copy(r->l, o, elen); + ea_list_ref(r->l); - r->flags |= EALF_CACHED; + r->l->flags |= EALF_CACHED; r->hash_key = h; r->uc = 1; @@ -1316,20 +1413,19 @@ ea_lookup(ea_list *o) if (++rta_cache_count > rta_cache_limit) rta_rehash(); - return r; + return r->l; } void -ea__free(ea_list *a) +ea__free(struct ea_storage *a) { - ASSERT(rta_cache_count && ea_is_cached(a)); + ASSERT(rta_cache_count); rta_cache_count--; *a->pprev_hash = a->next_hash; if (a->next_hash) a->next_hash->pprev_hash = a->pprev_hash; - ASSERT(!a->next); - ea_list_unref(a); + ea_list_unref(a->l); mb_free(a); } @@ -1344,10 +1440,10 @@ ea_dump_all(void) { debug("Route attribute cache (%d entries, rehash at %d):\n", rta_cache_count, rta_cache_limit); for (uint h=0; h < rta_cache_size; h++) - for (ea_list *a = rta_hash_table[h]; a; a = a->next_hash) + for (struct ea_storage *a = rta_hash_table[h]; a; a = a->next_hash) { debug("%p ", a); - ea_dump(a); + ea_dump(a->l); debug("\n"); } debug("\n"); @@ -1356,9 +1452,9 @@ ea_dump_all(void) void ea_show_list(struct cli *c, ea_list *eal) { - for( ; eal; eal=eal->next) - for(int i=0; i<eal->count; i++) - ea_show(c, &eal->attrs[i]); + ea_list *n = ea_normalize(eal, 0); + for (int i =0; i < n->count; i++) + ea_show(c, &n->attrs[i]); } /** @@ -1376,12 +1472,15 @@ rta_init(void) rte_src_init(); ea_class_init(); + /* These attributes are required to be first for nice "show route" output */ + ea_register_init(&ea_gen_nexthop); + ea_register_init(&ea_gen_hostentry); + + /* Other generic route attributes */ ea_register_init(&ea_gen_preference); ea_register_init(&ea_gen_igp_metric); ea_register_init(&ea_gen_from); ea_register_init(&ea_gen_source); - ea_register_init(&ea_gen_nexthop); - ea_register_init(&ea_gen_hostentry); ea_register_init(&ea_gen_flowspec_valid); } diff --git a/nest/rt-show.c b/nest/rt-show.c index f3852d17..c3294518 100644 --- a/nest/rt-show.c +++ b/nest/rt-show.c @@ -19,31 +19,27 @@ #include "sysdep/unix/krt.h" static void -rt_show_table(struct cli *c, struct rt_show_data *d) +rt_show_table(struct rt_show_data *d) { + struct cli *c = d->cli; + /* No table blocks in 'show route count' */ if (d->stats == 2) return; if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, -1007, "Table %s:", d->tab->table->name); + cli_printf(c, -1007, "Table %s:", + d->tab->name); d->last_table = d->tab; } -static inline struct krt_proto * -rt_show_get_kernel(struct rt_show_data *d) -{ - struct proto_config *krt = d->tab->table->config->krt_attached; - return krt ? (struct krt_proto *) krt->proto : NULL; -} - static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary) { byte from[IPA_MAX_TEXT_LENGTH+8]; byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; ea_list *a = e->attrs; - int sync_error = d->kernel ? krt_get_sync_error(d->kernel, e) : 0; + int sync_error = d->tab->kernel ? krt_get_sync_error(d->tab->kernel, e) : 0; void (*get_route_info)(struct rte *, byte *buf); eattr *nhea = net_type_match(e->net, NB_DEST) ? ea_find(a, &ea_gen_nexthop) : NULL; @@ -60,7 +56,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary /* Need to normalize the extended attributes */ if (d->verbose && !rta_is_cached(a) && a) - a = ea_normalize(a); + a = ea_normalize(a, 0); get_route_info = e->src->proto->proto->get_route_info; if (get_route_info) @@ -69,68 +65,33 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, int primary bsprintf(info, " (%d)", rt_get_preference(e)); if (d->last_table != d->tab) - rt_show_table(c, d); - - cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, - net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : rta_dest_name(dest), - e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); + rt_show_table(d); - if (dest == RTD_UNICAST) - NEXTHOP_WALK(nh, nhad) - { - char mpls[MPLS_MAX_LABEL_STACK*12 + 5], *lsp = mpls; - char *onlink = (nh->flags & RNF_ONLINK) ? " onlink" : ""; - char weight[16] = ""; - - if (nh->labels) - { - lsp += bsprintf(lsp, " mpls %d", nh->label[0]); - for (int i=1;i<nh->labels; i++) - lsp += bsprintf(lsp, "/%d", nh->label[i]); - } - *lsp = '\0'; - - if (!NEXTHOP_ONE(nhad)) - bsprintf(weight, " weight %d", nh->weight + 1); + eattr *heea; + struct hostentry_adata *had = NULL; + if (!net_is_flow(e->net) && (dest == RTD_NONE) && (heea = ea_find(a, &ea_gen_hostentry))) + had = (struct hostentry_adata *) heea->u.ptr; - if (ipa_nonzero(nh->gw)) - cli_printf(c, -1007, "\tvia %I on %s%s%s%s", - nh->gw, nh->iface->name, mpls, onlink, weight); - else - cli_printf(c, -1007, "\tdev %s%s%s", - nh->iface->name, mpls, onlink, weight); - } + cli_printf(c, -1007, "%-20s %s [%s %s%s]%s%s", ia, + net_is_flow(e->net) ? flowspec_valid_name(flowspec_valid) : had ? "recursive" : rta_dest_name(dest), + e->src->proto->name, tm, from, primary ? (sync_error ? " !" : " *") : "", info); if (d->verbose) ea_show_list(c, a); -} - -static uint -rte_feed_count(net *n) -{ - uint count = 0; - for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) - count++; - return count; -} - -static void -rte_feed_obtain(net *n, rte **feed, uint count) -{ - uint i = 0; - for (struct rte_storage *e = n->routes; e; e = e->next) - if (rte_is_valid(RTE_OR_NULL(e))) - { - ASSERT_DIE(i < count); - feed[i++] = &e->rte; - } - ASSERT_DIE(i == count); + else if (dest == RTD_UNICAST) + ea_show_nexthop_list(c, nhad); + else if (had) + { + char hetext[256]; + ea_show_hostentry(&had->ad, hetext, sizeof hetext); + cli_printf(c, -1007, "\t%s", hetext); + } } static void -rt_show_net(struct cli *c, net *n, struct rt_show_data *d) +rt_show_net(struct rt_show_data *d, const net_addr *n, rte **feed, uint count) { + struct cli *c = d->cli; byte ia[NET_MAX_TEXT_LENGTH+1]; struct channel *ec = d->tab->export_channel; @@ -142,9 +103,9 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) int first_show = 1; int pass = 0; - for (struct rte_storage *er = n->routes; er; er = er->next) + for (uint i = 0; i < count; i++) { - if (rte_is_filtered(&er->rte) != d->filtered) + if (!d->tab->prefilter && (rte_is_filtered(feed[i]) != d->filtered)) continue; d->rt_counter++; @@ -154,7 +115,12 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (pass) continue; - struct rte e = er->rte; + struct rte e = *feed[i]; + if (d->tab->prefilter) + if (e.sender != d->tab->prefilter->in_req.hook) + continue; + else while (e.attrs->next) + e.attrs = e.attrs->next; /* Export channel is down, do not try to export routes to it */ if (ec && !ec->out_req.hook) @@ -172,13 +138,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) { /* Special case for merged export */ pass = 1; - uint count = rte_feed_count(n); - if (!count) - goto skip; - - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - rte *em = rt_export_merged(ec, feed, count, c->show_pool, 1); + rte *em = rt_export_merged(ec, feed, count, tmp_linpool, 1); if (em) e = *em; @@ -223,170 +183,177 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) if (d->stats < 2) { if (first_show) - net_format(n->n.addr, ia, sizeof(ia)); + net_format(n, ia, sizeof(ia)); else ia[0] = 0; - rt_show_rte(c, ia, &e, d, (n->routes == er)); + rt_show_rte(c, ia, &e, d, !d->tab->prefilter && !i); first_show = 0; } d->show_counter++; skip: - lp_flush(c->show_pool); - if (d->primary_only) break; } + + if ((d->show_counter - d->show_counter_last_flush) > 64) + { + d->show_counter_last_flush = d->show_counter; + cli_write_trigger(d->cli); + } } static void -rt_show_cleanup(struct cli *c) +rt_show_net_export_bulk(struct rt_export_request *req, const net_addr *n, + struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - struct rt_show_data *d = c->rover; - struct rt_show_data_rtable *tab; + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + return rt_show_net(d, n, feed, count); +} - /* Unlink the iterator */ - if (d->table_open && !d->trie_walk) - fit_get(&d->tab->table->fib, &d->fit); +static void +rt_show_export_stopped_cleanup(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); - if (d->walk_lock) - rt_unlock_trie(d->tab->table, d->walk_lock); + /* The hook is now invalid */ + req->hook = NULL; - /* Unlock referenced tables */ - WALK_LIST(tab, d->tables) - rt_unlock_table(tab->table); + /* And free the CLI (deferred) */ + rfree(d->cli->pool); } -static void -rt_show_cont(struct cli *c) +static int +rt_show_cleanup(struct cli *c) { struct rt_show_data *d = c->rover; - struct rtable *tab = d->tab->table; -#ifdef DEBUGGING - unsigned max = 4; -#else - unsigned max = 64; -#endif - struct fib *fib = &tab->fib; - struct fib_iterator *it = &d->fit; - if (d->running_on_config && (d->running_on_config != config)) + /* Cancel the feed */ + if (d->req.hook) { - cli_printf(c, 8004, "Stopped due to reconfiguration"); - goto done; + rt_stop_export(&d->req, rt_show_export_stopped_cleanup); + return 1; } + else + return 0; +} - if (!d->table_open) - { - /* We use either trie-based walk or fib-based walk */ - d->trie_walk = tab->trie && - (d->addr_mode == RSD_ADDR_IN) && - net_val_match(tab->addr_type, NB_IP); +static void rt_show_export_stopped(struct rt_export_request *req); - if (d->trie_walk && !d->walk_state) - d->walk_state = lp_allocz(c->parser_pool, sizeof (struct f_trie_walk_state)); +static void +rt_show_log_state_change(struct rt_export_request *req, u8 state) +{ + if (state == TES_READY) + rt_stop_export(req, rt_show_export_stopped); +} - if (d->trie_walk) - { - d->walk_lock = rt_lock_trie(tab); - trie_walk_init(d->walk_state, tab->trie, d->addr); - } - else - FIB_ITERATE_INIT(&d->fit, &tab->fib); +static void +rt_show_dump_req(struct rt_export_request *req) +{ + debug(" CLI Show Route Feed %p\n", req); +} - d->table_open = 1; - d->table_counter++; - d->kernel = rt_show_get_kernel(d); +static void +rt_show_cont(struct rt_show_data *d) +{ + struct cli *c = d->cli; - d->show_counter_last = d->show_counter; - d->rt_counter_last = d->rt_counter; - d->net_counter_last = d->net_counter; + if (d->running_on_config && (d->running_on_config != config)) + { + cli_printf(c, 8004, "Stopped due to reconfiguration"); - if (d->tables_defined_by & RSD_TDB_SET) - rt_show_table(c, d); + /* No more action */ + c->cleanup = NULL; + c->cont = NULL; + c->rover = NULL; + cli_write_trigger(c); + return; } - if (d->trie_walk) - { - /* Trie-based walk */ - net_addr addr; - while (trie_walk_next(d->walk_state, &addr)) - { - net *n = net_find(tab, &addr); - if (!n) - continue; + d->req = (struct rt_export_request) { + .addr = d->addr, + .name = "CLI Show Route", + .export_bulk = rt_show_net_export_bulk, + .dump_req = rt_show_dump_req, + .log_state_change = rt_show_log_state_change, + .addr_mode = d->addr_mode, + }; - rt_show_net(c, n, d); + d->table_counter++; - if (!--max) - return; - } + d->show_counter_last = d->show_counter; + d->rt_counter_last = d->rt_counter; + d->net_counter_last = d->net_counter; - rt_unlock_trie(tab, d->walk_lock); - d->walk_lock = NULL; - } - else - { - /* fib-based walk */ - FIB_ITERATE_START(fib, it, net, n) - { - if ((d->addr_mode == RSD_ADDR_IN) && (!net_in_netX(n->n.addr, d->addr))) - goto next; + if (d->tables_defined_by & RSD_TDB_SET) + rt_show_table(d); - if (!max--) - { - FIB_ITERATE_PUT(it); - return; - } - rt_show_net(c, n, d); + rt_request_export(d->tab->table, &d->req); +} - next:; - } - FIB_ITERATE_END; - } +static void +rt_show_export_stopped(struct rt_export_request *req) +{ + struct rt_show_data *d = SKIP_BACK(struct rt_show_data, req, req); + + /* The hook is now invalid */ + req->hook = NULL; if (d->stats) { if (d->last_table != d->tab) - rt_show_table(c, d); + rt_show_table(d); - cli_printf(c, -1007, "%d of %d routes for %d networks in table %s", + cli_printf(d->cli, -1007, "%d of %d routes for %d networks in table %s", d->show_counter - d->show_counter_last, d->rt_counter - d->rt_counter_last, - d->net_counter - d->net_counter_last, tab->name); + d->net_counter - d->net_counter_last, d->tab->name); } - d->kernel = NULL; - d->table_open = 0; d->tab = NODE_NEXT(d->tab); if (NODE_VALID(d->tab)) - return; + return rt_show_cont(d); + + /* Printout total stats */ if (d->stats && (d->table_counter > 1)) { - if (d->last_table) cli_printf(c, -1007, ""); - cli_printf(c, 14, "Total: %d of %d routes for %d networks in %d tables", + if (d->last_table) cli_printf(d->cli, -1007, ""); + cli_printf(d->cli, 14, "Total: %d of %d routes for %d networks in %d tables", d->show_counter, d->rt_counter, d->net_counter, d->table_counter); } + else if (!d->rt_counter && ((d->addr_mode == TE_ADDR_EQUAL) || (d->addr_mode == TE_ADDR_FOR))) + cli_printf(d->cli, 8001, "Network not found"); else - cli_printf(c, 0, ""); + cli_printf(d->cli, 0, ""); -done: - rt_show_cleanup(c); - c->cont = c->cleanup = NULL; + cli_write_trigger(d->cli); } struct rt_show_data_rtable * -rt_show_add_table(struct rt_show_data *d, rtable *t) +rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name) { struct rt_show_data_rtable *tab = cfg_allocz(sizeof(struct rt_show_data_rtable)); tab->table = t; + tab->name = name; add_tail(&(d->tables), &(tab->n)); return tab; } +struct rt_show_data_rtable * +rt_show_add_table(struct rt_show_data *d, struct rtable *t) +{ + struct rt_show_data_rtable *rsdr = rt_show_add_exporter(d, &t->exporter, t->name); + + struct proto_config *krt = t->config->krt_attached; + if (krt) + rsdr->kernel = (struct krt_proto *) krt->proto; + + return rsdr; +} + static inline void rt_show_get_default_tables(struct rt_show_data *d) { @@ -441,16 +408,16 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->export_mode) { if (!tab->export_channel && d->export_channel && - (tab->table == d->export_channel->table)) + (tab->table == &d->export_channel->table->exporter)) tab->export_channel = d->export_channel; if (!tab->export_channel && d->export_protocol) - tab->export_channel = proto_find_channel_by_table(d->export_protocol, tab->table); + tab->export_channel = proto_find_channel_by_table(d->export_protocol, SKIP_BACK(rtable, exporter, tab->table)); if (!tab->export_channel) { if (d->tables_defined_by & RSD_TDB_NMN) - cf_error("No export channel for table %s", tab->table->name); + cf_error("No export channel for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -461,7 +428,7 @@ rt_show_prepare_tables(struct rt_show_data *d) if (d->addr && (tab->table->addr_type != d->addr->type)) { if (d->tables_defined_by & RSD_TDB_NMN) - cf_error("Incompatible type of prefix/ip for table %s", tab->table->name); + cf_error("Incompatible type of prefix/ip for table %s", tab->name); rem_node(&(tab->n)); continue; @@ -473,48 +440,29 @@ rt_show_prepare_tables(struct rt_show_data *d) cf_error("No valid tables"); } +static void +rt_show_dummy_cont(struct cli *c UNUSED) +{ + /* Explicitly do nothing to prevent CLI from trying to parse another command. */ +} + void rt_show(struct rt_show_data *d) { - struct rt_show_data_rtable *tab; - net *n; - /* Filtered routes are neither exported nor have sensible ordering */ if (d->filtered && (d->export_mode || d->primary_only)) cf_error("Incompatible show route options"); rt_show_prepare_tables(d); - if (!d->addr || (d->addr_mode == RSD_ADDR_IN)) - { - WALK_LIST(tab, d->tables) - rt_lock_table(tab->table); - - /* There is at least one table */ - d->tab = HEAD(d->tables); - this_cli->cont = rt_show_cont; - this_cli->cleanup = rt_show_cleanup; - this_cli->rover = d; - } - else - { - WALK_LIST(tab, d->tables) - { - d->tab = tab; - d->kernel = rt_show_get_kernel(d); + if (EMPTY_LIST(d->tables)) + cf_error("No suitable tables found"); - if (d->addr_mode == RSD_ADDR_FOR) - n = net_route(tab->table, d->addr); - else - n = net_find(tab->table, d->addr); + d->tab = HEAD(d->tables); - if (n) - rt_show_net(this_cli, n, d); - } + this_cli->cleanup = rt_show_cleanup; + this_cli->rover = d; + this_cli->cont = rt_show_dummy_cont; - if (d->rt_counter) - cli_msg(0, ""); - else - cli_msg(8001, "Network not found"); - } + rt_show_cont(d); } diff --git a/nest/rt-table.c b/nest/rt-table.c index 1631e00f..29690414 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -126,7 +126,11 @@ static inline void rt_flowspec_resolve_rte(rte *r, struct channel *c); static inline void rt_prune_table(rtable *tab); static inline void rt_schedule_notify(rtable *tab); static void rt_flowspec_notify(rtable *tab, net *net); -static void rt_feed_channel(void *); +static void rt_feed_by_fib(void *); +static void rt_feed_by_trie(void *); +static void rt_feed_equal(void *); +static void rt_feed_for(void *); +static uint rt_feed_net(struct rt_export_hook *c, net *n); const char *rt_import_state_name_array[TIS_MAX] = { [TIS_DOWN] = "DOWN", @@ -139,7 +143,6 @@ const char *rt_import_state_name_array[TIS_MAX] = { const char *rt_export_state_name_array[TES_MAX] = { [TES_DOWN] = "DOWN", - [TES_HUNGRY] = "HUNGRY", [TES_FEEDING] = "FEEDING", [TES_READY] = "READY", [TES_STOP] = "STOP" @@ -606,7 +609,7 @@ rte_store(const rte *r, net *net, rtable *tab) if (ea_is_cached(e->rte.attrs)) e->rte.attrs = rta_clone(e->rte.attrs); else - e->rte.attrs = rta_lookup(e->rte.attrs); + e->rte.attrs = rta_lookup(e->rte.attrs, 1); return e; } @@ -817,17 +820,6 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) if (!new && old) CHANNEL_LIMIT_POP(c, OUT); - /* Apply export table */ - struct rte_storage *old_exported = NULL; - if (c->out_table) - { - if (!rte_update_out(c, net, new, old, &old_exported)) - { - channel_rte_trace_out(D_ROUTES, c, new, "idempotent"); - return; - } - } - if (new) stats->updates_accepted++; else @@ -849,10 +841,7 @@ do_rt_notify(struct channel *c, const net_addr *net, rte *new, const rte *old) channel_rte_trace_out(D_ROUTES, c, old, "removed"); } - p->rt_notify(p, c, net, new, old_exported ? &old_exported->rte : old); - - if (c->out_table && old_exported) - rte_free(old_exported); + p->rt_notify(p, c, net, new, old); } static void @@ -1183,11 +1172,33 @@ rte_announce(rtable *tab, net *net, struct rte_storage *new, struct rte_storage } struct rt_export_hook *eh; - WALK_LIST(eh, tab->exports) + WALK_LIST(eh, tab->exporter.hooks) { if (eh->export_state == TES_STOP) continue; + switch (eh->req->addr_mode) + { + case TE_ADDR_NONE: + break; + + case TE_ADDR_IN: + if (!net_in_netX(net->n.addr, eh->req->addr)) + continue; + break; + + case TE_ADDR_EQUAL: + if (!net_equal(net->n.addr, eh->req->addr)) + continue; + break; + + case TE_ADDR_FOR: + bug("Continuos export of best prefix match not implemented yet."); + + default: + bug("Strange table export address mode: %d", eh->req->addr_mode); + } + if (new) eh->stats.updates_received++; else @@ -1278,6 +1289,14 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr rte *old_best = old_best_stored ? &old_best_stored->rte : NULL; rte *old = NULL; + /* If the new route is identical to the old one, we find the attributes in + * cache and clone these with no performance drop. OTOH, if we were to lookup + * the attributes, such a route definitely hasn't been anywhere yet, + * therefore it's definitely worth the time. */ + struct rte_storage *new_stored = NULL; + if (new) + new = &(new_stored = rte_store(new, net, table))->rte; + /* Find and remove original route from the same protocol */ struct rte_storage **before_old = rte_find(net, src); @@ -1300,7 +1319,7 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr c->table->name, net->n.addr, old->src->proto->name, old->src->private_id, old->src->global_id); } - if (new && rte_same(old, new)) + if (new && rte_same(old, &new_stored->rte)) { /* No changes, ignore the new route and refresh the old one */ @@ -1311,6 +1330,10 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr stats->updates_ignored++; rt_rte_trace_in(D_ROUTES, req, new, "ignored"); } + + /* We need to free the already stored route here before returning */ + rte_free(new_stored); + return; } *before_old = (*before_old)->next; @@ -1323,8 +1346,13 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr return; } - if (req->preimport) - new = req->preimport(req, new, old); + /* If rejected by import limit, we need to pretend there is no route */ + if (req->preimport && (req->preimport(req, new, old) == 0)) + { + rte_free(new_stored); + new_stored = NULL; + new = NULL; + } int new_ok = rte_is_ok(new); int old_ok = rte_is_ok(old); @@ -1339,8 +1367,6 @@ rte_recalculate(struct rt_import_hook *c, net *net, rte *new, struct rte_src *sr if (old_ok || new_ok) table->last_rt_change = current_time(); - struct rte_storage *new_stored = new ? rte_store(new, net, table) : NULL; - if (table->config->sorted) { /* If routes are sorted, just insert new route to appropriate position */ @@ -1496,14 +1522,14 @@ rte_update_unlock(void) lp_flush(rte_update_pool); } -rte * +int channel_preimport(struct rt_import_request *req, rte *new, rte *old) { struct channel *c = SKIP_BACK(struct channel, in_req, req); if (new && !old) if (CHANNEL_LIMIT_PUSH(c, RX)) - return NULL; + return 0; if (!new && old) CHANNEL_LIMIT_POP(c, RX); @@ -1513,22 +1539,20 @@ channel_preimport(struct rt_import_request *req, rte *new, rte *old) if (new_in && !old_in) if (CHANNEL_LIMIT_PUSH(c, IN)) - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) { new->flags |= REF_FILTERED; - return new; + return 1; } else - return NULL; + return 0; if (!new_in && old_in) CHANNEL_LIMIT_POP(c, IN); - return new; + return 1; } -static void rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); - void rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) { @@ -1537,15 +1561,13 @@ rte_update(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) ASSERT(c->channel_state == CS_UP); - if (c->in_table && !rte_update_in(c, n, new, src)) - return; - - return rte_update_direct(c, n, new, src); -} + /* The import reloader requires prefilter routes to be the first layer */ + if (new && (c->in_keep & RIK_PREFILTER)) + if (ea_is_cached(new->attrs) && !new->attrs->next) + new->attrs = ea_clone(new->attrs); + else + new->attrs = ea_lookup(new->attrs, 0); -static void -rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) -{ const struct filter *filter = c->in_filter; struct channel_import_stats *stats = &c->import_stats; @@ -1563,7 +1585,7 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src stats->updates_filtered++; channel_rte_trace_in(D_FILTERS, c, new, "filtered out"); - if (c->in_keep_filtered) + if (c->in_keep & RIK_REJECTED) new->flags |= REF_FILTERED; else new = NULL; @@ -1588,6 +1610,18 @@ rte_update_direct(struct channel *c, const net_addr *n, rte *new, struct rte_src rte_import(&c->in_req, n, new, src); + /* Now the route attributes are kept by the in-table cached version + * and we may drop the local handle */ + if (new && (c->in_keep & RIK_PREFILTER)) + { + /* There may be some updates on top of the original attribute block */ + ea_list *a = new->attrs; + while (a->next) + a = a->next; + + ea_free(a); + } + rte_update_unlock(); } @@ -1677,10 +1711,20 @@ rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filte } static void +rt_table_export_done(struct rt_export_hook *hook) +{ + struct rt_exporter *re = hook->table; + struct rtable *tab = SKIP_BACK(struct rtable, exporter, re); + + rt_unlock_table(tab); + DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); +} + +static void rt_export_stopped(void *data) { struct rt_export_hook *hook = data; - rtable *tab = hook->table; + struct rt_exporter *tab = hook->table; /* Unlist */ rem_node(&hook->n); @@ -1688,14 +1732,13 @@ rt_export_stopped(void *data) /* Reporting the channel as stopped. */ hook->stopped(hook->req); + /* Reporting the hook as finished. */ + CALL(tab->done, hook); + /* Freeing the hook together with its coroutine. */ rfree(hook->pool); - rt_unlock_table(tab); - - DBG("Export hook %p in table %s finished uc=%u\n", hook, tab->name, tab->use_count); } - static inline void rt_set_import_state(struct rt_import_hook *hook, u8 state) { @@ -1706,7 +1749,7 @@ rt_set_import_state(struct rt_import_hook *hook, u8 state) hook->req->log_state_change(hook->req, state); } -static inline void +void rt_set_export_state(struct rt_export_hook *hook, u8 state) { hook->last_state_change = current_time(); @@ -1747,54 +1790,114 @@ rt_stop_import(struct rt_import_request *req, void (*stopped)(struct rt_import_r hook->stopped = stopped; } -void -rt_request_export(rtable *tab, struct rt_export_request *req) +static struct rt_export_hook * +rt_table_export_start(struct rt_exporter *re, struct rt_export_request *req) { + rtable *tab = SKIP_BACK(rtable, exporter, re); rt_lock_table(tab); pool *p = rp_new(tab->rp, "Export hook"); - struct rt_export_hook *hook = req->hook = mb_allocz(p, sizeof(struct rt_export_hook)); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); hook->pool = p; hook->lp = lp_new_default(p); - - hook->req = req; - hook->table = tab; /* stats zeroed by mb_allocz */ + switch (req->addr_mode) + { + case TE_ADDR_IN: + if (tab->trie && net_val_match(tab->addr_type, NB_IP)) + { + hook->walk_state = mb_allocz(p, sizeof (struct f_trie_walk_state)); + hook->walk_lock = rt_lock_trie(tab); + trie_walk_init(hook->walk_state, tab->trie, req->addr); + hook->event = ev_new_init(p, rt_feed_by_trie, hook); + break; + } + /* fall through */ + case TE_ADDR_NONE: + FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + hook->event = ev_new_init(p, rt_feed_by_fib, hook); + break; - rt_set_export_state(hook, TES_HUNGRY); + case TE_ADDR_EQUAL: + hook->event = ev_new_init(p, rt_feed_equal, hook); + break; - hook->n = (node) {}; - add_tail(&tab->exports, &hook->n); + case TE_ADDR_FOR: + hook->event = ev_new_init(p, rt_feed_for, hook); + break; - FIB_ITERATE_INIT(&hook->feed_fit, &tab->fib); + default: + bug("Requested an unknown export address mode"); + } DBG("New export hook %p req %p in table %s uc=%u\n", hook, req, tab->name, tab->use_count); - rt_set_export_state(hook, TES_FEEDING); + return hook; +} + +void +rt_request_export(struct rt_exporter *re, struct rt_export_request *req) +{ + struct rt_export_hook *hook = req->hook = re->start(re, req); + + hook->req = req; + hook->table = re; - hook->event = ev_new_init(p, rt_feed_channel, hook); + hook->n = (node) {}; + add_tail(&re->hooks, &hook->n); + + /* Regular export */ + rt_set_export_state(hook, TES_FEEDING); ev_schedule_work(hook->event); } +static void +rt_table_export_stop(struct rt_export_hook *hook) +{ + rtable *tab = SKIP_BACK(rtable, exporter, hook->table); + + if (hook->export_state != TES_FEEDING) + return; + + switch (hook->req->addr_mode) + { + case TE_ADDR_IN: + if (hook->walk_lock) + { + rt_unlock_trie(tab, hook->walk_lock); + hook->walk_lock = NULL; + mb_free(hook->walk_state); + hook->walk_state = NULL; + break; + } + /* fall through */ + case TE_ADDR_NONE: + fit_get(&tab->fib, &hook->feed_fit); + break; + } +} + void rt_stop_export(struct rt_export_request *req, void (*stopped)(struct rt_export_request *)) { ASSERT_DIE(req->hook); struct rt_export_hook *hook = req->hook; - rtable *tab = hook->table; - - /* Stop feeding */ + /* Cancel the feeder event */ ev_postpone(hook->event); - if (hook->export_state == TES_FEEDING) - fit_get(&tab->fib, &hook->feed_fit); + /* Stop feeding from the exporter */ + CALL(hook->table->stop, hook); + /* Reset the event as the stopped event */ hook->event->hook = rt_export_stopped; hook->stopped = stopped; + /* Update export state */ rt_set_export_state(hook, TES_STOP); + + /* Run the stopped event */ ev_schedule(hook->event); } @@ -1947,7 +2050,7 @@ rt_dump_hooks(rtable *tab) } struct rt_export_hook *eh; - WALK_LIST(eh, tab->exports) + WALK_LIST(eh, tab->exporter.hooks) { eh->req->dump_req(eh->req); debug(" Export hook %p requested by %p:" @@ -2251,10 +2354,18 @@ rt_setup(pool *pp, struct rtable_config *cf) init_list(&t->flowspec_links); + t->exporter = (struct rt_exporter) { + .addr_type = t->addr_type, + .start = rt_table_export_start, + .stop = rt_table_export_stop, + .done = rt_table_export_done, + }; + init_list(&t->exporter.hooks); + if (!(t->internal = cf->internal)) { init_list(&t->imports); - init_list(&t->exports); + hmap_init(&t->id_map, p, 1024); hmap_set(&t->id_map, 0); @@ -2806,7 +2917,7 @@ static struct rte_storage * rt_flowspec_update_rte(rtable *tab, net *n, rte *r) { #ifdef CONFIG_BGP - if (rt_get_source_attr(r) != RTS_BGP) + if (r->generation || (rt_get_source_attr(r) != RTS_BGP)) return NULL; struct bgp_channel *bc = (struct bgp_channel *) SKIP_BACK(struct channel, in_req, r->sender->req); @@ -3119,7 +3230,7 @@ rt_commit(struct config *new, struct config *old) } /** - * rt_feed_channel - advertise all routes to a channel + * rt_feed_by_fib - advertise all routes to a channel by walking a fib * @c: channel to be fed * * This function performs one pass of advertisement of routes to a channel that @@ -3128,7 +3239,7 @@ rt_commit(struct config *new, struct config *old) * order not to monopolize CPU time.) */ static void -rt_feed_channel(void *data) +rt_feed_by_fib(void *data) { struct rt_export_hook *c = data; @@ -3137,7 +3248,9 @@ rt_feed_channel(void *data) ASSERT(c->export_state == TES_FEEDING); - FIB_ITERATE_START(&c->table->fib, fit, net, n) + rtable *tab = SKIP_BACK(rtable, exporter, c->table); + + FIB_ITERATE_START(&tab->fib, fit, net, n) { if (max_feed <= 0) { @@ -3146,315 +3259,134 @@ rt_feed_channel(void *data) return; } - if (c->export_state != TES_FEEDING) - goto done; + ASSERT(c->export_state == TES_FEEDING); - if (c->req->export_bulk) - { - uint count = rte_feed_count(n); - if (count) - { - rte_update_lock(); - rte **feed = alloca(count * sizeof(rte *)); - rte_feed_obtain(n, feed, count); - struct rt_pending_export rpe = { .new_best = n->routes }; - c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); - max_feed -= count; - rte_update_unlock(); - } - } - else if (n->routes && rte_is_valid(&n->routes->rte)) - { - rte_update_lock(); - struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; - c->req->export_one(c->req, n->n.addr, &rpe); - max_feed--; - rte_update_unlock(); - } + if ((c->req->addr_mode == TE_ADDR_NONE) || net_in_netX(n->n.addr, c->req->addr)) + max_feed -= rt_feed_net(c, n); } FIB_ITERATE_END; -done: rt_set_export_state(c, TES_READY); } - -/* - * Import table - */ - -int -rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src) +static void +rt_feed_by_trie(void *data) { - struct rtable *tab = c->in_table; - net *net; - - if (new) - net = net_get(tab, n); - else - { - net = net_find(tab, n); - - if (!net) - goto drop_withdraw; - } - - /* Find the old rte */ - struct rte_storage **pos = rte_find(net, src); - if (*pos) - { - rte *old = &(*pos)->rte; - if (new && rte_same(old, new)) - { - /* Refresh the old rte, continue with update to main rtable */ - if (old->flags & (REF_STALE | REF_DISCARD | REF_MODIFY)) - { - old->flags &= ~(REF_STALE | REF_DISCARD | REF_MODIFY); - return 1; - } - - goto drop_update; - } - - if (!new) - CHANNEL_LIMIT_POP(c, RX); + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - /* Move iterator if needed */ - if (*pos == c->reload_next_rte) - c->reload_next_rte = (*pos)->next; + ASSERT_DIE(c->walk_state); + struct f_trie_walk_state *ws = c->walk_state; - /* Remove the old rte */ - struct rte_storage *del = *pos; - *pos = (*pos)->next; - rte_free(del); - tab->rt_count--; - } - else if (new) - { - if (CHANNEL_LIMIT_PUSH(c, RX)) - { - /* Required by rte_trace_in() */ - new->net = n; + int max_feed = 256; - channel_rte_trace_in(D_FILTERS, c, new, "ignored [limit]"); - goto drop_update; - } - } - else - goto drop_withdraw; + ASSERT_DIE(c->export_state == TES_FEEDING); - if (!new) + net_addr addr; + while (trie_walk_next(ws, &addr)) { - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } + net *n = net_find(tab, &addr); + if (!n) + continue; - /* Insert the new rte */ - struct rte_storage *e = rte_store(new, net, tab); - e->rte.lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; + if ((max_feed -= rt_feed_net(c, n)) <= 0) + return; -drop_update: - c->import_stats.updates_received++; - c->in_req.hook->stats.updates_ignored++; + ASSERT_DIE(c->export_state == TES_FEEDING); + } - if (!net->routes) - fib_delete(&tab->fib, net); + rt_unlock_trie(tab, c->walk_lock); + c->walk_lock = NULL; - return 0; + mb_free(c->walk_state); + c->walk_state = NULL; -drop_withdraw: - c->import_stats.withdraws_received++; - c->in_req.hook->stats.withdraws_ignored++; - return 0; + rt_set_export_state(c, TES_READY); } -int -rt_reload_channel(struct channel *c) +static void +rt_feed_equal(void *data) { - struct rtable *tab = c->in_table; - struct fib_iterator *fit = &c->reload_fit; - int max_feed = 64; - - ASSERT(c->channel_state == CS_UP); + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - if (!c->reload_active) - { - FIB_ITERATE_INIT(fit, &tab->fib); - c->reload_active = 1; - } + ASSERT_DIE(c->export_state == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_EQUAL); - do { - for (struct rte_storage *e = c->reload_next_rte; e; e = e->next) - { - if (max_feed-- <= 0) - { - c->reload_next_rte = e; - debug("%s channel reload burst split (max_feed=%d)", c->proto->name, max_feed); - return 0; - } + net *n = net_find(tab, c->req->addr); + if (n) + rt_feed_net(c, n); - rte r = e->rte; - rte_update_direct(c, r.net, &r, r.src); - } + rt_set_export_state(c, TES_READY); +} - c->reload_next_rte = NULL; +static void +rt_feed_for(void *data) +{ + struct rt_export_hook *c = data; + rtable *tab = SKIP_BACK(rtable, exporter, c->table); - FIB_ITERATE_START(&tab->fib, fit, net, n) - { - if (c->reload_next_rte = n->routes) - { - FIB_ITERATE_PUT_NEXT(fit, &tab->fib); - break; - } - } - FIB_ITERATE_END; - } - while (c->reload_next_rte); + ASSERT_DIE(c->export_state == TES_FEEDING); + ASSERT_DIE(c->req->addr_mode == TE_ADDR_FOR); - c->reload_active = 0; - return 1; -} + net *n = net_route(tab, c->req->addr); + if (n) + rt_feed_net(c, n); -void -rt_reload_channel_abort(struct channel *c) -{ - if (c->reload_active) - { - /* Unlink the iterator */ - fit_get(&c->in_table->fib, &c->reload_fit); - c->reload_next_rte = NULL; - c->reload_active = 0; - } + rt_set_export_state(c, TES_READY); } -void -rt_prune_sync(rtable *t, int all) +static uint +rt_feed_net(struct rt_export_hook *c, net *n) { - struct fib_iterator fit; - - FIB_ITERATE_INIT(&fit, &t->fib); - -again: - FIB_ITERATE_START(&t->fib, &fit, net, n) - { - struct rte_storage *e, **ee = &n->routes; + if (c->req->export_bulk) + { + uint count = rte_feed_count(n); + if (count) + { + rte_update_lock(); + rte **feed = alloca(count * sizeof(rte *)); + rte_feed_obtain(n, feed, count); + struct rt_pending_export rpe = { .new_best = n->routes }; + c->req->export_bulk(c->req, n->n.addr, &rpe, feed, count); + rte_update_unlock(); + } + return count; + } - while (e = *ee) - { - if (all || (e->rte.flags & (REF_STALE | REF_DISCARD))) + if (n->routes && rte_is_valid(&n->routes->rte)) { - *ee = e->next; - rte_free(e); - t->rt_count--; + rte_update_lock(); + struct rt_pending_export rpe = { .new = n->routes, .new_best = n->routes }; + c->req->export_one(c->req, n->n.addr, &rpe); + rte_update_unlock(); + return 1; } - else - ee = &e->next; - } - if (all || !n->routes) - { - FIB_ITERATE_PUT(&fit); - fib_delete(&t->fib, n); - goto again; - } - } - FIB_ITERATE_END; + return 0; } /* - * Export table + * Import table */ -int -rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old0, struct rte_storage **old_exported) -{ - struct rtable *tab = c->out_table; - struct rte_src *src; - net *net; - - if (new) - { - net = net_get(tab, n); - src = new->src; - } - else - { - net = net_find(tab, n); - src = old0->src; - if (!net) - goto drop; - } - - /* Find the old rte */ - struct rte_storage **pos = (c->ra_mode == RA_ANY) ? rte_find(net, src) : &net->routes; - struct rte_storage *old = NULL; - - if (old = *pos) - { - if (new && rte_same(&(*pos)->rte, new)) - goto drop; - - /* Remove the old rte */ - *pos = old->next; - *old_exported = old; - tab->rt_count--; - } - - if (!new) - { - if (!old) - goto drop; - - if (!net->routes) - fib_delete(&tab->fib, net); - - return 1; - } - - /* Insert the new rte */ - struct rte_storage *e = rte_store(new, net, tab); - e->rte.lastmod = current_time(); - e->next = *pos; - *pos = e; - tab->rt_count++; - return 1; - -drop: - return 0; -} - -void -rt_refeed_channel(struct channel *c) +void channel_reload_export_bulk(struct rt_export_request *req, const net_addr *net, struct rt_pending_export *rpe UNUSED, rte **feed, uint count) { - if (!c->out_table) - { - channel_request_feeding(c); - return; - } - - ASSERT_DIE(c->ra_mode != RA_ANY); - - c->proto->feed_begin(c, 0); + struct channel *c = SKIP_BACK(struct channel, reload_req, req); - FIB_WALK(&c->out_table->fib, net, n) - { - if (!n->routes) - continue; - - rte e = n->routes->rte; - c->proto->rt_notify(c->proto, c, n->n.addr, &e, NULL); - } - FIB_WALK_END; + for (uint i=0; i<count; i++) + if (feed[i]->sender == c->in_req.hook) + { + /* Strip the later attribute layers */ + rte new = *feed[i]; + while (new.attrs->next) + new.attrs = new.attrs->next; - c->proto->feed_end(c); + /* And reload the route */ + rte_update(c, net, &new, new.src); + } } @@ -56,6 +56,17 @@ struct rtable_config { btime max_settle_time; /* Maximum settle time for notifications */ }; +struct rt_export_hook; +struct rt_export_request; + +struct rt_exporter { + list hooks; /* Registered route export hooks */ + uint addr_type; /* Type of address data exported (NET_*) */ + struct rt_export_hook *(*start)(struct rt_exporter *, struct rt_export_request *); + void (*stop)(struct rt_export_hook *); + void (*done)(struct rt_export_hook *); +}; + typedef struct rtable { resource r; node n; /* Node in list of all tables */ @@ -69,7 +80,7 @@ typedef struct rtable { u32 rt_count; /* Number of routes in the table */ list imports; /* Registered route importers */ - list exports; /* Registered route exporters */ + struct rt_exporter exporter; /* Exporter API structure */ struct hmap id_map; struct hostcache *hostcache; @@ -171,7 +182,7 @@ struct rt_import_request { void (*log_state_change)(struct rt_import_request *req, u8 state); /* Preimport is called when the @new route is just-to-be inserted, replacing @old. * Return a route (may be different or modified in-place) to continue or NULL to withdraw. */ - struct rte *(*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); + int (*preimport)(struct rt_import_request *req, struct rte *new, struct rte *old); struct rte *(*rte_modify)(struct rte *, struct linpool *); }; @@ -203,7 +214,9 @@ struct rt_pending_export { struct rt_export_request { struct rt_export_hook *hook; /* Table part of the export */ char *name; + const net_addr *addr; /* Network prefilter address */ u8 trace_routes; + u8 addr_mode; /* Network prefilter mode (TE_ADDR_*) */ /* There are two methods of export. You can either request feeding every single change * or feeding the whole route feed. In case of regular export, &export_one is preferred. @@ -221,7 +234,7 @@ struct rt_export_request { struct rt_export_hook { node n; - rtable *table; /* The connected table */ + struct rt_exporter *table; /* The connected table */ pool *pool; linpool *lp; @@ -234,12 +247,20 @@ struct rt_export_hook { u32 withdraws_received; /* Number of route withdraws received */ } stats; - struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + union { + struct fib_iterator feed_fit; /* Routing table iterator used during feeding */ + struct { + struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ + struct f_trie *walk_lock; /* Locked trie for walking */ + }; + u32 hash_iter; /* Iterator over hash */ + }; btime last_state_change; /* Time of last state transition */ u8 refeed_pending; /* Refeeding and another refeed is scheduled */ u8 export_state; /* Route export state (TES_*, see below) */ + u8 feed_type; /* Which feeding method is used (TFT_*, see below) */ struct event *event; /* Event running all the export operations */ @@ -255,14 +276,26 @@ struct rt_export_hook { #define TIS_MAX 6 #define TES_DOWN 0 -#define TES_HUNGRY 1 #define TES_FEEDING 2 #define TES_READY 3 #define TES_STOP 4 #define TES_MAX 5 +/* Value of addr_mode */ +#define TE_ADDR_NONE 0 /* No address matching */ +#define TE_ADDR_EQUAL 1 /* Exact query - show route <addr> */ +#define TE_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ +#define TE_ADDR_IN 3 /* Interval query - show route in <addr> */ + + +#define TFT_FIB 1 +#define TFT_TRIE 2 +#define TFT_HASH 3 + void rt_request_import(rtable *tab, struct rt_import_request *req); -void rt_request_export(rtable *tab, struct rt_export_request *req); +void rt_request_export(struct rt_exporter *tab, struct rt_export_request *req); + +void rt_export_once(struct rt_exporter *tab, struct rt_export_request *req); void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *)); void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *)); @@ -273,6 +306,8 @@ const char *rt_export_state_name(u8 state); static inline u8 rt_import_get_state(struct rt_import_hook *ih) { return ih ? ih->import_state : TIS_DOWN; } static inline u8 rt_export_get_state(struct rt_export_hook *eh) { return eh ? eh->export_state : TES_DOWN; } +void rt_set_export_state(struct rt_export_hook *hook, u8 state); + void rte_import(struct rt_import_request *req, const net_addr *net, rte *new, struct rte_src *src); /* Types of route announcement, also used as flags */ @@ -363,8 +398,6 @@ int rt_reload_channel(struct channel *c); void rt_reload_channel_abort(struct channel *c); void rt_refeed_channel(struct channel *c); void rt_prune_sync(rtable *t, int all); -int rte_update_in(struct channel *c, const net_addr *n, rte *new, struct rte_src *src); -int rte_update_out(struct channel *c, const net_addr *n, rte *new, const rte *old, struct rte_storage **old_exported); struct rtable_config *rt_new_table(struct symbol *s, uint addr_type); static inline int rt_is_ip(rtable *tab) @@ -385,36 +418,37 @@ extern const int rt_default_ecmp; struct rt_show_data_rtable { node n; - rtable *table; + const char *name; + struct rt_exporter *table; struct channel *export_channel; + struct channel *prefilter; + struct krt_proto *kernel; }; struct rt_show_data { + struct cli *cli; /* Pointer back to the CLI */ net_addr *addr; list tables; struct rt_show_data_rtable *tab; /* Iterator over table list */ struct rt_show_data_rtable *last_table; /* Last table in output */ - struct fib_iterator fit; /* Iterator over networks in table */ - struct f_trie_walk_state *walk_state; /* Iterator over networks in trie */ - struct f_trie *walk_lock; /* Locked trie for walking */ + struct rt_export_request req; /* Export request in use */ int verbose, tables_defined_by; const struct filter *filter; struct proto *show_protocol; struct proto *export_protocol; struct channel *export_channel; struct config *running_on_config; - struct krt_proto *kernel; struct rt_export_hook *kernel_export_hook; int export_mode, addr_mode, primary_only, filtered, stats; - int table_open; /* Iteration (fit) is open */ - int trie_walk; /* Current table is iterated using trie */ int net_counter, rt_counter, show_counter, table_counter; int net_counter_last, rt_counter_last, show_counter_last; + int show_counter_last_flush; }; void rt_show(struct rt_show_data *); -struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t); +struct rt_show_data_rtable * rt_show_add_exporter(struct rt_show_data *d, struct rt_exporter *t, const char *name); +struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, struct rtable *t); /* Value of table definition mode in struct rt_show_data */ #define RSD_TDB_DEFAULT 0 /* no table specified */ @@ -425,11 +459,6 @@ struct rt_show_data_rtable * rt_show_add_table(struct rt_show_data *d, rtable *t #define RSD_TDB_SET 0x1 /* internal: show empty tables */ #define RSD_TDB_NMN 0x2 /* internal: need matching net */ -/* Value of addr_mode */ -#define RSD_ADDR_EQUAL 1 /* Exact query - show route <addr> */ -#define RSD_ADDR_FOR 2 /* Longest prefix match - show route for <addr> */ -#define RSD_ADDR_IN 3 /* Interval query - show route in <addr> */ - /* Value of export_mode in struct rt_show_data */ #define RSEM_NONE 0 /* Export mode not used */ #define RSEM_PREEXPORT 1 /* Routes ready for export, before filtering */ @@ -448,6 +477,8 @@ struct hostentry_adata { void ea_set_hostentry(ea_list **to, struct rtable *dep, struct rtable *tab, ip_addr gw, ip_addr ll, u32 lnum, u32 labels[lnum]); +void ea_show_hostentry(const struct adata *ad, byte *buf, uint size); +void ea_show_nexthop_list(struct cli *c, struct nexthop_adata *nhad); /* * Default protocol preferences diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 6d33ef2e..084c9b63 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -1122,12 +1122,14 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_MP_REACH_NLRI] = { .name = "bgp_mp_reach_nlri", .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_reach_nlri, }, [BA_MP_UNREACH_NLRI] = { .name = "bgp_mp_unreach_nlri", .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL, .decode = bgp_decode_mp_unreach_nlri, }, @@ -1142,6 +1144,7 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_AS4_PATH] = { .name = "bgp_as4_path", .type = T_PATH, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_path, @@ -1149,6 +1152,7 @@ static union bgp_attr_desc bgp_attr_table[BGP_ATTR_MAX] = { [BA_AS4_AGGREGATOR] = { .name = "bgp_as4_aggregator", .type = T_OPAQUE, + .hidden = 1, .flags = BAF_OPTIONAL | BAF_TRANSITIVE, .encode = bgp_encode_raw, .decode = bgp_decode_as4_aggregator, @@ -1253,10 +1257,10 @@ bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to) * Result: one sorted attribute list segment, or NULL if attributes are unsuitable. */ static inline ea_list * -bgp_export_attrs(struct bgp_export_state *s, const ea_list *a) +bgp_export_attrs(struct bgp_export_state *s, ea_list *a) { /* Merge the attribute list */ - ea_list *new = ea_normalize(a); + ea_list *new = ea_normalize(a, 0); ASSERT_DIE(new); uint i, count; @@ -1546,7 +1550,6 @@ bgp_free_bucket_table(struct bgp_channel *c) static struct bgp_bucket * bgp_get_bucket(struct bgp_channel *c, ea_list *new) { - /* Hash and lookup */ u32 hash = ea_hash(new); struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash); @@ -1567,8 +1570,7 @@ bgp_get_bucket(struct bgp_channel *c, ea_list *new) /* Copy the ea_list */ ea_list_copy(b->eattrs, new, ea_size); - /* Insert the bucket to send queue and bucket hash */ - add_tail(&c->bucket_queue, &b->send_node); + /* Insert the bucket to bucket hash */ HASH_INSERT2(c->bucket_hash, RBH, c->pool, b); return b; @@ -1586,14 +1588,30 @@ bgp_get_withdraw_bucket(struct bgp_channel *c) return c->withdraw_bucket; } -void -bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b) +static void +bgp_free_bucket_xx(struct bgp_channel *c, struct bgp_bucket *b) { - rem_node(&b->send_node); HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b); mb_free(b); } +int +bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b) +{ + /* Won't free the withdraw bucket */ + if (b == c->withdraw_bucket) + return 0; + + if (EMPTY_LIST(b->prefixes)) + rem_node(&b->send_node); + + if (b->px_uc || !EMPTY_LIST(b->prefixes)) + return 0; + + bgp_free_bucket_xx(c, b); + return 1; +} + void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b) { @@ -1613,8 +1631,8 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) struct bgp_prefix *px = HEAD(b->prefixes); log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net); - rem_node(&px->buck_node); - add_tail(&wb->prefixes, &px->buck_node); + rem_node(&px->buck_node_xx); + add_tail(&wb->prefixes, &px->buck_node_xx); } } @@ -1625,7 +1643,7 @@ bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b) #define PXH_KEY(px) px->net, px->path_id, px->hash #define PXH_NEXT(px) px->next -#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2) +#define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && (c->add_path_tx ? (i1 == i2) : 1) && net_equal(n1, n2) #define PXH_FN(n,i,h) h #define PXH_REHASH bgp_pxh_rehash @@ -1655,15 +1673,13 @@ bgp_free_prefix_table(struct bgp_channel *c) static struct bgp_prefix * bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) { + u32 path_id_hash = c->add_path_tx ? path_id : 0; /* We must use a different hash function than the rtable */ - u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id)); - struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash); + u32 hash = u32_hash(net_hash(net) ^ u32_hash(path_id_hash)); + struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id_hash, hash); if (px) - { - rem_node(&px->buck_node); return px; - } if (c->prefix_slab) px = sl_alloc(c->prefix_slab); @@ -1680,10 +1696,64 @@ bgp_get_prefix(struct bgp_channel *c, const net_addr *net, u32 path_id) return px; } -void +static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px); + +static inline int +bgp_update_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *b) +{ +#define BPX_TRACE(what) do { \ + if (c->c.debug & D_ROUTES) log(L_TRACE "%s.%s < %s %N %uG %s", \ + c->c.proto->name, c->c.name, what, \ + px->net, px->path_id, (b == c->withdraw_bucket) ? "withdraw" : "update"); } while (0) + px->lastmod = current_time(); + + /* Already queued for the same bucket */ + if (px->cur == b) + { + BPX_TRACE("already queued"); + return 0; + } + + /* Unqueue from the old bucket */ + if (px->cur) + { + rem_node(&px->buck_node_xx); + bgp_done_bucket(c, px->cur); + } + + /* The new bucket is the same as we sent before */ + if ((px->last == b) || c->c.out_table && !px->last && (b == c->withdraw_bucket)) + { + if (px->cur) + BPX_TRACE("reverted"); + else + BPX_TRACE("already sent"); + + /* Well, we haven't sent anything yet */ + if (!px->last) + bgp_free_prefix(c, px); + + px->cur = NULL; + return 0; + } + + /* Enqueue the bucket if it has been empty */ + if ((b != c->withdraw_bucket) && EMPTY_LIST(b->prefixes)) + add_tail(&c->bucket_queue, &b->send_node); + + /* Enqueue to the new bucket and indicate the change */ + add_tail(&b->prefixes, &px->buck_node_xx); + px->cur = b; + + BPX_TRACE("queued"); + return 1; + +#undef BPX_TRACE +} + +static void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) { - rem_node(&px->buck_node); HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px); if (c->prefix_slab) @@ -1692,6 +1762,167 @@ bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px) mb_free(px); } +void +bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck) +{ + /* Cleanup: We're called from bucket senders. */ + ASSERT_DIE(px->cur == buck); + rem_node(&px->buck_node_xx); + + /* We may want to store the updates */ + if (c->c.out_table) + { + /* Nothing to be sent right now */ + px->cur = NULL; + + /* Unref the previous sent version */ + if (px->last) + px->last->px_uc--; + + /* Ref the current sent version */ + if (buck != c->withdraw_bucket) + { + px->last = buck; + px->last->px_uc++; + return; + } + + /* Prefixes belonging to the withdraw bucket are freed always */ + } + + bgp_free_prefix(c, px); +} + + +/* + * Prefix hash table exporter + */ + +static void +bgp_out_table_feed(void *data) +{ + struct rt_export_hook *hook = data; + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, hook->table); + + int max = 512; + + const net_addr *neq = (hook->req->addr_mode == TE_ADDR_EQUAL) ? hook->req->addr : NULL; + const net_addr *cand = NULL; + + do { + HASH_WALK_ITER(c->prefix_hash, PXH, n, hook->hash_iter) + { + switch (hook->req->addr_mode) + { + case TE_ADDR_IN: + if (!net_in_netX(n->net, hook->req->addr)) + continue; + /* fall through */ + case TE_ADDR_NONE: + /* Splitting only for multi-net exports */ + if (--max <= 0) + HASH_WALK_ITER_PUT; + break; + + case TE_ADDR_FOR: + if (!neq) + { + if (net_in_netX(hook->req->addr, n->net) && (!cand || (n->net->length > cand->length))) + cand = n->net; + continue; + } + /* fall through */ + case TE_ADDR_EQUAL: + if (!net_equal(n->net, neq)) + continue; + break; + } + + struct bgp_bucket *buck = n->cur ?: n->last; + ea_list *ea = NULL; + if (buck == c->withdraw_bucket) + ea_set_dest(&ea, 0, RTD_UNREACHABLE); + else + { + ea = buck->eattrs; + eattr *eanh = bgp_find_attr(ea, BA_NEXT_HOP); + ASSERT_DIE(eanh); + const ip_addr *nh = (const void *) eanh->u.ptr->data; + + struct nexthop_adata nhad = { + .ad = { .length = sizeof (struct nexthop_adata) - sizeof (struct adata), }, + .nh = { .gw = nh[0], }, + }; + + ea_set_attr(&ea, EA_LITERAL_DIRECT_ADATA(&ea_gen_nexthop, 0, tmp_copy_adata(&nhad.ad))); + } + + struct rte_storage es = { + .rte = { + .attrs = ea, + .net = n->net, + .src = rt_find_source_global(n->path_id), + .sender = NULL, + .lastmod = n->lastmod, + .flags = n->cur ? REF_PENDING : 0, + }, + }; + + struct rt_pending_export rpe = { + .new = &es, .new_best = &es, + }; + + if (hook->req->export_bulk) + { + rte *feed = &es.rte; + hook->req->export_bulk(hook->req, n->net, &rpe, &feed, 1); + } + else if (hook->req->export_one) + hook->req->export_one(hook->req, n->net, &rpe); + else + bug("No export method in export request"); + } + HASH_WALK_ITER_END; + + neq = cand; + cand = NULL; + } while (neq); + + if (hook->hash_iter) + ev_schedule_work(hook->event); + else + rt_set_export_state(hook, TES_READY); +} + +static struct rt_export_hook * +bgp_out_table_export_start(struct rt_exporter *re, struct rt_export_request *req UNUSED) +{ + struct bgp_channel *c = SKIP_BACK(struct bgp_channel, prefix_exporter, re); + pool *p = rp_new(c->c.proto->pool, "Export hook"); + struct rt_export_hook *hook = mb_allocz(p, sizeof(struct rt_export_hook)); + hook->pool = p; + hook->lp = lp_new_default(p); + hook->event = ev_new_init(p, bgp_out_table_feed, hook); + hook->feed_type = TFT_HASH; + + return hook; +} + +void +bgp_setup_out_table(struct bgp_channel *c) +{ + ASSERT_DIE(c->c.out_table == NULL); + + c->prefix_exporter = (struct rt_exporter) { + .addr_type = c->c.table->addr_type, + .start = bgp_out_table_export_start, + }; + + init_list(&c->prefix_exporter.hooks); + + c->c.out_table = &c->prefix_exporter; +} + /* * BGP protocol glue @@ -1890,7 +2121,6 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c struct bgp_proto *p = (void *) P; struct bgp_channel *c = (void *) C; struct bgp_bucket *buck; - struct bgp_prefix *px; u32 path; if (new) @@ -1911,10 +2141,8 @@ bgp_rt_notify(struct proto *P, struct channel *C, const net_addr *n, rte *new, c path = old->src->global_id; } - px = bgp_get_prefix(c, n, c->add_path_tx ? path : 0); - add_tail(&buck->prefixes, &px->buck_node); - - bgp_schedule_packet(p->conn, c, PKT_UPDATE); + if (bgp_update_prefix(c, bgp_get_prefix(c, n, path), buck)) + bgp_schedule_packet(p->conn, c, PKT_UPDATE); } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 84430287..6ffe8824 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -900,9 +900,6 @@ bgp_refresh_begin(struct bgp_channel *c) c->load_state = BFS_REFRESHING; rt_refresh_begin(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_refresh_begin(c->c.in_table, &c->c.in_req); } /** @@ -924,9 +921,6 @@ bgp_refresh_end(struct bgp_channel *c) c->load_state = BFS_NONE; rt_refresh_end(c->c.table, &c->c.in_req); - - if (c->c.in_table) - rt_prune_sync(c->c.in_table, 0); } @@ -1393,9 +1387,9 @@ bgp_reload_routes(struct channel *C) struct bgp_proto *p = (void *) C->proto; struct bgp_channel *c = (void *) C; - ASSERT(p->conn && (p->route_refresh || c->c.in_table)); + ASSERT(p->conn && (p->route_refresh || (C->in_keep & RIK_PREFILTER))); - if (c->c.in_table) + if (C->in_keep & RIK_PREFILTER) channel_schedule_reload(C); else bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); @@ -1746,14 +1740,15 @@ bgp_channel_start(struct channel *C) } c->pool = p->p.pool; // XXXX - bgp_init_bucket_table(c); - bgp_init_prefix_table(c); if (c->cf->import_table) channel_setup_in_table(C); if (c->cf->export_table) - channel_setup_out_table(C); + bgp_setup_out_table(c); + + bgp_init_bucket_table(c); + bgp_init_prefix_table(c); c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0); @@ -2153,7 +2148,7 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor (new->cost != old->cost)) { /* import_changed itself does not force ROUTE_REFRESH when import_table is active */ - if (c->c.in_table && (c->c.channel_state == CS_UP)) + if ((c->c.in_keep & RIK_PREFILTER) && (c->c.channel_state == CS_UP)) bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH); *import_changed = 1; diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index b3966bc3..003893e0 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -156,7 +156,7 @@ struct bgp_channel_config { u8 aigp_originate; /* AIGP is originated automatically */ u32 cost; /* IGP cost for direct next hops */ u8 import_table; /* Use c.in_table as Adj-RIB-In */ - u8 export_table; /* Use c.out_table as Adj-RIB-Out */ + u8 export_table; /* Keep Adj-RIB-Out and export it */ struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */ struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */ @@ -357,6 +357,8 @@ struct bgp_channel { HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */ slab *prefix_slab; /* Slab holding prefix nodes */ + struct rt_exporter prefix_exporter; /* Table-like exporter for prefix_hash */ + ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */ ip_addr link_addr; /* Link-local version of next_hop_addr */ @@ -378,8 +380,11 @@ struct bgp_channel { }; struct bgp_prefix { - node buck_node; /* Node in per-bucket list */ + node buck_node_xx; /* Node in per-bucket list */ struct bgp_prefix *next; /* Node in prefix hash table */ + struct bgp_bucket *last; /* Last bucket sent with this prefix */ + struct bgp_bucket *cur; /* Current bucket (cur == last) if no update is required */ + btime lastmod; /* Last modification of this prefix */ u32 hash; u32 path_id; net_addr net[0]; @@ -388,8 +393,9 @@ struct bgp_prefix { struct bgp_bucket { node send_node; /* Node in send queue */ struct bgp_bucket *next; /* Node in bucket hash table */ - list prefixes; /* Prefixes in this bucket (struct bgp_prefix) */ + list prefixes; /* Prefixes to send in this bucket (struct bgp_prefix) */ u32 hash; /* Hash over extended attributes */ + u32 px_uc; /* How many prefixes are linking this bucket */ ea_list eattrs[0]; /* Per-bucket extended attributes */ }; @@ -520,6 +526,9 @@ static inline int rte_resolvable(const rte *rt) { eattr *nhea = ea_find(rt->attrs, &ea_gen_nexthop); + if (!nhea) + return 0; + struct nexthop_adata *nhad = (void *) nhea->u.ptr; return NEXTHOP_IS_REACHABLE(nhad) || (nhad->dest != RTD_UNREACHABLE); } @@ -553,15 +562,16 @@ int bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte ea_list * bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len); void bgp_finish_attrs(struct bgp_parse_state *s, ea_list **to); +void bgp_setup_out_table(struct bgp_channel *c); + void bgp_init_bucket_table(struct bgp_channel *c); void bgp_free_bucket_table(struct bgp_channel *c); -void bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b); -void bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b); void bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b); +int bgp_done_bucket(struct bgp_channel *c, struct bgp_bucket *b); void bgp_init_prefix_table(struct bgp_channel *c); void bgp_free_prefix_table(struct bgp_channel *c); -void bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *bp); +void bgp_done_prefix(struct bgp_channel *c, struct bgp_prefix *px, struct bgp_bucket *buck); int bgp_rte_better(struct rte *, struct rte *); int bgp_rte_mergable(rte *pri, rte *sec); diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index 45ee4ed2..4d4ae3eb 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -1392,7 +1392,7 @@ bgp_rte_update(struct bgp_parse_state *s, const net_addr *n, u32 path_id, ea_lis /* Prepare cached route attributes */ if (s->cached_ea == NULL) - s->cached_ea = ea_lookup(a0); + s->cached_ea = ea_lookup(a0, 0); rte e0 = { .attrs = s->cached_ea, @@ -1488,7 +1488,7 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1573,7 +1573,7 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1661,7 +1661,7 @@ bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1758,7 +1758,7 @@ bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *b memcpy(pos, &a, b); ADVANCE(pos, size, b); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1845,7 +1845,7 @@ bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -1933,7 +1933,7 @@ bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte * memcpy(pos, net->data, flen); ADVANCE(pos, size, flen); - bgp_free_prefix(s->channel, px); + bgp_done_prefix(s->channel, px, buck); } return pos - buf; @@ -2167,6 +2167,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu * var IPv4 Network Layer Reachability Information */ + ASSERT_DIE(s->channel->withdraw_bucket != buck); + int lr, la; la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH); @@ -2188,6 +2190,8 @@ bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu static byte * bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end) { + ASSERT_DIE(s->channel->withdraw_bucket != buck); + /* * 2 B IPv4 Withdrawn Routes Length (zero) * --- IPv4 Withdrawn Routes NLRI (unused) @@ -2341,9 +2345,8 @@ again: ; buck = HEAD(c->bucket_queue); /* Cleanup empty buckets */ - if (EMPTY_LIST(buck->prefixes)) + if (bgp_done_bucket(c, buck)) { - bgp_free_bucket(c, buck); lp_restore(tmp_linpool, &tmpp); goto again; } @@ -2352,10 +2355,7 @@ again: ; bgp_create_ip_reach(&s, buck, buf, end): bgp_create_mp_reach(&s, buck, buf, end); - if (EMPTY_LIST(buck->prefixes)) - bgp_free_bucket(c, buck); - else - bgp_defer_bucket(c, buck); + bgp_done_bucket(c, buck); if (!res) { @@ -2724,7 +2724,7 @@ bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len) { case BGP_RR_REQUEST: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH"); - rt_refeed_channel(&c->c); + channel_request_feeding(&c->c); break; case BGP_RR_BEGIN: @@ -2903,7 +2903,11 @@ bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type) { ASSERT(conn->sk); - DBG("BGP: Scheduling packet type %d\n", type); + struct bgp_proto *p = conn->bgp; + if (c) + BGP_TRACE(D_PACKETS, "Scheduling packet type %d for channel %s", type, c->c.name); + else + BGP_TRACE(D_PACKETS, "Scheduling packet type %d", type); if (c) { diff --git a/proto/mrt/mrt.c b/proto/mrt/mrt.c index fcc1dcfe..5ef4cd44 100644 --- a/proto/mrt/mrt.c +++ b/proto/mrt/mrt.c @@ -431,7 +431,7 @@ mrt_rib_table_entry_bgp_attrs(struct mrt_table_dump_state *s, rte *r) /* Attribute list must be normalized for bgp_encode_attrs() */ if (!rta_is_cached(r->attrs)) - eattrs = ea_normalize(eattrs); + eattrs = ea_normalize(eattrs, 0); mrt_buffer_need(b, MRT_ATTR_BUFFER_SIZE); byte *pos = b->pos; @@ -703,14 +703,17 @@ mrt_dump_cont(struct cli *c) cli_printf(c, 0, ""); mrt_table_dump_free(c->rover); - c->cont = c->cleanup = c->rover = NULL; + c->cont = NULL; + c->cleanup = NULL; + c->rover = NULL; } -static void +static int mrt_dump_cleanup(struct cli *c) { mrt_table_dump_free(c->rover); c->rover = NULL; + return 0; } void diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index aedf3df6..69c2907d 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -2086,7 +2086,7 @@ again1: ASSERT_DIE(ARRAY_SIZE(eattrs.a) >= eattrs.l.count); - ea_list *eal = ea_lookup(&eattrs.l); + ea_list *eal = ea_lookup(&eattrs.l, 0); ea_free(nf->old_ea); nf->old_ea = eal; diff --git a/proto/perf/perf.c b/proto/perf/perf.c index 67ad2ada..d82ac8aa 100644 --- a/proto/perf/perf.c +++ b/proto/perf/perf.c @@ -156,7 +156,7 @@ perf_loop(void *data) ea_set_attr_data(&ea, &ea_gen_nexthop, 0, &nhad.ad.data, sizeof nhad - sizeof nhad.ad); - p->data[i].a = rta_lookup(ea); + p->data[i].a = rta_lookup(ea, 0); } else p->data[i].a = rta_clone(p->data[i-1].a); diff --git a/proto/pipe/config.Y b/proto/pipe/config.Y index c869de9f..0990168e 100644 --- a/proto/pipe/config.Y +++ b/proto/pipe/config.Y @@ -41,6 +41,12 @@ pipe_proto: pipe_proto_start '{' | pipe_proto proto_item ';' | pipe_proto channel_item_ ';' + | pipe_proto IMPORT IN net_any imexport ';' { + if (this_channel->net_type && ($4->type != this_channel->net_type)) + cf_error("Incompatible export prefilter type"); + PIPE_CFG->in_subprefix = $4; + this_channel->in_filter = $5; + } | pipe_proto PEER TABLE rtable ';' { PIPE_CFG->peer = $4; } | pipe_proto MAX GENERATION expr ';' { if (($4 < 1) || ($4 > 254)) cf_error("Max generation must be in range 1..254, got %u", $4); diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 99d4b737..351db36b 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -123,10 +123,16 @@ pipe_postconfig(struct proto_config *CF) if (cc->table->addr_type != cf->peer->addr_type) cf_error("Primary table and peer table must have the same type"); + if (cc->out_subprefix && (cc->table->addr_type != cc->out_subprefix->type)) + cf_error("Export subprefix must match table type"); + + if (cf->in_subprefix && (cc->table->addr_type != cf->in_subprefix->type)) + cf_error("Import subprefix must match table type"); + if (cc->rx_limit.action) cf_error("Pipe protocol does not support receive limits"); - if (cc->in_keep_filtered) + if (cc->in_keep) cf_error("Pipe protocol prohibits keeping filtered routes"); cc->debug = cf->c.debug; @@ -142,6 +148,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf) .channel = cc->channel, .table = cc->table, .out_filter = cc->out_filter, + .out_subprefix = cc->out_subprefix, .in_limit = cc->in_limit, .ra_mode = RA_ANY, .debug = cc->debug, @@ -153,6 +160,7 @@ pipe_configure_channels(struct pipe_proto *p, struct pipe_config *cf) .channel = cc->channel, .table = cf->peer, .out_filter = cc->in_filter, + .out_subprefix = cf->in_subprefix, .in_limit = cc->out_limit, .ra_mode = RA_ANY, .debug = cc->debug, diff --git a/proto/pipe/pipe.h b/proto/pipe/pipe.h index 60c857eb..a6534e1c 100644 --- a/proto/pipe/pipe.h +++ b/proto/pipe/pipe.h @@ -12,6 +12,7 @@ struct pipe_config { struct proto_config c; struct rtable_config *peer; /* Table we're connected to */ + const net_addr *in_subprefix; u8 max_generation; }; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index d6edac14..f5c01380 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -1240,7 +1240,7 @@ rip_get_route_info(rte *rte, byte *buf) static void rip_tag_format(const eattr *a, byte *buf, uint buflen) { - bsnprintf(buf, buflen, "tag: %04x", a->u.data); + bsnprintf(buf, buflen, "%04x", a->u.data); } static struct ea_class ea_rip_metric = { diff --git a/proto/static/static.c b/proto/static/static.c index d1d5b92b..f0a514f7 100644 --- a/proto/static/static.c +++ b/proto/static/static.c @@ -695,7 +695,7 @@ static_get_route_info(rte *rte, byte *buf) { eattr *a = ea_find(rte->attrs, &ea_gen_igp_metric); u32 pref = rt_get_preference(rte); - if (a) + if (a && (a->u.data < IGP_METRIC_UNKNOWN)) buf += bsprintf(buf, " (%d/%u)", pref, a->u.data); else buf += bsprintf(buf, " (%d)", pref); diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 4539feee..93d19614 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -302,241 +302,29 @@ krt_uptodate(rte *a, rte *b) return (a->attrs == b->attrs); } -static void -krt_learn_announce_update(struct krt_proto *p, rte *e) -{ - rte e0 = { - .attrs = ea_clone(e->attrs), - .src = p->p.main_source, - }; - - rte_update(p->p.main_channel, e->net, &e0, p->p.main_source); -} - -static void -krt_learn_announce_delete(struct krt_proto *p, net_addr *n) -{ - rte_update(p->p.main_channel, n, NULL, p->p.main_source); -} - -static struct rte_storage * -krt_store_async(struct krt_proto *p, net *n, rte *e) -{ - ea_set_attr_u32(&e->attrs, &ea_gen_preference, 0, p->p.main_channel->preference); - e->src = p->p.main_source; - return rte_store(e, n, p->krt_table); -} - /* Called when alien route is discovered during scan */ static void krt_learn_scan(struct krt_proto *p, rte *e) { - net *n = net_get(p->krt_table, e->net); - struct rte_storage *m, **mm; - struct rte_storage *ee = krt_store_async(p, n, e); - - for(mm = &n->routes; m = *mm; mm = &m->next) - if (krt_same_key(&m->rte, e)) - break; - if (m) - { - if (krt_uptodate(&m->rte, e)) - { - krt_trace_in_rl(&rl_alien, p, e, "[alien] seen"); - rte_free(ee); - m->rte.pflags |= KRT_REF_SEEN; - } - else - { - krt_trace_in(p, e, "[alien] updated"); - *mm = m->next; - rte_free(m); - m = NULL; - } - } - else - krt_trace_in(p, e, "[alien] created"); - - if (!m) - { - ee->next = n->routes; - n->routes = ee; - ee->rte.pflags |= KRT_REF_SEEN; - } -} - -static void -krt_learn_prune(struct krt_proto *p) -{ - struct fib *fib = &p->krt_table->fib; - struct fib_iterator fit; - - KRT_TRACE(p, D_EVENTS, "Pruning inherited routes"); - - FIB_ITERATE_INIT(&fit, fib); -again: - FIB_ITERATE_START(fib, &fit, net, n) - { - struct rte_storage *e, **ee, *best, **pbest, *old_best; - - /* - * Note that old_best may be NULL even if there was an old best route in - * the previous step, because it might be replaced in krt_learn_scan(). - * But in that case there is a new valid best route. - */ - - old_best = NULL; - best = NULL; - pbest = NULL; - ee = &n->routes; - while (e = *ee) - { - if (e->rte.pflags & KRT_REF_BEST) - old_best = e; - - if (!(e->rte.pflags & KRT_REF_SEEN)) - { - *ee = e->next; - rte_free(e); - continue; - } - - if (!best || krt_metric(&best->rte) > krt_metric(&e->rte)) - { - best = e; - pbest = ee; - } - - e->rte.pflags &= ~(KRT_REF_SEEN | KRT_REF_BEST); - ee = &e->next; - } - if (!n->routes) - { - DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen); - if (old_best) - krt_learn_announce_delete(p, n->n.addr); - - FIB_ITERATE_PUT(&fit); - fib_delete(fib, n); - goto again; - } - - best->rte.pflags |= KRT_REF_BEST; - *pbest = best->next; - best->next = n->routes; - n->routes = best; + rte e0 = { + .attrs = e->attrs, + .src = rt_get_source(&p->p, krt_metric(e)), + }; - if ((best != old_best) || p->reload) - { - DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(&best->rte)); - krt_learn_announce_update(p, &best->rte); - } - else - DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, krt_metric(&best->rte)); - } - FIB_ITERATE_END; + ea_set_attr_u32(&e0.attrs, &ea_gen_preference, 0, p->p.main_channel->preference); - p->reload = 0; + rte_update(p->p.main_channel, e->net, &e0, e0.src); } static void krt_learn_async(struct krt_proto *p, rte *e, int new) { - net *n = net_get(p->krt_table, e->net); - struct rte_storage *g, **gg, *best, **bestp, *old_best; - struct rte_storage *ee = krt_store_async(p, n, e); - - old_best = n->routes; - for(gg=&n->routes; g = *gg; gg = &g->next) - if (krt_same_key(&g->rte, e)) - break; if (new) - { - if (g) - { - if (krt_uptodate(&g->rte, e)) - { - krt_trace_in(p, e, "[alien async] same"); - rte_free(ee); - return; - } - krt_trace_in(p, e, "[alien async] updated"); - *gg = g->next; - rte_free(g); - } - else - krt_trace_in(p, e, "[alien async] created"); - - ee->next = n->routes; - n->routes = ee; - } - else if (!g) - { - krt_trace_in(p, e, "[alien async] delete failed"); - rte_free(ee); - return; - } - else - { - krt_trace_in(p, e, "[alien async] removed"); - *gg = g->next; - rte_free(ee); - rte_free(g); - } - best = n->routes; - bestp = &n->routes; - for(gg=&n->routes; g=*gg; gg=&g->next) - { - if (krt_metric(&best->rte) > krt_metric(&g->rte)) - { - best = g; - bestp = gg; - } - - g->rte.pflags &= ~KRT_REF_BEST; - } - - if (best) - { - best->rte.pflags |= KRT_REF_BEST; - *bestp = best->next; - best->next = n->routes; - n->routes = best; - } - - if (best != old_best) - { - DBG("krt_learn_async: distributing change\n"); - if (best) - krt_learn_announce_update(p, &best->rte); - else - krt_learn_announce_delete(p, n->n.addr); - } -} - -static void -krt_learn_init(struct krt_proto *p) -{ - if (KRT_CF->learn) - { - struct rtable_config *cf = mb_allocz(p->p.pool, sizeof(struct rtable_config)); - cf->name = "Inherited"; - cf->addr_type = p->p.net_type; - cf->internal = 1; + return krt_learn_scan(p, e); - p->krt_table = rt_setup(p->p.pool, cf); - } -} - -static void -krt_dump(struct proto *P) -{ - struct krt_proto *p = (struct krt_proto *) P; - - if (!KRT_CF->learn) - return; - debug("KRT: Table of inheritable routes\n"); - rt_dump(p->krt_table); + struct rte_src *src = rt_find_source(&p->p, krt_metric(e)); + if (src) + rte_update(p->p.main_channel, e->net, NULL, src); } #endif @@ -743,11 +531,6 @@ krt_prune(struct krt_proto *p) } FIB_WALK_END; -#ifdef KRT_ALLOW_LEARN - if (KRT_CF->learn) - krt_learn_prune(p); -#endif - if (p->ready) p->initialized = 1; } @@ -974,6 +757,14 @@ krt_feed_end(struct channel *C) krt_scan_timer_kick(p); } +static int +krt_rte_better(rte *new, rte *old) +{ + u32 n = ea_get_int(new->attrs, &ea_krt_metric, IGP_METRIC_UNKNOWN); + u32 o = ea_get_int(old->attrs, &ea_krt_metric, IGP_METRIC_UNKNOWN); + + return (n < o); +} /* * Protocol glue @@ -1033,6 +824,7 @@ krt_init(struct proto_config *CF) p->p.if_notify = krt_if_notify; p->p.reload_routes = krt_reload_routes; p->p.feed_end = krt_feed_end; + p->p.rte_better = krt_rte_better; krt_sys_init(p); return &p->p; @@ -1058,10 +850,6 @@ krt_start(struct proto *P) bmap_init(&p->seen_map, p->p.pool, 1024); add_tail(&krt_proto_list, &p->krt_node); -#ifdef KRT_ALLOW_LEARN - krt_learn_init(p); -#endif - if (!krt_sys_start(p)) { rem_node(&p->krt_node); @@ -1177,9 +965,6 @@ struct protocol proto_unix_kernel = { .shutdown = krt_shutdown, .reconfigure = krt_reconfigure, .copy_config = krt_copy_config, -#ifdef KRT_ALLOW_LEARN - .dump = krt_dump, -#endif }; void diff --git a/sysdep/unix/krt.h b/sysdep/unix/krt.h index 69fc9aa1..e0d60cbd 100644 --- a/sysdep/unix/krt.h +++ b/sysdep/unix/krt.h @@ -55,10 +55,6 @@ struct krt_proto { struct proto p; struct krt_state sys; /* Sysdep state */ -#ifdef KRT_ALLOW_LEARN - struct rtable *krt_table; /* Internal table of inherited routes */ -#endif - #ifndef CONFIG_ALL_TABLES_AT_ONCE timer *scan_timer; #endif |