From cf98be7b6743e45dde9e0458664cc0762bf08867 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sat, 10 Nov 2012 14:26:13 +0100 Subject: Allows rejected routes to be kept and examined. When 'import keep rejected' protocol option is activated, routes rejected by the import filter are kept in the routing table, but they are hidden and not propagated to other protocols. It is possible to examine them using 'show route rejected'. --- doc/bird.sgml | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 24bc3026..e5550590 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -459,6 +459,14 @@ to zero to disable it. An empty is equivalent to import keep rejected + Usually, if an import filter rejects a route, the route is + forgotten. When this option is active, rejected routes are + kept in the routing table, but they are hidden and not + propagated to other protocols. But it is possible to show them + using import limit Specify an import route limit (a maximum number of routes imported from the protocol) and optionally the action to be @@ -467,8 +475,11 @@ to zero to disable it. An empty is equivalent to export limit Specify an export route limit, works similarly to @@ -661,6 +672,9 @@ This argument can be omitted if there exists only a single instance.

You can also select just routes added by a specific protocol. protocol . +

If BIRD is configured to keep rejected routes (see The rdnss local + rdnss local Use only local (interface-specific) RDNSS definitions for this interface. Otherwise, both global and local definitions are used. Could also be used to disable RDNSS for given interface if no local definitons are specified. Default: no. - dnssl local + dnssl local Use only local DNSSL definitions for this interface. See -- cgit v1.2.3 From 15550957957f3c790f3bec3f6b8721559ea25969 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Thu, 15 Nov 2012 01:29:01 +0100 Subject: Changes 'rejected' to 'filtered' in one of the last patches. --- doc/bird.sgml | 12 ++++++------ nest/config.Y | 10 +++++----- nest/proto.c | 16 ++++++++-------- nest/protocol.h | 6 +++--- nest/route.h | 10 +++++----- nest/rt-table.c | 20 ++++++++++---------- proto/bgp/bgp.c | 2 +- 7 files changed, 38 insertions(+), 38 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index e5550590..7cea3921 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -459,12 +459,12 @@ to zero to disable it. An empty is equivalent to import keep rejected + import keep filtered Usually, if an import filter rejects a route, the route is - forgotten. When this option is active, rejected routes are + forgotten. When this option is active, these routes are kept in the routing table, but they are hidden and not propagated to other protocols. But it is possible to show them - using import limit @@ -476,7 +476,7 @@ to zero to disable it. An empty is equivalent to You can also select just routes added by a specific protocol. protocol . -

If BIRD is configured to keep rejected routes (see If BIRD is configured to keep filtered routes (see The out_filter = $2; } | IMPORT LIMIT limit_spec { this_proto->in_limit = $3; } | EXPORT LIMIT limit_spec { this_proto->out_limit = $3; } - | IMPORT KEEP REJECTED bool { this_proto->in_keep_rejected = $4; } + | IMPORT KEEP FILTERED bool { this_proto->in_keep_filtered = $4; } | TABLE rtable { this_proto->table = $2; } | ROUTER ID idval { this_proto->router_id = $3; } | DESCRIPTION TEXT { this_proto->dsc = $2; } @@ -406,7 +406,7 @@ CF_CLI(SHOW INTERFACES SUMMARY,,, [[Show summary of network interfaces]]) { if_show_summary(); } ; CF_CLI_HELP(SHOW ROUTE, ..., [[Show routing table]]) -CF_CLI(SHOW ROUTE, r_args, [[[|for |for ] [table ] [filter |where ] [all] [primary] [rejected] [(export|preexport)

] [protocol

] [stats|count]]], [[Show routing table]]) +CF_CLI(SHOW ROUTE, r_args, [[[|for |for ] [table ] [filter |where ] [all] [primary] [filtered] [(export|preexport)

] [protocol

] [stats|count]]], [[Show routing table]]) { rt_show($3); } ; r_args: @@ -452,9 +452,9 @@ r_args: $$ = $1; $$->primary_only = 1; } - | r_args REJECTED { + | r_args FILTERED { $$ = $1; - $$->rejected = 1; + $$->filtered = 1; } | r_args export_or_preexport SYM { struct proto_config *c = (struct proto_config *) $3->def; diff --git a/nest/proto.c b/nest/proto.c index 2fb0e796..e9afa2fe 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -414,7 +414,7 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config p->main_ahook->out_filter = nc->out_filter; p->main_ahook->in_limit = nc->in_limit; p->main_ahook->out_limit = nc->out_limit; - p->main_ahook->in_keep_rejected = nc->in_keep_rejected; + p->main_ahook->in_keep_filtered = nc->in_keep_filtered; } /* Update routes when filters changed. If the protocol in not UP, @@ -720,7 +720,7 @@ proto_fell_down(struct proto *p) { DBG("Protocol %s down\n", p->name); - u32 all_routes = p->stats.imp_routes + p->stats.rej_routes; + u32 all_routes = p->stats.imp_routes + p->stats.filt_routes; if (all_routes != 0) log(L_ERR "Protocol %s is down but still has %d routes", p->name, all_routes); @@ -798,7 +798,7 @@ proto_schedule_feed(struct proto *p, int initial) p->main_ahook->out_filter = p->cf->out_filter; p->main_ahook->in_limit = p->cf->in_limit; p->main_ahook->out_limit = p->cf->out_limit; - p->main_ahook->in_keep_rejected = p->cf->in_keep_rejected; + p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered; proto_reset_limit(p->main_ahook->in_limit); proto_reset_limit(p->main_ahook->out_limit); } @@ -1096,11 +1096,11 @@ proto_state_name(struct proto *p) } static void -proto_show_stats(struct proto_stats *s, int in_keep_rejected) +proto_show_stats(struct proto_stats *s, int in_keep_filtered) { - if (in_keep_rejected) - cli_msg(-1006, " Routes: %u imported, %u rejected, %u exported, %u preferred", - s->imp_routes, s->rej_routes, s->exp_routes, s->pref_routes); + if (in_keep_filtered) + cli_msg(-1006, " Routes: %u 
imported, %u filtered, %u exported, %u preferred", + s->imp_routes, s->filt_routes, s->exp_routes, s->pref_routes); else cli_msg(-1006, " Routes: %u imported, %u exported, %u preferred", s->imp_routes, s->exp_routes, s->pref_routes); @@ -1142,7 +1142,7 @@ proto_show_basic_info(struct proto *p) proto_show_limit(p->cf->out_limit, "Export limit:"); if (p->proto_state != PS_DOWN) - proto_show_stats(&p->stats, p->cf->in_keep_rejected); + proto_show_stats(&p->stats, p->cf->in_keep_filtered); } void diff --git a/nest/protocol.h b/nest/protocol.h index b10016d7..cf2ca0a4 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -91,7 +91,7 @@ struct proto_config { int class; /* SYM_PROTO or SYM_TEMPLATE */ u32 debug, mrtdump; /* Debugging bitfields, both use D_* constants */ unsigned preference, disabled; /* Generic parameters */ - int in_keep_rejected; /* Routes rejected in import filter are kept */ + int in_keep_filtered; /* Routes rejected in import filter are kept */ u32 router_id; /* Protocol specific router ID */ struct rtable_config *table; /* Table we're attached to */ struct filter *in_filter, *out_filter; /* Attached filters */ @@ -107,7 +107,7 @@ struct proto_config { struct proto_stats { /* Import - from protocol to core */ u32 imp_routes; /* Number of routes successfully imported to the (adjacent) routing table */ - u32 rej_routes; /* Number of routes rejected in import filter but kept in the routing table */ + u32 filt_routes; /* Number of routes rejected in import filter but kept in the routing table */ u32 pref_routes; /* Number of routes that are preferred, sum over all routing tables */ u32 imp_updates_received; /* Number of route updates received */ u32 imp_updates_invalid; /* Number of route updates rejected as invalid */ @@ -412,7 +412,7 @@ struct announce_hook { struct proto_limit *out_limit; /* Output limit */ struct proto_stats *stats; /* Per-table protocol statistics */ struct announce_hook *next; /* Next hook for the same protocol */ - int 
in_keep_rejected; /* Routes rejected in import filter are kept */ + int in_keep_filtered; /* Routes rejected in import filter are kept */ }; struct announce_hook *proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats); diff --git a/nest/route.h b/nest/route.h index 3c10fc55..177baa38 100644 --- a/nest/route.h +++ b/nest/route.h @@ -221,13 +221,13 @@ typedef struct rte { } rte; #define REF_COW 1 /* Copy this rte on write */ -#define REF_REJECTED 2 /* Route is rejected by import filter */ +#define REF_FILTERED 2 /* Route is rejected by import filter */ /* Route is valid for propagation (may depend on other flags in the future), accepts NULL */ -static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_REJECTED); } +static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); } -/* Route just has REF_REJECTED flag */ -static inline int rte_is_rejected(rte *r) { return !!(r->flags & REF_REJECTED); } +/* Route just has REF_FILTERED flag */ +static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); } /* Types of route announcement, also used as flags */ @@ -271,7 +271,7 @@ struct rt_show_data { struct fib_iterator fit; struct proto *show_protocol; struct proto *export_protocol; - int export_mode, primary_only, rejected; + int export_mode, primary_only, filtered; struct config *running_on_config; int net_counter, rt_counter, show_counter; int stats, show_for; diff --git a/nest/rt-table.c b/nest/rt-table.c index 421a05ea..102218b2 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -667,7 +667,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str { /* No changes, ignore the new route */ - if (!rte_is_rejected(new)) + if (!rte_is_filtered(new)) { stats->imp_updates_ignored++; rte_trace_in(D_ROUTES, p, new, "ignored"); @@ -701,7 +701,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str struct proto_limit *l = ah->in_limit; if 
(l && !old && new) { - u32 all_routes = stats->imp_routes + stats->rej_routes; + u32 all_routes = stats->imp_routes + stats->filt_routes; if (all_routes >= l->limit) proto_notify_limit(ah, l, all_routes); @@ -715,15 +715,15 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str } } - if (new && !rte_is_rejected(new)) + if (new && !rte_is_filtered(new)) stats->imp_updates_accepted++; else stats->imp_withdraws_accepted++; if (new) - rte_is_rejected(new) ? stats->rej_routes++ : stats->imp_routes++; + rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++; if (old) - rte_is_rejected(old) ? stats->rej_routes-- : stats->imp_routes--; + rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--; if (table->config->sorted) { @@ -929,11 +929,11 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src) stats->imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); - if (! ah->in_keep_rejected) + if (! ah->in_keep_filtered) goto drop; /* new is a private copy, i could modify it */ - new->flags |= REF_REJECTED; + new->flags |= REF_FILTERED; } else { @@ -948,10 +948,10 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src) stats->imp_updates_filtered++; rte_trace_in(D_FILTERS, p, new, "filtered out"); - if (! ah->in_keep_rejected) + if (! 
ah->in_keep_filtered) goto drop; - new->flags |= REF_REJECTED; + new->flags |= REF_FILTERED; } if (tmpa != old_tmpa && src->store_tmp_attrs) src->store_tmp_attrs(new, tmpa); @@ -2023,7 +2023,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) for(e=n->routes; e; e=e->next) { - if (rte_is_rejected(e) != d->rejected) + if (rte_is_filtered(e) != d->filtered) continue; struct ea_list *tmpa; diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 2eb8ccb4..346c641b 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1188,7 +1188,7 @@ bgp_show_proto_info(struct proto *P) cli_msg(-1006, " Source address: %I", p->source_addr); if (P->cf->in_limit) cli_msg(-1006, " Route limit: %d/%d", - p->p.stats.imp_routes + p->p.stats.rej_routes, P->cf->in_limit->limit); + p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit); cli_msg(-1006, " Hold timer: %d/%d", tm_remains(c->hold_timer), c->hold_time); cli_msg(-1006, " Keepalive timer: %d/%d", -- cgit v1.2.3 From cf3a704b6a2263aba6bb6adb4c2c9dd93b72f470 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Fri, 16 Nov 2012 02:34:12 +0100 Subject: Updates the documentation. --- doc/bird.sgml | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 7cea3921..d833f82f 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1654,6 +1654,15 @@ use cases that use the direct protocol (like abusing eBGP as an IGP routing protocol), in most cases it is not needed to have these device routes in BIRD routing table and to use the direct protocol. +

There is one notable case when you definitely want to use the +direct protocol -- running BIRD on BSD systems. Having high-priority +device routes for directly connected networks from the direct protocol +protects kernel device routes from being overwritten or removed by IGP +routes during some transient network conditions, because a lower +priority IGP route for the same network is not exported to the kernel +routing table. This is an issue on BSD systems only, as on Linux +systems BIRD cannot change non-BIRD routes in the kernel routing table. +

The only configurable thing about direct is what interfaces it watches:

-- cgit v1.2.3 From 80a9cadc76101157707aecc0b482ad88ad702fc3 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 27 Nov 2012 02:08:04 +0100 Subject: Changes static route targets drop/reject to blackhole/unreachable. To be consistent with rest of BIRD and Linux. Old names are also allowed for compatibility. --- bird.conf | 10 +++++----- doc/bird.conf.example | 4 ++-- doc/bird.sgml | 9 +++++---- proto/static/config.Y | 11 +++++++---- 4 files changed, 19 insertions(+), 15 deletions(-) (limited to 'doc') diff --git a/bird.conf b/bird.conf index 2d10ef4b..bafd6ea1 100644 --- a/bird.conf +++ b/bird.conf @@ -25,14 +25,14 @@ protocol kernel { protocol static { # disabled; - route fec0:2::/64 reject; - route fec0:3::/64 reject; - route fec0:4::/64 reject; + route fec0:2::/64 blackhole; + route fec0:3::/64 unreachable; + route fec0:4::/64 prohibit; # route 0.0.0.0/0 via 195.113.31.113; -# route 62.168.0.0/25 reject; +# route 62.168.0.0/25 unreachable; # route 1.2.3.4/32 via 195.113.31.124; -# route 10.0.0.0/8 reject; +# route 10.0.0.0/8 unreachable; # route 10.1.1.0:255.255.255.0 via 62.168.0.3; # route 10.1.2.0:255.255.255.0 via 62.168.0.3; # route 10.1.3.0:255.255.255.0 via 62.168.0.4; diff --git a/doc/bird.conf.example b/doc/bird.conf.example index 5e07ab5a..dcc62e29 100644 --- a/doc/bird.conf.example +++ b/doc/bird.conf.example @@ -67,8 +67,8 @@ protocol static { # debug { states, routes, filters, interfaces, events, packets }; # debug all; # route 0.0.0.0/0 via 198.51.100.13; -# route 198.51.100.0/25 reject; -# route 10.0.0.0/8 reject; +# route 198.51.100.0/25 unreachable; +# route 10.0.0.0/8 unreachable; # route 10.1.1.0:255.255.255.0 via 198.51.100.3; # route 10.1.2.0:255.255.255.0 via 198.51.100.3; # route 10.1.3.0:255.255.255.0 via 198.51.100.4; diff --git a/doc/bird.sgml b/doc/bird.sgml index d833f82f..d351cedc 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2733,9 +2733,10 @@ definition of the protocol contains mainly a list of static routes: route through 
an interface to hosts on a directly connected network. route Static recursive route, its nexthop depends on a route table lookup for given IP address. - route Special routes - specifying to drop the packet, return it as unreachable or return - it as administratively prohibited. + route Special routes + specifying to silently drop the packet, return it as unreachable or return + it as administratively prohibited. First two targets are also known + as check link If set, hardware link states of network interfaces are taken @@ -2761,7 +2762,7 @@ protocol static { via 198.51.100.10 weight 2 via 198.51.100.20 via 192.0.2.1; - route 203.0.113.0/24 reject; # Sink route + route 203.0.113.0/24 unreachable; # Sink route route 10.2.0.0/24 via "arc0"; # Secondary network } diff --git a/proto/static/config.Y b/proto/static/config.Y index f8e84f92..2d9d4b42 100644 --- a/proto/static/config.Y +++ b/proto/static/config.Y @@ -18,7 +18,7 @@ static struct static_route *this_srt, *this_srt_nh, *last_srt_nh; CF_DECLS CF_KEYWORDS(STATIC, ROUTE, VIA, DROP, REJECT, PROHIBIT, PREFERENCE, CHECK, LINK) -CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE) +CF_KEYWORDS(MULTIPATH, WEIGHT, RECURSIVE, IGP, TABLE, BLACKHOLE, UNREACHABLE) CF_GRAMMAR @@ -86,9 +86,12 @@ stat_route: this_srt->dest = RTDX_RECURSIVE; this_srt->via = $3; } - | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; } - | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; } - | stat_route0 PROHIBIT { this_srt->dest = RTD_PROHIBIT; } + + | stat_route0 DROP { this_srt->dest = RTD_BLACKHOLE; } + | stat_route0 REJECT { this_srt->dest = RTD_UNREACHABLE; } + | stat_route0 BLACKHOLE { this_srt->dest = RTD_BLACKHOLE; } + | stat_route0 UNREACHABLE { this_srt->dest = RTD_UNREACHABLE; } + | stat_route0 PROHIBIT { this_srt->dest = RTD_PROHIBIT; } ; CF_CLI(SHOW STATIC, optsym, [], [[Show details of static protocol]]) -- cgit v1.2.3 From a92cf57dd6ba021a495fe7268c86dc8e6aeecbb2 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Wed, 
26 Dec 2012 12:40:48 +0100 Subject: Implements undo command and optional timeout for configuration Several new configure command variants: configure undo - undo last reconfiguration configure timeout - configure with scheduled undo if not confirmed in timeout configure confirm - confirm last configuration configure check - just parse and validate config file --- conf/conf.c | 253 ++++++++++++++++++++++++++++++++++++++------------- conf/conf.h | 29 +++--- conf/gen_commands.m4 | 3 + conf/gen_parser.m4 | 1 + doc/bird.sgml | 37 +++++++- doc/reply_codes | 6 ++ nest/cli.c | 20 +++- nest/cli.h | 2 + nest/cmds.c | 6 +- nest/proto.c | 4 +- sysdep/unix/config.Y | 33 +++++-- sysdep/unix/io.c | 5 +- sysdep/unix/krt.c | 2 +- sysdep/unix/main.c | 113 ++++++++++++++++++----- sysdep/unix/timer.h | 1 + sysdep/unix/unix.h | 7 +- 16 files changed, 407 insertions(+), 115 deletions(-) (limited to 'doc') diff --git a/conf/conf.c b/conf/conf.c index 9375861f..6dfa3691 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -21,9 +21,12 @@ * There can exist up to four different configurations at one time: an active * one (pointed to by @config), configuration we are just switching from * (@old_config), one queued for the next reconfiguration (@future_config; - * if it's non-%NULL and the user wants to reconfigure once again, we just + * if there is one and the user wants to reconfigure once again, we just * free the previous queued config and replace it with the new one) and - * finally a config being parsed (@new_config). + * finally a config being parsed (@new_config). The stored @old_config + * is also used for undo reconfiguration, which works in a similar way. + * Reconfiguration could also have timeout (using @config_timer) and undo + * is automatically called if the new configuration is not confirmed later. 
* * Loading of new configuration is very simple: just call config_alloc() * to get a new &config structure, then use config_parse() to parse a @@ -55,10 +58,23 @@ static jmp_buf conf_jmpbuf; -struct config *config, *new_config, *old_config, *future_config; -static event *config_event; -int shutting_down, future_type; -bird_clock_t boot_time; +struct config *config, *new_config; + +static struct config *old_config; /* Old configuration */ +static struct config *future_config; /* New config held here if recon requested during recon */ +static int old_cftype; /* Type of transition old_config -> config (RECONFIG_SOFT/HARD) */ +static int future_cftype; /* Type of scheduled transition, may also be RECONFIG_UNDO */ +/* Note that when future_cftype is RECONFIG_UNDO, then future_config is NULL, + therefore proper check for future scheduled config checks future_cftype */ + +static event *config_event; /* Event for finalizing reconfiguration */ +static timer *config_timer; /* Timer for scheduled configuration rollback */ + +/* These are public just for cmd_show_status(), should not be accessed elsewhere */ +int shutting_down; /* Shutdown requested, do not accept new config changes */ +int configuring; /* Reconfiguration is running */ +int undo_available; /* Undo was not requested from last reconfiguration */ +/* Note that both shutting_down and undo_available are related to requests, not processing */ /** * config_alloc - allocate a new configuration @@ -82,8 +98,6 @@ config_alloc(byte *name) c->load_time = now; c->tf_base.fmt1 = c->tf_log.fmt1 = "%d-%m-%Y %T"; - if (!boot_time) - boot_time = now; return c; } @@ -154,7 +168,8 @@ cli_parse(struct config *c) void config_free(struct config *c) { - rfree(c->pool); + if (c) + rfree(c->pool); } void @@ -170,10 +185,7 @@ config_del_obstacle(struct config *c) DBG("+++ deleting obstacle %d\n", c->obstacle_count); c->obstacle_count--; if (!c->obstacle_count) - { - ASSERT(config_event); - ev_schedule(config_event); - } + 
ev_schedule(config_event); } static int @@ -197,16 +209,31 @@ global_commit(struct config *new, struct config *old) static int config_do_commit(struct config *c, int type) { - int force_restart, nobs; + if (type == RECONFIG_UNDO) + { + c = old_config; + type = old_cftype; + } + else + config_free(old_config); - DBG("do_commit\n"); old_config = config; - config = new_config = c; + old_cftype = type; + config = c; + + configuring = 1; + if (old_config && !config->shutdown) + log(L_INFO "Reconfiguring"); + + /* This should not be necessary, but it seems there are some + functions that access new_config instead of config */ + new_config = config; + if (old_config) old_config->obstacle_count++; DBG("sysdep_commit\n"); - force_restart = sysdep_commit(c, old_config); + int force_restart = sysdep_commit(c, old_config); DBG("global_commit\n"); force_restart |= global_commit(c, old_config); DBG("rt_commit\n"); @@ -214,38 +241,38 @@ config_do_commit(struct config *c, int type) roa_commit(c, old_config); DBG("protos_commit\n"); protos_commit(c, old_config, force_restart, type); - new_config = NULL; /* Just to be sure nobody uses that now */ + + /* Just to be sure nobody uses that now */ + new_config = NULL; + + int obs = 0; if (old_config) - nobs = --old_config->obstacle_count; - else - nobs = 0; - DBG("do_commit finished with %d obstacles remaining\n", nobs); - return !nobs; + obs = --old_config->obstacle_count; + + DBG("do_commit finished with %d obstacles remaining\n", obs); + return !obs; } static void config_done(void *unused UNUSED) { - struct config *c; + if (config->shutdown) + sysdep_shutdown_done(); + + configuring = 0; + if (old_config) + log(L_INFO "Reconfigured"); - DBG("config_done\n"); - for(;;) + if (future_cftype) { - if (config->shutdown) - sysdep_shutdown_done(); - log(L_INFO "Reconfigured"); - if (old_config) - { - config_free(old_config); - old_config = NULL; - } - if (!future_config) - break; - c = future_config; + int type = future_cftype; + struct 
config *conf = future_config; + future_cftype = RECONFIG_NONE; future_config = NULL; + log(L_INFO "Reconfiguring to queued configuration"); - if (!config_do_commit(c, future_type)) - break; + if (config_do_commit(conf, type)) + config_done(NULL); } } @@ -253,6 +280,7 @@ config_done(void *unused UNUSED) * config_commit - commit a configuration * @c: new configuration * @type: type of reconfiguration (RECONFIG_SOFT or RECONFIG_HARD) + * @timeout: timeout for undo (or 0 for no timeout) * * When a configuration is parsed and prepared for use, the * config_commit() function starts the process of reconfiguration. @@ -265,6 +293,10 @@ config_done(void *unused UNUSED) * using config_del_obstacle(), the old configuration is freed and * everything runs according to the new one. * + * When @timeout is nonzero, the undo timer is activated with given + * timeout. The timer is deactivated when config_commit(), + * config_confirm() or config_undo() is called. + * * Result: %CONF_DONE if the configuration has been accepted immediately, * %CONF_PROGRESS if it will take some time to switch to it, %CONF_QUEUED * if it's been queued due to another reconfiguration being in progress now @@ -272,49 +304,147 @@ config_done(void *unused UNUSED) * are accepted. 
*/ int -config_commit(struct config *c, int type) +config_commit(struct config *c, int type, int timeout) { - if (!config) /* First-time configuration */ + if (shutting_down) { - config_do_commit(c, RECONFIG_HARD); - return CONF_DONE; + config_free(c); + return CONF_SHUTDOWN; } - if (old_config) /* Reconfiguration already in progress */ + + undo_available = 1; + if (timeout > 0) + tm_start(config_timer, timeout); + else + tm_stop(config_timer); + + if (configuring) { - if (shutting_down == 2) - { - log(L_INFO "New configuration discarded due to shutdown"); - config_free(c); - return CONF_SHUTDOWN; - } - if (future_config) + if (future_cftype) { log(L_INFO "Queueing new configuration, ignoring the one already queued"); config_free(future_config); } else - log(L_INFO "Queued new configuration"); + log(L_INFO "Queueing new configuration"); + + future_cftype = type; future_config = c; - future_type = type; return CONF_QUEUED; } - if (!shutting_down) - log(L_INFO "Reconfiguring"); - if (config_do_commit(c, type)) { config_done(NULL); return CONF_DONE; } - if (!config_event) + return CONF_PROGRESS; +} + +/** + * config_confirm - confirm a commited configuration + * + * When the undo timer is activated by config_commit() with nonzero timeout, + * this function can be used to deactivate it and therefore confirm + * the current configuration. + * + * Result: %CONF_CONFIRM when the current configuration is confirmed, + * %CONF_NONE when there is nothing to confirm (i.e. undo timer is not active). + */ +int +config_confirm(void) +{ + if (config_timer->expires == 0) + return CONF_NOTHING; + + tm_stop(config_timer); + + return CONF_CONFIRM; +} + +/** + * config_undo - undo a configuration + * + * Function config_undo() can be used to change the current + * configuration back to stored %old_config. If no reconfiguration is + * running, this stored configuration is commited in the same way as a + * new configuration in config_commit(). 
If there is already a + * reconfiguration in progress and no next reconfiguration is + * scheduled, then the undo is scheduled for later processing as + * usual, but if another reconfiguration is already scheduled, then + * such reconfiguration is removed instead (i.e. undo is applied on + * the last commit that scheduled it). + * + * Result: %CONF_DONE if the configuration has been accepted immediately, + * %CONF_PROGRESS if it will take some time to switch to it, %CONF_QUEUED + * if it's been queued due to another reconfiguration being in progress now, + * %CONF_UNQUEUED if a scheduled reconfiguration is removed, %CONF_NOTHING + * if there is no relevant configuration to undo (the previous config request + * was config_undo() too) or %CONF_SHUTDOWN if BIRD is in shutdown mode and + * no new configuration changes are accepted. + */ +int +config_undo(void) +{ + if (shutting_down) + return CONF_SHUTDOWN; + + if (!undo_available || !old_config) + return CONF_NOTHING; + + undo_available = 0; + tm_stop(config_timer); + + if (configuring) { - config_event = ev_new(&root_pool); - config_event->hook = config_done; + if (future_cftype) + { + config_free(future_config); + future_config = NULL; + + log(L_INFO "Removing queued configuration"); + future_cftype = RECONFIG_NONE; + return CONF_UNQUEUED; + } + else + { + log(L_INFO "Queueing undo configuration"); + future_cftype = RECONFIG_UNDO; + return CONF_QUEUED; + } + } + + if (config_do_commit(NULL, RECONFIG_UNDO)) + { + config_done(NULL); + return CONF_DONE; } return CONF_PROGRESS; } +extern void cmd_reconfig_undo_notify(void); + +static void +config_timeout(struct timer *t) +{ + log(L_INFO "Config timeout expired, starting undo"); + cmd_reconfig_undo_notify(); + + int r = config_undo(); + if (r < 0) + log(L_ERR "Undo request failed"); +} + +void +config_init(void) +{ + config_event = ev_new(&root_pool); + config_event->hook = config_done; + + config_timer = tm_new(&root_pool); + config_timer->hook = config_timeout; +} + 
/** * order_shutdown - order BIRD shutdown * @@ -328,15 +458,16 @@ order_shutdown(void) if (shutting_down) return; + log(L_INFO "Shutting down"); c = lp_alloc(config->mem, sizeof(struct config)); memcpy(c, config, sizeof(struct config)); init_list(&c->protos); init_list(&c->tables); c->shutdown = 1; + + config_commit(c, RECONFIG_HARD, 0); shutting_down = 1; - config_commit(c, RECONFIG_HARD); - shutting_down = 2; } /** diff --git a/conf/conf.h b/conf/conf.h index c76832b6..19300f54 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -54,28 +54,33 @@ struct config { /* Please don't use these variables in protocols. Use proto_config->global instead. */ extern struct config *config; /* Currently active configuration */ extern struct config *new_config; /* Configuration being parsed */ -extern struct config *old_config; /* Old configuration when reconfiguration is in progress */ -extern struct config *future_config; /* New config held here if recon requested during recon */ - -extern int shutting_down; -extern bird_clock_t boot_time; struct config *config_alloc(byte *name); int config_parse(struct config *); int cli_parse(struct config *); void config_free(struct config *); -int config_commit(struct config *, int type); -#define RECONFIG_HARD 0 -#define RECONFIG_SOFT 1 +int config_commit(struct config *, int type, int timeout); +int config_confirm(void); +int config_undo(void); +void config_init(void); void cf_error(char *msg, ...) 
NORET; void config_add_obstacle(struct config *); void config_del_obstacle(struct config *); void order_shutdown(void); -#define CONF_DONE 0 -#define CONF_PROGRESS 1 -#define CONF_QUEUED 2 -#define CONF_SHUTDOWN 3 +#define RECONFIG_NONE 0 +#define RECONFIG_HARD 1 +#define RECONFIG_SOFT 2 +#define RECONFIG_UNDO 3 + +#define CONF_DONE 0 +#define CONF_PROGRESS 1 +#define CONF_QUEUED 2 +#define CONF_UNQUEUED 3 +#define CONF_CONFIRM 4 +#define CONF_SHUTDOWN -1 +#define CONF_NOTHING -2 + /* Pools */ diff --git a/conf/gen_commands.m4 b/conf/gen_commands.m4 index a88ba014..3ed21f13 100644 --- a/conf/gen_commands.m4 +++ b/conf/gen_commands.m4 @@ -10,6 +10,9 @@ m4_divert(-1)m4_dnl m4_define(CF_CLI, `m4_divert(0){ "m4_translit($1,A-Z,a-z)", "$3", "$4", 1 }, m4_divert(-1)') +m4_define(CF_CLI_CMD, `m4_divert(0){ "m4_translit($1,A-Z,a-z)", "$2", "$3", 1 }, +m4_divert(-1)') + m4_define(CF_CLI_HELP, `m4_divert(0){ "m4_translit($1,A-Z,a-z)", "$2", "$3", 0 }, m4_divert(-1)') diff --git a/conf/gen_parser.m4 b/conf/gen_parser.m4 index 74385f32..00b55023 100644 --- a/conf/gen_parser.m4 +++ b/conf/gen_parser.m4 @@ -44,6 +44,7 @@ m4_define(CF_CLI, `m4_define([[CF_cmd]], cmd_[[]]m4_translit($1, [[ ]], _))DNL m4_divert(2)CF_KEYWORDS(m4_translit($1, [[ ]], [[,]])) m4_divert(3)CF_ADDTO(cli_cmd, CF_cmd) CF_cmd: $1 $2 END') +m4_define(CF_CLI_CMD, `') m4_define(CF_CLI_HELP, `') # ENUM declarations are ignored diff --git a/doc/bird.sgml b/doc/bird.sgml index d351cedc..615ced98 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -702,19 +702,48 @@ This argument can be omitted if there exists only a single instance. flush roa [table ] Remove all dynamic ROA entries from a ROA table. - configure [soft] [" + configure [soft] [" Reload configuration from a given file. BIRD will smoothly switch itself to the new configuration, protocols are reconfigured if possible, restarted otherwise. Changes in - filters usually lead to restart of affected protocols. 
If - configure confirm + Deactivate the config undo timer and therefore confirm the current + configuration. + + configure undo + Undo the last configuration change and smoothly switch back to + the previous (stored) configuration. If the last configuration + change was soft, the undo change is also soft. There is only + one level of undo, but in some specific cases when several + reconfiguration requests are given immediately in a row and + the intermediate ones are skipped then the undo also skips them back. + + configure check [" + Read and parse given config file, but do not use it. useful + for checking syntactic and some semantic validity of an config + file. + enable|disable|restart - Enable, disable or restart a given protocol instance, instances matching the or or + reload [in|out] diff --git a/doc/reply_codes b/doc/reply_codes index 7ec2e27d..58807241 100644 --- a/doc/reply_codes +++ b/doc/reply_codes @@ -25,6 +25,12 @@ Reply codes of BIRD command-line interface 0014 Route count 0015 Reloading 0016 Access restricted +0017 Reconfiguration already in progress, removing queued config +0018 Reconfiguration confirmed +0019 Nothing to do (configure undo/confirm) +0020 Configuration OK +0021 Undo requested +0022 Undo scheduled 1000 BIRD version 1001 Interface list diff --git a/nest/cli.c b/nest/cli.c index d245790b..11f98794 100644 --- a/nest/cli.c +++ b/nest/cli.c @@ -122,6 +122,7 @@ cli_printf(cli *c, int code, char *msg, ...) va_list args; byte buf[CLI_LINE_SIZE]; int cd = code; + int errcode; int size, cnt; if (cd < 0) @@ -131,16 +132,26 @@ cli_printf(cli *c, int code, char *msg, ...) 
size = bsprintf(buf, " "); else size = bsprintf(buf, "%04d-", cd); + errcode = -8000; + } + else if (cd == CLI_ASYNC_CODE) + { + size = 1; buf[0] = '+'; + errcode = cd; } else - size = bsprintf(buf, "%04d ", cd); + { + size = bsprintf(buf, "%04d ", cd); + errcode = 8000; + } + c->last_reply = cd; va_start(args, msg); cnt = bvsnprintf(buf+size, sizeof(buf)-size-1, msg, args); va_end(args); if (cnt < 0) { - cli_printf(c, code < 0 ? -8000 : 8000, ""); + cli_printf(c, errcode, ""); return; } size += cnt; @@ -385,12 +396,17 @@ cli_echo(unsigned int class, byte *msg) } } +/* Hack for scheduled undo notification */ +extern cli *cmd_reconfig_stored_cli; + void cli_free(cli *c) { cli_set_log_echo(c, 0, 0); if (c->cleanup) c->cleanup(c); + if (c == cmd_reconfig_stored_cli) + cmd_reconfig_stored_cli = NULL; rfree(c->pool); } diff --git a/nest/cli.h b/nest/cli.h index ea64680a..396656e8 100644 --- a/nest/cli.h +++ b/nest/cli.h @@ -49,6 +49,8 @@ typedef struct cli { extern pool *cli_pool; extern struct cli *this_cli; /* Used during parsing */ +#define CLI_ASYNC_CODE 10000 + /* Functions to be called by command handlers */ void cli_printf(cli *, int, char *, ...); diff --git a/nest/cmds.c b/nest/cmds.c index 2a803930..54ace169 100644 --- a/nest/cmds.c +++ b/nest/cmds.c @@ -14,6 +14,9 @@ #include "lib/string.h" #include "lib/resource.h" +extern int shutting_down; +extern int configuring; + void cmd_show_status(void) { @@ -27,9 +30,10 @@ cmd_show_status(void) cli_msg(-1011, "Last reboot on %s", tim); tm_format_datetime(tim, &config->tf_base, config->load_time); cli_msg(-1011, "Last reconfiguration on %s", tim); + if (shutting_down) cli_msg(13, "Shutdown in progress"); - else if (old_config) + else if (configuring) cli_msg(13, "Reconfiguration in progress"); else cli_msg(13, "Daemon is up and running"); diff --git a/nest/proto.c b/nest/proto.c index e9afa2fe..1334884e 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -516,7 +516,7 @@ protos_commit(struct config *new, struct config 
*old, int force_reconfig, int ty p->down_code = nc->disabled ? PDC_CF_DISABLE : PDC_CF_RESTART; p->cf_new = nc; } - else if (!shutting_down) + else if (!new->shutdown) { log(L_INFO "Removing protocol %s", p->name); p->down_code = PDC_CF_REMOVE; @@ -537,7 +537,7 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty WALK_LIST(nc, new->protos) if (!nc->proto) { - if (old_config) /* Not a first-time configuration */ + if (old) /* Not a first-time configuration */ log(L_INFO "Adding protocol %s", nc->name); proto_init(nc); } diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y index 844f53df..7bade918 100644 --- a/sysdep/unix/config.Y +++ b/sysdep/unix/config.Y @@ -14,9 +14,9 @@ CF_HDR CF_DECLS CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT) -CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, BASE, NAME) +CF_KEYWORDS(TIMEFORMAT, ISO, SHORT, LONG, BASE, NAME, CONFIRM, UNDO, CHECK, TIMEOUT) -%type log_mask log_mask_list log_cat +%type log_mask log_mask_list log_cat cfg_timeout %type log_file %type cfg_name %type timeformat_which @@ -104,13 +104,26 @@ timeformat_base: /* Unix specific commands */ -CF_CLI_HELP(CONFIGURE, [soft] [\"\"], [[Reload configuration]]) +CF_CLI_HELP(CONFIGURE, ..., [[Reload configuration]]) -CF_CLI(CONFIGURE, cfg_name, [\"\"], [[Reload configuration]]) -{ cmd_reconfig($2, RECONFIG_HARD); } ; +CF_CLI(CONFIGURE, cfg_name cfg_timeout, [\"\"] [timeout []], [[Reload configuration]]) +{ cmd_reconfig($2, RECONFIG_HARD, $3); } ; -CF_CLI(CONFIGURE SOFT, cfg_name, [\"\"], [[Reload configuration and ignore changes in filters]]) -{ cmd_reconfig($3, RECONFIG_SOFT); } ; +CF_CLI(CONFIGURE SOFT, cfg_name cfg_timeout, [\"\"] [timeout []], [[Reload configuration and ignore changes in filters]]) +{ cmd_reconfig($3, RECONFIG_SOFT, $4); } ; + +/* Hack to get input completion for 'timeout' */ +CF_CLI_CMD(CONFIGURE TIMEOUT, [], [[Reload configuration with undo timeout]]) 
+CF_CLI_CMD(CONFIGURE SOFT TIMEOUT, [], [[Reload configuration with undo timeout]]) + +CF_CLI(CONFIGURE CONFIRM,,, [[Confirm last configuration change - deactivate undo timeout]]) +{ cmd_reconfig_confirm(); } ; + +CF_CLI(CONFIGURE UNDO,,, [[Undo last configuration change]]) +{ cmd_reconfig_undo(); } ; + +CF_CLI(CONFIGURE CHECK, cfg_name, [\"\"], [[Parse configuration and check its validity]]) +{ cmd_check_config($3); } ; CF_CLI(DOWN,,, [[Shut the daemon down]]) { cmd_shutdown(); } ; @@ -120,6 +133,12 @@ cfg_name: | TEXT ; +cfg_timeout: + /* empty */ { $$ = 0; } + | TIMEOUT { $$ = UNIX_DEFAULT_CONFIGURE_TIMEOUT; } + | TIMEOUT expr { $$ = $2; } + ; + CF_CODE CF_END diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index f91b5278..80914afe 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -121,7 +121,7 @@ static list near_timers, far_timers; static bird_clock_t first_far_timer = TIME_INFINITY; /* now must be different from 0, because 0 is a special value in timer->expires */ -bird_clock_t now = 1, now_real; +bird_clock_t now = 1, now_real, boot_time; static void update_times_plain(void) @@ -1530,6 +1530,7 @@ io_init(void) krt_io_init(); init_times(); update_times(); + boot_time = now; srandom((int) now_real); } @@ -1557,7 +1558,7 @@ io_loop(void) tm_shot(); continue; } - timo.tv_sec = events ? 0 : tout - now; + timo.tv_sec = events ? 
0 : MIN(tout - now, 3); timo.tv_usec = 0; if (sock_recalc_fdsets_p) diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 6c0e5e91..3761ace6 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -900,7 +900,7 @@ krt_notify(struct proto *P, struct rtable *table UNUSED, net *net, { struct krt_proto *p = (struct krt_proto *) P; - if (shutting_down) + if (config->shutdown) return; if (!(net->n.flags & KRF_INSTALLED)) old = NULL; diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index f0344a8f..23040e54 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -210,7 +210,7 @@ read_config(void) else die("Unable to open configuration file %s: %m", config_name); } - config_commit(conf, RECONFIG_HARD); + config_commit(conf, RECONFIG_HARD, 0); } void @@ -228,19 +228,17 @@ async_config(void) config_free(conf); } else - config_commit(conf, RECONFIG_HARD); + config_commit(conf, RECONFIG_HARD, 0); } -void -cmd_reconfig(char *name, int type) +static struct config * +cmd_read_config(char *name) { struct config *conf; - if (cli_access_restricted()) - return; - if (!name) name = config_name; + cli_msg(-2, "Reading configuration from %s", name); if (!unix_read_config(&conf, name)) { @@ -249,24 +247,94 @@ cmd_reconfig(char *name, int type) else cli_msg(8002, "%s: %m", name); config_free(conf); + conf = NULL; } - else + + return conf; +} + +void +cmd_check_config(char *name) +{ + struct config *conf = cmd_read_config(name); + if (!conf) + return; + + cli_msg(20, "Configuration OK"); + config_free(conf); +} + +static void +cmd_reconfig_msg(int r) +{ + switch (r) { - switch (config_commit(conf, type)) - { - case CONF_DONE: - cli_msg(3, "Reconfigured."); - break; - case CONF_PROGRESS: - cli_msg(4, "Reconfiguration in progress."); - break; - case CONF_SHUTDOWN: - cli_msg(6, "Reconfiguration ignored, shutting down."); - break; - default: - cli_msg(5, "Reconfiguration already in progress, queueing new config"); - } + case CONF_DONE: cli_msg( 3, "Reconfigured"); break; + case 
CONF_PROGRESS: cli_msg( 4, "Reconfiguration in progress"); break; + case CONF_QUEUED: cli_msg( 5, "Reconfiguration already in progress, queueing new config"); break; + case CONF_UNQUEUED: cli_msg(17, "Reconfiguration already in progress, removing queued config"); break; + case CONF_CONFIRM: cli_msg(18, "Reconfiguration confirmed"); break; + case CONF_SHUTDOWN: cli_msg( 6, "Reconfiguration ignored, shutting down"); break; + case CONF_NOTHING: cli_msg(19, "Nothing to do"); break; + default: break; + } +} + +/* Hack for scheduled undo notification */ +cli *cmd_reconfig_stored_cli; + +void +cmd_reconfig_undo_notify(void) +{ + if (cmd_reconfig_stored_cli) + { + cli *c = cmd_reconfig_stored_cli; + cli_printf(c, CLI_ASYNC_CODE, "Config timeout expired, starting undo"); + cli_write_trigger(c); + } +} + +void +cmd_reconfig(char *name, int type, int timeout) +{ + if (cli_access_restricted()) + return; + + struct config *conf = cmd_read_config(name); + if (!conf) + return; + + int r = config_commit(conf, type, timeout); + + if ((r >= 0) && (timeout > 0)) + { + cmd_reconfig_stored_cli = this_cli; + cli_msg(-22, "Undo scheduled in %d s", timeout); } + + cmd_reconfig_msg(r); +} + +void +cmd_reconfig_confirm(void) +{ + if (cli_access_restricted()) + return; + + int r = config_confirm(); + cmd_reconfig_msg(r); +} + +void +cmd_reconfig_undo(void) +{ + if (cli_access_restricted()) + return; + + cli_msg(-21, "Undo requested"); + + int r = config_undo(); + cmd_reconfig_msg(r); } /* @@ -623,6 +691,7 @@ main(int argc, char **argv) rt_init(); if_init(); roa_init(); + config_init(); uid_t use_uid = get_uid(use_user); gid_t use_gid = get_gid(use_group); diff --git a/sysdep/unix/timer.h b/sysdep/unix/timer.h index a788ae27..17450322 100644 --- a/sysdep/unix/timer.h +++ b/sysdep/unix/timer.h @@ -32,6 +32,7 @@ void tm_dump_all(void); extern bird_clock_t now; /* Relative, monotonic time in seconds */ extern bird_clock_t now_real; /* Time in seconds since fixed known epoch */ +extern 
bird_clock_t boot_time; static inline bird_clock_t tm_remains(timer *t) diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h index 3e85c85c..1fc26db2 100644 --- a/sysdep/unix/unix.h +++ b/sysdep/unix/unix.h @@ -19,9 +19,14 @@ extern char *bird_name; void async_config(void); void async_dump(void); void async_shutdown(void); -void cmd_reconfig(char *name, int type); +void cmd_check_config(char *name); +void cmd_reconfig(char *name, int type, int timeout); +void cmd_reconfig_confirm(void); +void cmd_reconfig_undo(void); void cmd_shutdown(void); +#define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300 + /* io.c */ volatile int async_config_flag; -- cgit v1.2.3 From 79b4e12e6032faf6bb1f3feac385bd36ee53019e Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Thu, 27 Dec 2012 12:56:23 +0100 Subject: Implements interface masks for choosing router id. Router ID could be automatically determined based of subset of ifaces/addresses specified by 'router id from' option. The patch also does some minor changes related to router ID reconfiguration. Thanks to Alexander V. Chernikov for most of the work. 
--- conf/conf.c | 16 +++++++++++--- conf/conf.h | 1 + doc/bird.sgml | 13 ++++++++++- nest/config.Y | 17 +++++++++++--- nest/iface.c | 66 +++++++++++++++++++++++++++++++++++++++---------------- nest/iface.h | 4 ++++ nest/proto.c | 14 +++++++++--- proto/bgp/bgp.c | 3 +++ proto/ospf/ospf.c | 3 +++ 9 files changed, 108 insertions(+), 29 deletions(-) (limited to 'doc') diff --git a/conf/conf.c b/conf/conf.c index 6dfa3691..14225d3b 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -200,9 +200,19 @@ global_commit(struct config *new, struct config *old) log(L_WARN "Reconfiguration of BGP listening socket not implemented, please restart BIRD."); if (!new->router_id) - new->router_id = old->router_id; - if (new->router_id != old->router_id) - return 1; + { + new->router_id = old->router_id; + + if (new->router_id_from) + { + u32 id = if_choose_router_id(new->router_id_from, old->router_id); + if (!id) + log(L_WARN "Cannot determine router ID, using old one"); + else + new->router_id = id; + } + } + return 0; } diff --git a/conf/conf.h b/conf/conf.h index 19300f54..683374e0 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -26,6 +26,7 @@ struct config { int mrtdump_file; /* Configured MRTDump file (sysdep, fd in unix) */ char *syslog_name; /* Name used for syslog (NULL -> no syslog) */ struct rtable_config *master_rtc; /* Configuration of master routing table */ + struct iface_patt *router_id_from; /* Configured list of router ID iface patterns */ u32 router_id; /* Our Router ID */ ip_addr listen_bgp_addr; /* Listening BGP socket should use this address */ diff --git a/doc/bird.sgml b/doc/bird.sgml index 615ced98..4e04a138 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -337,7 +337,18 @@ protocol rip { Besides, there are some predefined numeric constants based on /etc/iproute2/rt_* files. A list of defined constants can be seen (together with other symbols) using 'show symbols' command. - router id Set BIRD's router ID. 
It's a world-wide unique identification of your router, usually one of router's IPv4 addresses. Default: in IPv4 version, the lowest IP address of a non-loopback interface. In IPv6 version, this option is mandatory. + router id + Set BIRD's router ID. It's a world-wide unique identification + of your router, usually one of router's IPv4 addresses. + Default: in IPv4 version, the lowest IP address of a + non-loopback interface. In IPv6 version, this option is + mandatory. + + router id from [-] [ " + Set BIRD's router ID based on an IP address of an interface + specified by an interface pattern. The option is applicable + for IPv4 version only. See + section for detailed description of interface patterns. listen bgp [address This option allows to specify address and port where BGP diff --git a/nest/config.Y b/nest/config.Y index cb6a85c2..dbd72055 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -75,9 +75,9 @@ CF_GRAMMAR CF_ADDTO(conf, rtrid) -rtrid: ROUTER ID idval ';' { - new_config->router_id = $3; - } +rtrid: + ROUTER ID idval ';' { new_config->router_id = $3; } + | ROUTER ID FROM iface_patt ';' { new_config->router_id_from = this_ipatt; } ; idval: @@ -264,6 +264,17 @@ iface_patt_list: | iface_patt_list ',' iface_patt_node ; +iface_patt_init: { + /* Generic this_ipatt init */ + this_ipatt = cfg_allocz(sizeof(struct iface_patt)); + init_list(&this_ipatt->ipn_list); + } + ; + +iface_patt: + iface_patt_init iface_patt_list + ; + /* Direct device route protocol */ diff --git a/nest/iface.c b/nest/iface.c index eea3d3b1..da79b21f 100644 --- a/nest/iface.c +++ b/nest/iface.c @@ -35,8 +35,6 @@ static pool *if_pool; -static void auto_router_id(void); - list iface_list; /** @@ -354,9 +352,6 @@ if_end_update(void) struct iface *i; struct ifa *a, *b; - if (!config->router_id) - auto_router_id(); - WALK_LIST(i, iface_list) { if (!(i->flags & IF_UPDATED)) @@ -583,24 +578,57 @@ ifa_delete(struct ifa *a) } } -static void -auto_router_id(void) +u32 +if_choose_router_id(struct 
iface_patt *mask, u32 old_id) { #ifndef IPV6 - struct iface *i, *j; + struct iface *i; + struct ifa *a, *b; - j = NULL; + b = NULL; WALK_LIST(i, iface_list) - if ((i->flags & IF_ADMIN_UP) && - !(i->flags & (IF_IGNORE | IF_SHUTDOWN)) && - i->addr && - !(i->addr->flags & IA_PEER) && - (!j || ipa_to_u32(i->addr->ip) < ipa_to_u32(j->addr->ip))) - j = i; - if (!j) - die("Cannot determine router ID (no suitable network interface found), please configure it manually"); - log(L_INFO "Guessed router ID %I according to interface %s", j->addr->ip, j->name); - config->router_id = ipa_to_u32(j->addr->ip); + { + if (!(i->flags & IF_ADMIN_UP) || + (i->flags & (IF_IGNORE | IF_SHUTDOWN))) + continue; + + WALK_LIST(a, i->addrs) + { + if (a->flags & IA_SECONDARY) + continue; + + if (a->scope <= SCOPE_LINK) + continue; + + /* FIXME: This should go away */ + if (a->flags & IA_PEER) + continue; + + /* FIXME: This should go away too */ + if (!mask && (a != i->addr)) + continue; + + /* Check pattern if specified */ + if (mask && !iface_patt_match(mask, i, a)) + continue; + + /* No pattern or pattern matched */ + if (!b || ipa_to_u32(a->ip) < ipa_to_u32(b->ip)) + b = a; + } + } + + if (!b) + return 0; + + u32 id = ipa_to_u32(b->ip); + if (id != old_id) + log(L_INFO "Chosen router ID %R according to interface %s", id, b->iface->name); + + return id; + +#else + return 0; #endif } diff --git a/nest/iface.h b/nest/iface.h index 2416f82f..697ea543 100644 --- a/nest/iface.h +++ b/nest/iface.h @@ -101,6 +101,7 @@ struct iface *if_find_by_name(char *); struct iface *if_get_by_name(char *); void ifa_recalc_all_primary_addresses(void); + /* The Neighbor Cache */ typedef struct neighbor { @@ -161,4 +162,7 @@ int iface_patt_match(struct iface_patt *ifp, struct iface *i, struct ifa *a); struct iface_patt *iface_patt_find(list *l, struct iface *i, struct ifa *a); int iface_patts_equal(list *, list *, int (*)(struct iface_patt *, struct iface_patt *)); + +u32 if_choose_router_id(struct iface_patt *mask, 
u32 old_id); + #endif diff --git a/nest/proto.c b/nest/proto.c index 1334884e..b976a6cb 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -382,11 +382,9 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config /* If there is a too big change in core attributes, ... */ if ((nc->protocol != oc->protocol) || (nc->disabled != p->disabled) || - (nc->table->table != oc->table->table) || - (proto_get_router_id(nc) != proto_get_router_id(oc))) + (nc->table->table != oc->table->table)) return 0; - p->debug = nc->debug; p->mrtdump = nc->mrtdump; proto_reconfig_type = type; @@ -552,6 +550,16 @@ protos_commit(struct config *new, struct config *old, int force_reconfig, int ty initial_device_proto = NULL; } + /* Determine router ID for the first time - it has to be here and not in + global_commit() because it is postponed after start of device protocol */ + if (!config->router_id) + { + config->router_id = if_choose_router_id(config->router_id_from, 0); + if (!config->router_id) + die("Cannot determine router ID, please configure it manually"); + } + + /* Start all other protocols */ WALK_LIST_DELSAFE(p, n, initial_proto_list) proto_rethink_goal(p); } diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 346c641b..249d2e07 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -1009,6 +1009,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) struct bgp_proto *p = (struct bgp_proto *) P; struct bgp_config *old = p->cf; + if (proto_get_router_id(C) != p->local_id) + return 0; + int same = !memcmp(((byte *) old) + sizeof(struct proto_config), ((byte *) new) + sizeof(struct proto_config), // password item is last and must be checked separately diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 6654e107..a3b6b2e7 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -729,6 +729,9 @@ ospf_reconfigure(struct proto *p, struct proto_config *c) struct ospf_iface *ifa, *ifx; struct ospf_iface_patt *ip; + if (proto_get_router_id(c) != po->router_id) 
+ return 0; + if (po->rfc1583 != new->rfc1583) return 0; -- cgit v1.2.3 From b662290f40ea0fa0b1a1ba283e50e833724f2050 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Thu, 10 Jan 2013 13:07:33 +0100 Subject: Separate import and receive limits. They have different behavior w.r.t. filtered routes that are kept. --- README | 2 +- doc/bird.sgml | 20 ++++++++++++++------ nest/config.Y | 3 ++- nest/proto.c | 21 +++++++++++++++------ nest/protocol.h | 15 ++++++++++++--- nest/rt-table.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- proto/bgp/bgp.c | 1 + proto/pipe/pipe.c | 5 +++++ 8 files changed, 94 insertions(+), 22 deletions(-) (limited to 'doc') diff --git a/README b/README index 5c2ef076..daeb18bd 100644 --- a/README +++ b/README @@ -3,7 +3,7 @@ (c) 1998--2008 Martin Mares (c) 1998--2000 Pavel Machek (c) 1998--2008 Ondrej Filip - (c) 2009--2011 CZ.NIC z.s.p.o. + (c) 2009--2013 CZ.NIC z.s.p.o. ================================================================================ diff --git a/doc/bird.sgml b/doc/bird.sgml index 4e04a138..b0d4e6a1 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -482,15 +482,23 @@ to zero to disable it. An empty is equivalent to receive limit + Specify an receive route limit (a maximum number of routes + received from the protocol and remembered). 
It works almost + identically to import limit option, the only + difference is that if export limit Specify an export route limit, works similarly to diff --git a/nest/config.Y b/nest/config.Y index dbd72055..e46b5fb5 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -44,7 +44,7 @@ CF_DECLS CF_KEYWORDS(ROUTER, ID, PROTOCOL, TEMPLATE, PREFERENCE, DISABLED, DEBUG, ALL, OFF, DIRECT) CF_KEYWORDS(INTERFACE, IMPORT, EXPORT, FILTER, NONE, TABLE, STATES, ROUTES, FILTERS) -CF_KEYWORDS(LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED) +CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERED) CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH) CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED) @@ -185,6 +185,7 @@ proto_item: | MRTDUMP mrtdump_mask { this_proto->mrtdump = $2; } | IMPORT imexport { this_proto->in_filter = $2; } | EXPORT imexport { this_proto->out_filter = $2; } + | RECEIVE LIMIT limit_spec { this_proto->rx_limit = $3; } | IMPORT LIMIT limit_spec { this_proto->in_limit = $3; } | EXPORT LIMIT limit_spec { this_proto->out_limit = $3; } | IMPORT KEEP FILTERED bool { this_proto->in_keep_filtered = $4; } diff --git a/nest/proto.c b/nest/proto.c index b976a6cb..7e7fb7fa 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -344,6 +344,7 @@ protos_postconfig(struct config *c) WALK_LIST(x, c->protos) { DBG(" %s", x->name); + p = x->protocol; if (p->postconfig) p->postconfig(x); @@ -410,6 +411,7 @@ proto_reconfigure(struct proto *p, struct proto_config *oc, struct proto_config { p->main_ahook->in_filter = nc->in_filter; p->main_ahook->out_filter = nc->out_filter; + p->main_ahook->rx_limit = nc->rx_limit; p->main_ahook->in_limit = nc->in_limit; p->main_ahook->out_limit = nc->out_limit; p->main_ahook->in_keep_filtered = nc->in_keep_filtered; @@ -804,9 +806,11 @@ proto_schedule_feed(struct 
proto *p, int initial) p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats); p->main_ahook->in_filter = p->cf->in_filter; p->main_ahook->out_filter = p->cf->out_filter; + p->main_ahook->rx_limit = p->cf->rx_limit; p->main_ahook->in_limit = p->cf->in_limit; p->main_ahook->out_limit = p->cf->out_limit; p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered; + proto_reset_limit(p->main_ahook->rx_limit); proto_reset_limit(p->main_ahook->in_limit); proto_reset_limit(p->main_ahook->out_limit); } @@ -978,6 +982,7 @@ proto_limit_name(struct proto_limit *l) * proto_notify_limit: notify about limit hit and take appropriate action * @ah: announce hook * @l: limit being hit + * @dir: limit direction (PLD_*) * @rt_count: the number of routes * * The function is called by the route processing core when limit @l @@ -985,10 +990,11 @@ proto_limit_name(struct proto_limit *l) * according to @l->action. */ void -proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count) +proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count) { + const char *dir_name[PLD_MAX] = { "receive", "import" , "export" }; + const byte dir_down[PLD_MAX] = { PDC_RX_LIMIT_HIT, PDC_IN_LIMIT_HIT, PDC_OUT_LIMIT_HIT }; struct proto *p = ah->proto; - int dir = (ah->in_limit == l); if (l->state == PLS_BLOCKED) return; @@ -996,7 +1002,7 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count /* For warning action, we want the log message every time we hit the limit */ if (!l->state || ((l->action == PLA_WARN) && (rt_count == l->limit))) log(L_WARN "Protocol %s hits route %s limit (%d), action: %s", - p->name, dir ? 
"import" : "export", l->limit, proto_limit_name(l)); + p->name, dir_name[dir], l->limit, proto_limit_name(l)); switch (l->action) { @@ -1011,8 +1017,7 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count case PLA_RESTART: case PLA_DISABLE: l->state = PLS_BLOCKED; - proto_schedule_down(p, l->action == PLA_RESTART, - dir ? PDC_IN_LIMIT_HIT : PDC_OUT_LIMIT_HIT); + proto_schedule_down(p, l->action == PLA_RESTART, dir_down[dir]); break; } } @@ -1146,6 +1151,7 @@ proto_show_basic_info(struct proto *p) cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter)); cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter)); + proto_show_limit(p->cf->rx_limit, "Receive limit:"); proto_show_limit(p->cf->in_limit, "Import limit:"); proto_show_limit(p->cf->out_limit, "Export limit:"); @@ -1267,7 +1273,10 @@ proto_cmd_reload(struct proto *p, unsigned int dir, int cnt UNUSED) * Perhaps, but these hooks work asynchronously. */ if (!p->proto->multitable) - proto_reset_limit(p->main_ahook->in_limit); + { + proto_reset_limit(p->main_ahook->rx_limit); + proto_reset_limit(p->main_ahook->in_limit); + } } /* re-exporting routes */ diff --git a/nest/protocol.h b/nest/protocol.h index cf2ca0a4..033a0ede 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -95,6 +95,8 @@ struct proto_config { u32 router_id; /* Protocol specific router ID */ struct rtable_config *table; /* Table we're attached to */ struct filter *in_filter, *out_filter; /* Attached filters */ + struct proto_limit *rx_limit; /* Limit for receiving routes from protocol + (relevant when in_keep_filtered is active) */ struct proto_limit *in_limit; /* Limit for importing routes from protocol */ struct proto_limit *out_limit; /* Limit for exporting routes to protocol */ @@ -225,8 +227,9 @@ struct proto_spec { #define PDC_CMD_DISABLE 0x11 /* Result of disable command */ #define PDC_CMD_RESTART 0x12 /* Result of restart command */ #define PDC_CMD_SHUTDOWN 0x13 /* Result of global 
shutdown */ -#define PDC_IN_LIMIT_HIT 0x21 /* Route import limit reached */ -#define PDC_OUT_LIMIT_HIT 0x22 /* Route export limit reached */ +#define PDC_RX_LIMIT_HIT 0x21 /* Route receive limit reached */ +#define PDC_IN_LIMIT_HIT 0x22 /* Route import limit reached */ +#define PDC_OUT_LIMIT_HIT 0x23 /* Route export limit reached */ void *proto_new(struct proto_config *, unsigned size); @@ -373,6 +376,11 @@ extern struct proto_config *cf_dev_proto; * Protocol limits */ +#define PLD_RX 0 /* Receive limit */ +#define PLD_IN 1 /* Import limit */ +#define PLD_OUT 2 /* Export limit */ +#define PLD_MAX 3 + #define PLA_WARN 1 /* Issue log warning */ #define PLA_BLOCK 2 /* Block new routes */ #define PLA_RESTART 4 /* Force protocol restart */ @@ -388,7 +396,7 @@ struct proto_limit { byte state; /* State of limit (PLS_*) */ }; -void proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, u32 rt_count); +void proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32 rt_count); static inline void proto_reset_limit(struct proto_limit *l) @@ -408,6 +416,7 @@ struct announce_hook { struct proto *proto; struct filter *in_filter; /* Input filter */ struct filter *out_filter; /* Output filter */ + struct proto_limit *rx_limit; /* Receive limit (for in_keep_filtered) */ struct proto_limit *in_limit; /* Input limit */ struct proto_limit *out_limit; /* Output limit */ struct proto_stats *stats; /* Per-table protocol statistics */ diff --git a/nest/rt-table.c b/nest/rt-table.c index 2f0840f0..99175448 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -285,7 +285,7 @@ do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tm if (l && new) { if ((!old || refeed) && (stats->exp_routes >= l->limit)) - proto_notify_limit(ah, l, stats->exp_routes); + proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes); if (l->state == PLS_BLOCKED) { @@ -700,16 +700,22 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list 
*tmpa, str return; } - struct proto_limit *l = ah->in_limit; + int new_ok = rte_is_ok(new); + int old_ok = rte_is_ok(old); + + struct proto_limit *l = ah->rx_limit; if (l && !old && new) { u32 all_routes = stats->imp_routes + stats->filt_routes; if (all_routes >= l->limit) - proto_notify_limit(ah, l, all_routes); + proto_notify_limit(ah, l, PLD_RX, all_routes); if (l->state == PLS_BLOCKED) { + /* In receive limit the situation is simple, old is NULL so + we just free new and exit like nothing happened */ + stats->imp_updates_ignored++; rte_trace_in(D_FILTERS, p, new, "ignored [limit]"); rte_free_quick(new); @@ -717,8 +723,39 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str } } - int new_ok = rte_is_ok(new); - int old_ok = rte_is_ok(old); + l = ah->in_limit; + if (l && !old_ok && new_ok) + { + if (stats->imp_routes >= l->limit) + proto_notify_limit(ah, l, PLD_IN, stats->imp_routes); + + if (l->state == PLS_BLOCKED) + { + /* In import limit the situation is more complicated. We + shouldn't just drop the route, we should handle it like + it was filtered. We also have to continue the route + processing if old or new is non-NULL, but we should exit + if both are NULL as this case is probably assumed to be + already handled. */ + + stats->imp_updates_ignored++; + rte_trace_in(D_FILTERS, p, new, "ignored [limit]"); + + if (ah->in_keep_filtered) + new->flags |= REF_FILTERED; + else + { rte_free_quick(new); new = NULL; } + + /* Note that old && !new could be possible when + ah->in_keep_filtered changed in the recent past. */ + + if (!old && !new) + return; + + new_ok = 0; + goto skip_stats1; + } + } if (new_ok) stats->imp_updates_accepted++; @@ -727,6 +764,8 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str else stats->imp_withdraws_ignored++; + skip_stats1: + if (new) rte_is_filtered(new) ? 
stats->filt_routes++ : stats->imp_routes++; if (old) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 249d2e07..0f351b44 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -878,6 +878,7 @@ bgp_shutdown(struct proto *P) subcode = 4; // Errcode 6, 4 - administrative reset break; + case PDC_RX_LIMIT_HIT: case PDC_IN_LIMIT_HIT: subcode = 1; // Errcode 6, 1 - max number of prefixes reached /* log message for compatibility */ diff --git a/proto/pipe/pipe.c b/proto/pipe/pipe.c index 6099d284..51be3c7d 100644 --- a/proto/pipe/pipe.c +++ b/proto/pipe/pipe.c @@ -200,6 +200,11 @@ pipe_postconfig(struct proto_config *C) cf_error("Name of peer routing table not specified"); if (c->peer == C->table) cf_error("Primary table and peer table must be different"); + + if (C->in_keep_filtered) + cf_error("Pipe protocol prohibits keeping filtered routes"); + if (C->rx_limit) + cf_error("Pipe protocol does not support receive limits"); } extern int proto_reconfig_type; -- cgit v1.2.3 From d214ae4fdc1e323f89efb8a80c068fef4a45758f Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Sat, 12 Jan 2013 21:26:42 +0100 Subject: Fix missing documentation for one option. --- doc/bird.sgml | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index b0d4e6a1..f6f9aad7 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2546,6 +2546,13 @@ interface definitions, prefix definitions and DNS definitions:

Prefix specific options: + skip + This option allows to specify that given prefix should not be + advertised. This is useful for making exceptions from a + default policy of advertising all prefixes. Note that for + withdrawing an already advertised prefix it is more useful to + advertise it with zero valid lifetime. Default: no + onlink This option specifies whether hosts may use the advertised prefix for onlink determination. Default: yes -- cgit v1.2.3 From 36da2857bc911924a250a234f38cf58c3b21f1bc Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Fri, 8 Feb 2013 23:58:27 +0100 Subject: Implements router advertisements activated by received routes. The RAdv protocol could be configured to change its behavior based on availability of routes, e.g., do not announce router lifetime when a default route is not available. --- doc/bird.sgml | 43 +++++++++++++++++++------ filter/filter.c | 6 ++++ nest/route.h | 7 ++++ nest/rt-table.c | 37 ++++++++++++++++++--- proto/radv/config.Y | 36 ++++++++++++++++++--- proto/radv/packets.c | 32 ++++++++++-------- proto/radv/radv.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++-- proto/radv/radv.h | 8 +++++ 8 files changed, 225 insertions(+), 35 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index f6f9aad7..762834e3 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -691,8 +691,8 @@ This argument can be omitted if there exists only a single instance.

You can also select just routes added by a specific protocol. protocol . -

If BIRD is configured to keep filtered routes (see If BIRD is configured to keep filtered routes (see The dnssl that just specifies one DNS search domain. + +

Interface specific options: @@ -2525,11 +2545,12 @@ interface definitions, prefix definitions and DNS definitions: This option specifies which value of Hop Limit should be used by hosts. Valid values are 0-255, 0 means unspecified. Default: 64 - default lifetime + default lifetime This option specifies the time (in seconds) how long (after the receipt of RA) hosts may use the router as a default - router. 0 means do not use as a default router. Default: 3 * - . + Default: 3 * rdnss local Use only local (interface-specific) RDNSS definitions for this @@ -2561,18 +2582,20 @@ interface definitions, prefix definitions and DNS definitions: This option specifies whether hosts may use the advertised prefix for stateless autoconfiguration. Default: yes - valid lifetime + valid lifetime This option specifies the time (in seconds) how long (after the receipt of RA) the prefix information is valid, i.e., autoconfigured IP addresses can be assigned and hosts with that IP addresses are considered directly reachable. 0 means - the prefix is no longer valid. Default: 86400 (1 day) + the prefix is no longer valid. For . Default: 86400 (1 day), preferred lifetime + preferred lifetime This option specifies the time (in seconds) how long (after the receipt of RA) IP addresses generated from the prefix - using stateless autoconfiguration remain preferred. Default: - 14400 (4 hours) + using stateless autoconfiguration remain preferred. For + . 
+ Default: 14400 (4 hours), diff --git a/filter/filter.c b/filter/filter.c index 44fcf293..c35d0425 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -1429,6 +1429,12 @@ i_same(struct f_inst *f1, struct f_inst *f2) int f_run(struct filter *filter, struct rte **rte, struct ea_list **tmp_attrs, struct linpool *tmp_pool, int flags) { + if (filter == FILTER_ACCEPT) + return F_ACCEPT; + + if (filter == FILTER_REJECT) + return F_REJECT; + int rte_cow = ((*rte)->flags & REF_COW); DBG( "Running filter `%s'...", filter->name ); diff --git a/nest/route.h b/nest/route.h index 177baa38..8fd01a66 100644 --- a/nest/route.h +++ b/nest/route.h @@ -235,6 +235,12 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED); #define RA_ACCEPTED 2 /* Announcement of first accepted route */ #define RA_ANY 3 /* Announcement of any route change */ +/* Return value of import_control() callback */ +#define RIC_ACCEPT 1 /* Accepted by protocol */ +#define RIC_PROCESS 0 /* Process it through import filter */ +#define RIC_REJECT -1 /* Rejected by protocol */ +#define RIC_DROP -2 /* Silently dropped by protocol */ + struct config; void rt_init(void); @@ -250,6 +256,7 @@ rte *rte_get_temp(struct rta *); void rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src); static inline void rte_update(rtable *tab, net *net, struct proto *p, struct proto *src, rte *new) { rte_update2(p->main_ahook, net, new, src); } void rte_discard(rtable *tab, rte *old); +int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); void rte_dump(rte *); void rte_free(rte *); rte *rte_do_cow(rte *); diff --git a/nest/rt-table.c b/nest/rt-table.c index 99175448..75bfa6ba 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -213,7 +213,8 @@ export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, goto reject; stats->exp_updates_rejected++; - rte_trace_out(D_FILTERS, p, rt, "rejected by protocol"); + if (v == 
RIC_REJECT) + rte_trace_out(D_FILTERS, p, rt, "rejected by protocol"); goto reject; } if (v > 0) @@ -1042,6 +1043,34 @@ rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during gar rte_update_unlock(); } +/* Check rtable for best route to given net whether it would be exported do p */ +int +rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter) +{ + net *n = net_find(t, prefix, pxlen); + rte *rt = n ? n->routes : NULL; + + if (!rte_is_valid(rt)) + return 0; + + rte_update_lock(); + + /* Rest is stripped down export_filter() */ + struct proto *src = rt->attrs->proto; + ea_list *tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(rt, rte_update_pool) : NULL; + int v = p->import_control ? p->import_control(p, &rt, &tmpa, rte_update_pool) : 0; + if (v == RIC_PROCESS) + v = (f_run(filter, &rt, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT); + + /* Discard temporary rte */ + if (rt != n->routes) + rte_free(rt); + + rte_update_unlock(); + + return v > 0; +} + /** * rte_dump - dump a route * @e: &rte to be dumped @@ -2081,7 +2110,7 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) ee = e; rte_update_lock(); /* We use the update buffer for filtering */ tmpa = p0->make_tmp_attrs ? 
p0->make_tmp_attrs(e, rte_update_pool) : NULL; - ok = (d->filter == FILTER_ACCEPT || f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT); + ok = f_run(d->filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) <= F_ACCEPT; if (p2 && p2 != p0) ok = 0; if (ok && d->export_mode) { @@ -2095,8 +2124,8 @@ rt_show_net(struct cli *c, net *n, struct rt_show_data *d) 'configure soft' command may change the export filter and do not update routes */ - if ((a = proto_find_announce_hook(p1, d->table)) && ((a->out_filter == FILTER_REJECT) || - (a->out_filter && f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT))) + if ((a = proto_find_announce_hook(p1, d->table)) && + (f_run(a->out_filter, &e, &tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT)) ok = 0; } } diff --git a/proto/radv/config.Y b/proto/radv/config.Y index abccd2c7..fbec5a0a 100644 --- a/proto/radv/config.Y +++ b/proto/radv/config.Y @@ -30,9 +30,9 @@ CF_KEYWORDS(RADV, PREFIX, INTERFACE, MIN, MAX, RA, DELAY, INTERVAL, MANAGED, OTHER, CONFIG, LINK, MTU, REACHABLE, TIME, RETRANS, TIMER, CURRENT, HOP, LIMIT, DEFAULT, VALID, PREFERRED, MULT, LIFETIME, SKIP, ONLINK, AUTONOMOUS, RDNSS, DNSSL, NS, DOMAIN, - LOCAL) + LOCAL, TRIGGER, SENSITIVE) -%type radv_mult +%type radv_mult radv_sensitive CF_GRAMMAR @@ -53,6 +53,11 @@ radv_proto_item: | PREFIX radv_prefix { add_tail(&RADV_CFG->pref_list, NODE this_radv_prefix); } | RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_CFG->rdnss_list, &radv_dns_list); } | DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_CFG->dnssl_list, &radv_dns_list); } + | TRIGGER prefix { + RADV_CFG->trigger_prefix = $2.addr; + RADV_CFG->trigger_pxlen = $2.len; + RADV_CFG->trigger_valid = 1; + } ; radv_proto_opts: @@ -78,6 +83,7 @@ radv_iface_start: RADV_IFACE->min_delay = DEFAULT_MIN_DELAY; RADV_IFACE->current_hop_limit = DEFAULT_CURRENT_HOP_LIMIT; RADV_IFACE->default_lifetime = -1; + 
RADV_IFACE->default_lifetime_sensitive = 1; }; radv_iface_item: @@ -90,7 +96,11 @@ radv_iface_item: | REACHABLE TIME expr { RADV_IFACE->reachable_time = $3; if (($3 < 0) || ($3 > 3600000)) cf_error("Reachable time must be in range 0-3600000"); } | RETRANS TIMER expr { RADV_IFACE->retrans_timer = $3; if ($3 < 0) cf_error("Retrans timer must be 0 or positive"); } | CURRENT HOP LIMIT expr { RADV_IFACE->current_hop_limit = $4; if (($4 < 0) || ($4 > 255)) cf_error("Current hop limit must be in range 0-255"); } - | DEFAULT LIFETIME expr { RADV_IFACE->default_lifetime = $3; if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000"); } + | DEFAULT LIFETIME expr radv_sensitive { + RADV_IFACE->default_lifetime = $3; + if (($3 < 0) || ($3 > 9000)) cf_error("Default lifetime must be in range 0-9000"); + if ($4 != -1) RADV_IFACE->default_lifetime_sensitive = $4; + } | PREFIX radv_prefix { add_tail(&RADV_IFACE->pref_list, NODE this_radv_prefix); } | RDNSS { init_list(&radv_dns_list); } radv_rdnss { add_tail_list(&RADV_IFACE->rdnss_list, &radv_dns_list); } | DNSSL { init_list(&radv_dns_list); } radv_dnssl { add_tail_list(&RADV_IFACE->dnssl_list, &radv_dns_list); } @@ -147,14 +157,25 @@ radv_prefix_item: SKIP bool { RADV_PREFIX->skip = $2; } | ONLINK bool { RADV_PREFIX->onlink = $2; } | AUTONOMOUS bool { RADV_PREFIX->autonomous = $2; } - | VALID LIFETIME expr { RADV_PREFIX->valid_lifetime = $3; if ($3 < 0) cf_error("Valid lifetime must be 0 or positive"); } - | PREFERRED LIFETIME expr { RADV_PREFIX->preferred_lifetime = $3; if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive"); } + | VALID LIFETIME expr radv_sensitive { + RADV_PREFIX->valid_lifetime = $3; + if ($3 < 0) cf_error("Valid lifetime must be 0 or positive"); + if ($4 != -1) RADV_PREFIX->valid_lifetime_sensitive = $4; + } + | PREFERRED LIFETIME expr radv_sensitive { + RADV_PREFIX->preferred_lifetime = $3; + if ($3 < 0) cf_error("Preferred lifetime must be 0 or positive"); + if ($4 != -1) 
RADV_PREFIX->preferred_lifetime_sensitive = $4; + } ; radv_prefix_finish: { if (RADV_PREFIX->preferred_lifetime > RADV_PREFIX->valid_lifetime) cf_error("Preferred lifetime must be at most Valid lifetime"); + + if (RADV_PREFIX->valid_lifetime_sensitive > RADV_PREFIX->preferred_lifetime_sensitive) + cf_error("Valid lifetime sensitive requires that Preferred lifetime is sensitive too"); }; radv_prefix_opts: @@ -268,6 +289,11 @@ radv_mult: | MULT expr { $$ = 0; radv_mult_val = $2; if (($2 < 1) || ($2 > 254)) cf_error("Multiplier must be in range 1-254"); } ; +radv_sensitive: + /* empty */ { $$ = -1 } + | SENSITIVE bool { $$ = $2 } + ; + CF_CODE CF_END diff --git a/proto/radv/packets.c b/proto/radv/packets.c index 6fdfcaa3..dd839536 100644 --- a/proto/radv/packets.c +++ b/proto/radv/packets.c @@ -240,6 +240,7 @@ radv_prepare_ra(struct radv_iface *ifa) { struct proto_radv *ra = ifa->ra; struct radv_config *cf = (struct radv_config *) (ra->p.cf); + struct radv_iface_config *ic = ifa->cf; char *buf = ifa->sk->tbuf; char *bufstart = buf; @@ -249,21 +250,22 @@ radv_prepare_ra(struct radv_iface *ifa) pkt->type = ICMPV6_RA; pkt->code = 0; pkt->checksum = 0; - pkt->current_hop_limit = ifa->cf->current_hop_limit; - pkt->flags = (ifa->cf->managed ? OPT_RA_MANAGED : 0) | - (ifa->cf->other_config ? OPT_RA_OTHER_CFG : 0); - pkt->router_lifetime = htons(ifa->cf->default_lifetime); - pkt->reachable_time = htonl(ifa->cf->reachable_time); - pkt->retrans_timer = htonl(ifa->cf->retrans_timer); + pkt->current_hop_limit = ic->current_hop_limit; + pkt->flags = (ic->managed ? OPT_RA_MANAGED : 0) | + (ic->other_config ? OPT_RA_OTHER_CFG : 0); + pkt->router_lifetime = (ra->active || !ic->default_lifetime_sensitive) ? 
+ htons(ic->default_lifetime) : 0; + pkt->reachable_time = htonl(ic->reachable_time); + pkt->retrans_timer = htonl(ic->retrans_timer); buf += sizeof(*pkt); - if (ifa->cf->link_mtu) + if (ic->link_mtu) { struct radv_opt_mtu *om = (void *) buf; om->type = OPT_MTU; om->length = 1; om->reserved = 0; - om->mtu = htonl(ifa->cf->link_mtu); + om->mtu = htonl(ic->link_mtu); buf += sizeof (*om); } @@ -288,26 +290,28 @@ radv_prepare_ra(struct radv_iface *ifa) op->pxlen = addr->pxlen; op->flags = (pc->onlink ? OPT_PX_ONLINK : 0) | (pc->autonomous ? OPT_PX_AUTONOMOUS : 0); - op->valid_lifetime = htonl(pc->valid_lifetime); - op->preferred_lifetime = htonl(pc->preferred_lifetime); + op->valid_lifetime = (ra->active || !pc->valid_lifetime_sensitive) ? + htonl(pc->valid_lifetime) : 0; + op->preferred_lifetime = (ra->active || !pc->preferred_lifetime_sensitive) ? + htonl(pc->preferred_lifetime) : 0; op->reserved = 0; op->prefix = addr->prefix; ipa_hton(op->prefix); buf += sizeof(*op); } - if (! ifa->cf->rdnss_local) + if (! ic->rdnss_local) if (radv_prepare_rdnss(ifa, &cf->rdnss_list, &buf, bufend) < 0) goto done; - if (radv_prepare_rdnss(ifa, &ifa->cf->rdnss_list, &buf, bufend) < 0) + if (radv_prepare_rdnss(ifa, &ic->rdnss_list, &buf, bufend) < 0) goto done; - if (! ifa->cf->dnssl_local) + if (! ic->dnssl_local) if (radv_prepare_dnssl(ifa, &cf->dnssl_list, &buf, bufend) < 0) goto done; - if (radv_prepare_dnssl(ifa, &ifa->cf->dnssl_list, &buf, bufend) < 0) + if (radv_prepare_dnssl(ifa, &ic->dnssl_list, &buf, bufend) < 0) goto done; done: diff --git a/proto/radv/radv.c b/proto/radv/radv.c index 5e7296a3..a6b9b16c 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -30,6 +30,13 @@ * by RA_EV_* codes), and radv_timer(), which triggers sending RAs and * computes the next timeout. * + * The RAdv protocol could receive routes (through + * radv_import_control() and radv_rt_notify()), but only the + * configured trigger route is tracked (in &active var). 
When a radv + * protocol is reconfigured, the connected routing table is examined + * (in radv_check_active()) to have proper &active value in case of + * the specified trigger prefix was changed. + * * Supported standards: * - RFC 4861 - main RA standard * - RFC 6106 - DNS extensions (RDDNS, DNSSL) @@ -93,6 +100,16 @@ radv_iface_notify(struct radv_iface *ifa, int event) tm_start(ifa->timer, after); } +static void +radv_iface_notify_all(struct proto_radv *ra, int event) +{ + struct radv_iface *ifa; + + WALK_LIST(ifa, ra->iface_list) + radv_iface_notify(ifa, event); +} + + static struct radv_iface * radv_iface_find(struct proto_radv *ra, struct iface *what) { @@ -238,11 +255,68 @@ radv_ifa_notify(struct proto *p, unsigned flags, struct ifa *a) radv_iface_notify(ifa, RA_EV_CHANGE); } +static inline int radv_net_match_trigger(struct radv_config *cf, net *n) +{ + return cf->trigger_valid && + (n->n.pxlen == cf->trigger_pxlen) && + ipa_equal(n->n.prefix, cf->trigger_prefix); +} + +int +radv_import_control(struct proto *p, rte **new, ea_list **attrs UNUSED, struct linpool *pool UNUSED) +{ + // struct proto_radv *ra = (struct proto_radv *) p; + struct radv_config *cf = (struct radv_config *) (p->cf); + + if (radv_net_match_trigger(cf, (*new)->net)) + return RIC_PROCESS; + + return RIC_DROP; +} + +static void +radv_rt_notify(struct proto *p, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs UNUSED) +{ + struct proto_radv *ra = (struct proto_radv *) p; + struct radv_config *cf = (struct radv_config *) (p->cf); + + if (radv_net_match_trigger(cf, n)) + { + u8 old_active = ra->active; + ra->active = !!new; + + if (ra->active == old_active) + return; + + if (ra->active) + RADV_TRACE(D_EVENTS, "Triggered"); + else + RADV_TRACE(D_EVENTS, "Suppressed"); + + radv_iface_notify_all(ra, RA_EV_CHANGE); + } +} + +static int +radv_check_active(struct proto_radv *ra) +{ + struct radv_config *cf = (struct radv_config *) (ra->p.cf); + + if (! 
cf->trigger_valid) + return 1; + + return rt_examine(ra->p.table, cf->trigger_prefix, cf->trigger_pxlen, + &(ra->p), ra->p.cf->out_filter); +} + static struct proto * radv_init(struct proto_config *c) { struct proto *p = proto_new(c, sizeof(struct proto_radv)); + p->accept_ra_types = RA_OPTIMAL; + p->import_control = radv_import_control; + p->rt_notify = radv_rt_notify; p->if_notify = radv_if_notify; p->ifa_notify = radv_ifa_notify; return p; @@ -252,9 +326,10 @@ static int radv_start(struct proto *p) { struct proto_radv *ra = (struct proto_radv *) p; - // struct radv_config *cf = (struct radv_config *) (p->cf); + struct radv_config *cf = (struct radv_config *) (p->cf); init_list(&(ra->iface_list)); + ra->active = !cf->trigger_valid; return PS_UP; } @@ -293,6 +368,9 @@ radv_reconfigure(struct proto *p, struct proto_config *c) * causing nodes to temporary remove their default routes. */ + p->cf = c; /* radv_check_active() requires proper p->cf */ + ra->active = radv_check_active(ra); + struct iface *iface; WALK_LIST(iface, iface_list) { @@ -335,6 +413,14 @@ radv_copy_config(struct proto_config *dest, struct proto_config *src) cfg_copy_list(&d->pref_list, &s->pref_list, sizeof(struct radv_prefix_config)); } +static void +radv_get_status(struct proto *p, byte *buf) +{ + struct proto_radv *ra = (struct proto_radv *) p; + + if (!ra->active) + strcpy(buf, "Suppressed"); +} struct protocol proto_radv = { .name = "RAdv", @@ -343,5 +429,6 @@ struct protocol proto_radv = { .start = radv_start, .shutdown = radv_shutdown, .reconfigure = radv_reconfigure, - .copy_config = radv_copy_config + .copy_config = radv_copy_config, + .get_status = radv_get_status }; diff --git a/proto/radv/radv.h b/proto/radv/radv.h index 48af8c00..f80e4530 100644 --- a/proto/radv/radv.h +++ b/proto/radv/radv.h @@ -52,6 +52,10 @@ struct radv_config list pref_list; /* Global list of prefix configs (struct radv_prefix_config) */ list rdnss_list; /* Global list of RDNSS configs (struct radv_rdnss_config) 
*/ list dnssl_list; /* Global list of DNSSL configs (struct radv_dnssl_config) */ + + ip_addr trigger_prefix; /* Prefix of a trigger route, if defined */ + u8 trigger_pxlen; /* Pxlen of a trigger route, if defined */ + u8 trigger_valid; /* Whether a trigger route is defined */ }; struct radv_iface_config @@ -75,6 +79,7 @@ struct radv_iface_config u32 retrans_timer; u32 current_hop_limit; u32 default_lifetime; + u8 default_lifetime_sensitive; /* Whether default_lifetime depends on trigger */ }; struct radv_prefix_config @@ -88,6 +93,8 @@ struct radv_prefix_config u8 autonomous; u32 valid_lifetime; u32 preferred_lifetime; + u8 valid_lifetime_sensitive; /* Whether valid_lifetime depends on trigger */ + u8 preferred_lifetime_sensitive; /* Whether preferred_lifetime depends on trigger */ }; struct radv_rdnss_config @@ -113,6 +120,7 @@ struct proto_radv { struct proto p; list iface_list; /* List of active ifaces */ + u8 active; /* Whether radv is active w.r.t. triggers */ }; struct radv_iface -- cgit v1.2.3 From 0bc3542ab6e0a96342e35ead8ff1c52f980facc2 Mon Sep 17 00:00:00 2001 From: Ondrej Filip Date: Sun, 10 Feb 2013 19:06:56 +0100 Subject: Route limits can be disabled - this makes sense for protocol templates --- doc/bird.sgml | 12 ++++++------ nest/config.Y | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 762834e3..1baa1528 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -478,7 +478,7 @@ to zero to disable it. An empty is equivalent to import limit + import limit [ Specify an import route limit (a maximum number of routes imported from the protocol) and optionally the action to be taken when the limit is hit. Warn action just prints warning @@ -486,9 +486,9 @@ to zero to disable it. An empty is equivalent to receive limit + receive limit [ Specify an receive route limit (a maximum number of routes received from the protocol and remembered). 
It works almost identically to import limit option, the only @@ -498,9 +498,9 @@ to zero to disable it. An empty is equivalent to export limit + export limit [ Specify an export route limit, works similarly to the import limit option, but for the routes exported to the protocol. This option is experimental, there are some @@ -509,7 +509,7 @@ to zero to disable it. An empty is equivalent to description " This is an optional description of the protocol. It is displayed as a part of the diff --git a/nest/config.Y b/nest/config.Y index e46b5fb5..75728e0d 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -216,6 +216,7 @@ limit_spec: l->action = $2; $$ = l; } + | OFF { $$ = 0; } ; rtable: -- cgit v1.2.3 From 2bf59bf4d3e4fcaff489d3445134e5e2e2af9cf6 Mon Sep 17 00:00:00 2001 From: Ondrej Filip Date: Thu, 21 Feb 2013 00:44:59 +0100 Subject: Hotfix to solve an issue with delaying timers reported by Aleksey Chudov. --- doc/bird.sgml | 4 ++-- proto/rip/rip.c | 12 +++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 1baa1528..893d3bfa 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2734,7 +2734,7 @@ other than equally configured BIRD. I have warned you. period number specifies the number of seconds between periodic updates. Default is 30 seconds. A lower number will mean faster convergence but bigger network - load. Do not use values lower than 10. + load. Do not use values lower than 12. timeout time number specifies how old route has to be to be considered unreachable. Default is 4*interfaces ); P->timer = tm_new( p->pool ); P->timer->data = p; - P->timer->randomize = 5; - P->timer->recurrent = (P_CF->period / 6)+1; + P->timer->randomize = 2; + P->timer->recurrent = (P_CF->period / 6) - 1; + if (P_CF->period < 12) { + log(L_WARN "Period %d is too low. 
So I am using 12 which is the lowest possible value.", P_CF->period); + P->timer->recurrent = 1; + } P->timer->hook = rip_timer; tm_start( P->timer, 5 ); rif = new_iface(p, NULL, 0, NULL); /* Initialize dummy interface */ @@ -956,9 +960,11 @@ rip_rte_insert(net *net UNUSED, rte *rte) static void rip_rte_remove(net *net UNUSED, rte *rte) { - // struct proto *p = rte->attrs->proto; +#ifdef LOCAL_DEBUG + struct proto *p = rte->attrs->proto; CHK_MAGIC; DBG( "rip_rte_remove: %p\n", rte ); +#endif rem_node( &rte->u.rip.garbage ); } -- cgit v1.2.3 From e667622a35722ec007137e678f4f70841562e57f Mon Sep 17 00:00:00 2001 From: Ondrej Filip Date: Mon, 25 Feb 2013 10:39:46 +0100 Subject: Default routing table for 'show route export/preexport/protocol' is the one related to a respective protocol. --- doc/bird.sgml | 3 ++- nest/config.Y | 1 - nest/rt-table.c | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 893d3bfa..e83cf0e1 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -670,7 +670,8 @@ This argument can be omitted if there exists only a single instance. Show the list of symbols defined in the configuration (names of protocols, routing tables etc.). 
show route [[for] - Show contents of a routing table (by default of the main one), + Show contents of a routing table (by default of the main one or + the table attached to a respective protocol), that is routes, their metrics and (in case the pxlen = 256; $$->filter = FILTER_ACCEPT; - $$->table = config->master_rtc->table; } | r_args prefix { $$ = $1; diff --git a/nest/rt-table.c b/nest/rt-table.c index 75bfa6ba..e3fd985c 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -2204,6 +2204,11 @@ rt_show(struct rt_show_data *d) { net *n; + /* Default is either a master table or a table related to a respective protocol */ + if ((!d->table) && d->export_protocol) d->table = d->export_protocol->table; + if ((!d->table) && d->show_protocol) d->table = d->show_protocol->table; + if (!d->table) d->table = config->master_rtc->table; + if (d->pxlen == 256) { FIB_ITERATE_INIT(&d->fit, &d->table->fib); -- cgit v1.2.3 From 9ff5257357d9975654279db17bbc8525583ba1cc Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 16 Apr 2013 16:22:31 +0200 Subject: Better handling of global addresses as configured NBMA neighbors in OSPFv3. Configured NBMA neighbors in OSPFv3 should be link-local addresses, old behavior was to silently ignore global ones. The patch allows BIRD to accept global ones, but adds a warning and a documentation notice. Thanks to Wilco Baan Hofman for the bugreport. --- doc/bird.sgml | 5 ++++- proto/ospf/config.Y | 28 ++++++++++------------------ proto/ospf/iface.c | 24 ++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 21 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index e83cf0e1..8e5641e0 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -2212,7 +2212,10 @@ protocol ospf <name> { neighbors { A set of neighbors to which Hello messages on NBMA or PtMP networks are to be sent. For NBMA networks, some of them - could be marked as eligible. + could be marked as eligible. 
In OSPFv3, link-local addresses + should be used, using global ones is possible, but it is + nonstandard and might be problematic. And definitely, + link-local and global addresses should not be mixed. diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 3f09afba..b16d46a9 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -135,6 +135,7 @@ CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL) %type opttext %type lsadb_args +%type nbma_eligible CF_GRAMMAR @@ -295,7 +296,7 @@ ospf_iface_item: | STUB bool { OSPF_PATT->stub = $2 ; } | CHECK LINK bool { OSPF_PATT->check_link = $3; } | ECMP WEIGHT expr { OSPF_PATT->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); } - | NEIGHBORS '{' ipa_list '}' + | NEIGHBORS '{' nbma_list '}' | AUTHENTICATION NONE { OSPF_PATT->autype = OSPF_AUTH_NONE ; } | AUTHENTICATION SIMPLE { OSPF_PATT->autype = OSPF_AUTH_SIMPLE ; } | AUTHENTICATION CRYPTOGRAPHIC { OSPF_PATT->autype = OSPF_AUTH_CRYPT ; } @@ -327,33 +328,24 @@ pref_opt: | TAG expr { this_pref->tag = $2; } ; -ipa_list: +nbma_list: /* empty */ - | ipa_list ipa_item + | nbma_list nbma_item ; -ipa_item: - ipa_el - | ipa_ne; +nbma_eligible: + /* empty */ { $$ = 0; } + | ELIGIBLE { $$ = 1; } + ; -ipa_el: IPA ';' +nbma_item: IPA nbma_eligible ';' { this_nbma = cfg_allocz(sizeof(struct nbma_node)); add_tail(&OSPF_PATT->nbma_list, NODE this_nbma); this_nbma->ip=$1; - this_nbma->eligible=0; + this_nbma->eligible=$2; } ; - -ipa_ne: IPA ELIGIBLE ';' - { - this_nbma = cfg_allocz(sizeof(struct nbma_node)); - add_tail(&OSPF_PATT->nbma_list, NODE this_nbma); - this_nbma->ip=$1; - this_nbma->eligible=1; - } -; - ospf_iface_start: { diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 290a8634..39084cef 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -574,8 +574,22 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i init_list(&ifa->nbma_list); WALK_LIST(nb, ip->nbma_list) - if 
(ipa_in_net(nb->ip, addr->prefix, addr->pxlen)) - add_nbma_node(ifa, nb, 0); + { + /* In OSPFv3, addr is link-local while configured neighbors could + have global IP (although RFC 5340 C.5 says link-local addresses + should be used). Because OSPFv3 iface is not subnet-specific, + there is no need for ipa_in_net() check */ + +#ifdef OSPFv2 + if (!ipa_in_net(nb->ip, addr->prefix, addr->pxlen)) + continue; +#else + if (!ipa_has_link_scope(nb->ip)) + log(L_WARN "In OSPFv3, configured neighbor address (%I) should be link-local", nb->ip); +#endif + + add_nbma_node(ifa, nb, 0); + } ifa->state = OSPF_IS_DOWN; add_tail(&oa->po->iface_list, NODE ifa); @@ -771,8 +785,14 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) /* NBMA LIST - add new */ WALK_LIST(nb, new->nbma_list) { + /* See related note in ospf_iface_new() */ +#ifdef OSPFv2 if (!ipa_in_net(nb->ip, ifa->addr->prefix, ifa->addr->pxlen)) continue; +#else + if (!ipa_has_link_scope(nb->ip)) + log(L_WARN "In OSPFv3, configured neighbor address (%I) should be link-local", nb->ip); +#endif if (! find_nbma_node(ifa, nb->ip)) { -- cgit v1.2.3 From 48bc232f08141d26691237c3d79db587ce16932b Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 16 Apr 2013 17:27:34 +0200 Subject: Implements 'next hop keep' option for BGP. This option allows to keep the received next hop even in cases when the route is sent to an interface with a different subnet. --- doc/bird.sgml | 5 +++++ proto/bgp/attrs.c | 8 ++++++-- proto/bgp/bgp.h | 1 + proto/bgp/config.Y | 3 ++- 4 files changed, 14 insertions(+), 3 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 8e5641e0..300a71f3 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1329,6 +1329,11 @@ for each neighbor using the following configuration parameters: circumvent misconfigurations of other routers. Default: disabled. 
+ next hop keep Forward the received Next Hop + attribute even in situations where the local address should be + used instead, like when the route is sent to an interface with + a different subnet. Default: disabled. + missing lladdr self|drop|ignoreNext Hop attribute in BGP-IPv6 sometimes contains just the global IPv6 address, but sometimes it has to contain both global and link-local diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index 98b2f2c2..c27a4988 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -935,7 +935,8 @@ bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p rta->dest != RTD_ROUTER || ipa_equal(rta->gw, IPA_NONE) || ipa_has_link_scope(rta->gw) || - (!p->is_internal && (!p->neigh || (rta->iface != p->neigh->iface)))) + (!p->is_internal && !p->cf->next_hop_keep && + (!p->neigh || (rta->iface != p->neigh->iface)))) set_next_hop(z, p->source_addr); else set_next_hop(z, rta->gw); @@ -1003,10 +1004,13 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr, * eBGP single-hop -> keep next_hop if on the same iface. * If the next_hop is zero (i.e. link-local), keep only if on the same iface. + * + * Note that same-iface-check uses iface from route, which is based on gw. 
*/ a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP)); if (a && !p->cf->next_hop_self && - ((p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) || + (p->cf->next_hop_keep || + (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) || (p->neigh && (e->attrs->iface == p->neigh->iface)))) { /* Leave the original next hop attribute, will check later where does it point */ diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index c3adf254..13c7fd80 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -24,6 +24,7 @@ struct bgp_config { int multihop; /* Number of hops if multihop */ int ttl_security; /* Enable TTL security [RFC5082] */ int next_hop_self; /* Always set next hop to local IP address */ + int next_hop_keep; /* Do not touch next hop attribute */ int missing_lladdr; /* What we will do when we don' know link-local addr, see MLL_* */ int gw_mode; /* How we compute route gateway from next_hop attr, see GW_* */ int compare_path_lengths; /* Use path lengths when selecting best route */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 8b80d7fd..d5e5aaca 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -76,7 +76,8 @@ bgp_proto: | bgp_proto KEEPALIVE TIME expr ';' { BGP_CFG->keepalive_time = $4; } | bgp_proto MULTIHOP ';' { BGP_CFG->multihop = 64; } | bgp_proto MULTIHOP expr ';' { BGP_CFG->multihop = $3; if (($3<1) || ($3>255)) cf_error("Multihop must be in range 1-255"); } - | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; } + | bgp_proto NEXT HOP SELF ';' { BGP_CFG->next_hop_self = 1; BGP_CFG->next_hop_keep = 0; } + | bgp_proto NEXT HOP KEEP ';' { BGP_CFG->next_hop_keep = 1; BGP_CFG->next_hop_self = 0; } | bgp_proto MISSING LLADDR SELF ';' { BGP_CFG->missing_lladdr = MLL_SELF; } | bgp_proto MISSING LLADDR DROP ';' { BGP_CFG->missing_lladdr = MLL_DROP; } | bgp_proto MISSING LLADDR IGNORE ';' { BGP_CFG->missing_lladdr = MLL_IGNORE; } -- cgit v1.2.3 From 8df02847e8af29863c325b7297e3a2b2ed5f961c Mon Sep 17 00:00:00 
2001 From: Ondrej Zajicek Date: Wed, 17 Apr 2013 13:06:40 +0200 Subject: Fixes a compatibility issue in OSPFv2 PtP links. BIRD used zero netmask in hello packets on all PtP links, not just on unnumbered ones. This patch fixes it and adds option 'ptp netmask' for overriding the default behavior. Thanks to Alexander V. Chernikov for the original patch. --- doc/bird.sgml | 13 +++++++++++++ proto/ospf/config.Y | 4 +++- proto/ospf/hello.c | 3 ++- proto/ospf/iface.c | 3 +++ proto/ospf/ospf.h | 2 ++ 5 files changed, 23 insertions(+), 2 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 300a71f3..fab49105 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1940,6 +1940,7 @@ protocol ospf <name> { nonbroadcast|nbma|pointomultipoint|ptmp]; strict nonbroadcast <switch>; real broadcast <switch>; + ptp netmask <switch>; check link <switch>; ecmp weight <num>; authentication [none|simple|cryptographic]; @@ -2183,6 +2184,18 @@ protocol ospf <name> { probably is not interoperable with other OSPF implementations. Default value is no. + ptp netmask + In check link switch If set, a hardware link state (reported by OS) is taken into consideration. When a link disappears (e.g. 
an ethernet cable is diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index b16d46a9..2cc0b963 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -131,7 +131,7 @@ CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC) CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) -CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL) +CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK) %type opttext %type lsadb_args @@ -290,6 +290,7 @@ ospf_iface_item: | TYPE POINTOMULTIPOINT { OSPF_PATT->type = OSPF_IT_PTMP ; } | TYPE PTMP { OSPF_PATT->type = OSPF_IT_PTMP ; } | REAL BROADCAST bool { OSPF_PATT->real_bcast = $3; if (OSPF_VERSION != 2) cf_error("Real broadcast option requires OSPFv2"); } + | PTP NETMASK bool { OSPF_PATT->ptp_netmask = $3; if (OSPF_VERSION != 2) cf_error("Real netmask option requires OSPFv2"); } | TRANSMIT DELAY expr { OSPF_PATT->inftransdelay = $3 ; if (($3<=0) || ($3>65535)) cf_error("Transmit delay must be in range 1-65535"); } | PRIORITY expr { OSPF_PATT->priority = $2 ; if (($2<0) || ($2>255)) cf_error("Priority must be in range 0-255"); } | STRICT NONBROADCAST bool { OSPF_PATT->strictnbma = $3 ; } @@ -364,6 +365,7 @@ ospf_iface_start: OSPF_PATT->type = OSPF_IT_UNDEF; init_list(&OSPF_PATT->nbma_list); OSPF_PATT->autype = OSPF_AUTH_NONE; + OSPF_PATT->ptp_netmask = 2; /* not specified */ reset_passwords(); } ; diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c index 6ec5c511..d5aa1b95 100644 --- a/proto/ospf/hello.c +++ b/proto/ospf/hello.c @@ -253,7 +253,8 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) #ifdef OSPFv2 pkt->netmask = ipa_mkmask(ifa->addr->pxlen); ipa_hton(pkt->netmask); - if ((ifa->type == OSPF_IT_VLINK) || (ifa->type == OSPF_IT_PTP)) + if ((ifa->type == OSPF_IT_VLINK) || + ((ifa->type == 
OSPF_IT_PTP) && !ifa->ptp_netmask)) pkt->netmask = IPA_NONE; #endif diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 39084cef..9050f7b1 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -537,6 +537,9 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i #ifdef OSPFv2 ifa->autype = ip->autype; ifa->passwords = ip->passwords; + ifa->ptp_netmask = !(addr->flags & IA_PEER); + if (ip->ptp_netmask < 2) + ifa->ptp_netmask = ip->ptp_netmask; #endif #ifdef OSPFv3 diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 7111a13d..d924e657 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -273,6 +273,7 @@ struct ospf_iface u16 rxbuf; /* Buffer size */ u8 check_link; /* Whether iface link change is used */ u8 ecmp_weight; /* Weight used for ECMP */ + u8 ptp_netmask; /* Send real netmask for P2P */ }; struct ospf_md5 @@ -810,6 +811,7 @@ struct ospf_iface_patt u8 check_link; u8 ecmp_weight; u8 real_bcast; /* Not really used in OSPFv3 */ + u8 ptp_netmask; /* bool but 2 for unspecified */ #ifdef OSPFv2 list *passwords; -- cgit v1.2.3 From a5e9f3d26f887deb451a3ea086e52266c117aa0a Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 23 Apr 2013 02:42:35 +0200 Subject: Restructures birdc and birdcl to merge duplicated code. The BIRD client code is restructured that most of the code (including main function) is shared in client.c, while birdc.c and birdcl.c contain just I/O-specific callbacks. This removes all duplicated code from variant-specific files. 
--- client/Makefile | 2 +- client/birdc.c | 223 +++++++++++++++++++++++++++++ client/birdcl.c | 165 +++++++++++++++++++++ client/client.c | 436 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ client/client.h | 31 ++-- doc/bird.sgml | 8 +- 6 files changed, 848 insertions(+), 17 deletions(-) create mode 100644 client/birdc.c create mode 100644 client/birdcl.c create mode 100644 client/client.c (limited to 'doc') diff --git a/client/Makefile b/client/Makefile index 8c2f91e0..a1578766 100644 --- a/client/Makefile +++ b/client/Makefile @@ -1,4 +1,4 @@ -source=commands.c util.c common.c +source=commands.c util.c client.c root-rel=../ dir-name=client diff --git a/client/birdc.c b/client/birdc.c new file mode 100644 index 00000000..9dd6d9b9 --- /dev/null +++ b/client/birdc.c @@ -0,0 +1,223 @@ +/* + * BIRD Client - Readline variant I/O + * + * (c) 1999--2004 Martin Mares + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "nest/bird.h" +#include "lib/resource.h" +#include "lib/string.h" +#include "client/client.h" +#include "sysdep/unix/unix.h" + +static int input_hidden_end; +static int prompt_active; + +/*** Input ***/ + +/* HACK: libreadline internals we need to access */ +extern int _rl_vis_botlin; +extern void _rl_move_vert(int); +extern Function *rl_last_func; + +static void +add_history_dedup(char *cmd) +{ + /* Add history line if it differs from the last one */ + HIST_ENTRY *he = history_get(history_length); + if (!he || strcmp(he->line, cmd)) + add_history(cmd); +} + +static void +input_got_line(char *cmd_buffer) +{ + if (!cmd_buffer) + { + cleanup(); + exit(0); + } + + if (cmd_buffer[0]) + { + add_history_dedup(cmd_buffer); + submit_command(cmd_buffer); + } + + free(cmd_buffer); +} + +void +input_start_list(void) +{ + /* Leave the currently edited line and make space for listing */ + _rl_move_vert(_rl_vis_botlin); +#ifdef HAVE_RL_CRLF + 
rl_crlf(); +#endif +} + +void +input_stop_list(void) +{ + /* Reprint the currently edited line after listing */ + rl_on_new_line(); + rl_redisplay(); +} + +static int +input_complete(int arg UNUSED, int key UNUSED) +{ + static int complete_flag; + char buf[256]; + + if (rl_last_func != input_complete) + complete_flag = 0; + switch (cmd_complete(rl_line_buffer, rl_point, buf, complete_flag)) + { + case 0: + complete_flag = 1; + break; + case 1: + rl_insert_text(buf); + break; + default: + complete_flag = 1; +#ifdef HAVE_RL_DING + rl_ding(); +#endif + } + return 0; +} + +static int +input_help(int arg, int key UNUSED) +{ + int i, in_string, in_bracket; + + if (arg != 1) + return rl_insert(arg, '?'); + + in_string = in_bracket = 0; + for (i = 0; i < rl_point; i++) + { + + if (rl_line_buffer[i] == '"') + in_string = ! in_string; + else if (! in_string) + { + if (rl_line_buffer[i] == '[') + in_bracket++; + else if (rl_line_buffer[i] == ']') + in_bracket--; + } + } + + /* `?' inside string or path -> insert */ + if (in_string || in_bracket) + return rl_insert(1, '?'); + + rl_begin_undo_group(); /* HACK: We want to display `?' at point position */ + rl_insert_text("?"); + rl_redisplay(); + rl_end_undo_group(); + input_start_list(); + cmd_help(rl_line_buffer, rl_point); + rl_undo_command(1, 0); + input_stop_list(); + return 0; +} + +void +input_init(void) +{ + rl_readline_name = "birdc"; + rl_add_defun("bird-complete", input_complete, '\t'); + rl_add_defun("bird-help", input_help, '?'); + rl_callback_handler_install("bird> ", input_got_line); + + // rl_get_screen_size(); + term_lns = LINES ? LINES : 25; + term_cls = COLS ? COLS : 80; + + prompt_active = 1; + + // readline library does strange things when stdin is nonblocking. 
+ // if (fcntl(0, F_SETFL, O_NONBLOCK) < 0) + // die("fcntl: %m"); +} + +static void +input_reveal(void) +{ + /* need this, otherwise some lib seems to eat pending output when + the prompt is displayed */ + fflush(stdout); + tcdrain(STDOUT_FILENO); + + rl_end = input_hidden_end; + rl_expand_prompt("bird> "); + rl_forced_update_display(); + + prompt_active = 1; +} + +static void +input_hide(void) +{ + input_hidden_end = rl_end; + rl_end = 0; + rl_expand_prompt(""); + rl_redisplay(); + + prompt_active = 0; +} + +void +input_notify(int prompt) +{ + if (prompt == prompt_active) + return; + + if (prompt) + input_reveal(); + else + input_hide(); +} + +void +input_read(void) +{ + rl_callback_read_char(); +} + +void +more_begin(void) +{ +} + +void +more_end(void) +{ +} + +void +cleanup(void) +{ + if (init) + return; + + input_hide(); + rl_callback_handler_remove(); +} diff --git a/client/birdcl.c b/client/birdcl.c new file mode 100644 index 00000000..c41b046c --- /dev/null +++ b/client/birdcl.c @@ -0,0 +1,165 @@ +/* + * BIRD Client - Light variant I/O + * + * (c) 1999--2004 Martin Mares + * (c) 2013 Tomas Hlavacek + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include +#include +#include +#include + +#include +#include + +#include "nest/bird.h" +#include "lib/resource.h" +#include "lib/string.h" +#include "client/client.h" +#include "sysdep/unix/unix.h" + +#define INPUT_BUF_LEN 2048 + +struct termios tty_save; + +void +input_start_list(void) +{ + /* Empty in non-ncurses version. */ +} + +void +input_stop_list(void) +{ + /* Empty in non-ncurses version. */ +} + +void +input_notify(int prompt) +{ + /* No ncurses -> no status to reveal/hide, print prompt manually. 
*/ + if (!prompt) + return; + + printf("bird> "); + fflush(stdout); +} + + +static int +lastnb(char *str, int i) +{ + while (i--) + if ((str[i] != ' ') && (str[i] != '\t')) + return str[i]; + + return 0; +} + +void +input_read(void) +{ + char buf[INPUT_BUF_LEN]; + + if ((fgets(buf, INPUT_BUF_LEN, stdin) == NULL) || (buf[0] == 0)) + { + putchar('\n'); + cleanup(); + exit(0); + } + + int l = strlen(buf); + if ((l+1) == INPUT_BUF_LEN) + { + printf("Input too long.\n"); + return; + } + + if (buf[l-1] == '\n') + buf[--l] = '\0'; + + if (!interactive) + printf("%s\n", buf); + + if (l == 0) + return; + + if (lastnb(buf, l) == '?') + { + cmd_help(buf, strlen(buf)); + return; + } + + submit_command(buf); +} + +static struct termios stored_tty; +static int more_active = 0; + +void +more_begin(void) +{ + static struct termios tty; + + tty = stored_tty; + tty.c_lflag &= (~ECHO); + tty.c_lflag &= (~ICANON); + + if (tcsetattr (0, TCSANOW, &tty) < 0) + die("tcsetattr: %m"); + + more_active = 1; +} + +void +more_end(void) +{ + more_active = 0; + + if (tcsetattr (0, TCSANOW, &stored_tty) < 0) + die("tcsetattr: %m"); +} + +static void +sig_handler(int signal) +{ + cleanup(); + exit(0); +} + +void +input_init(void) +{ + if (!interactive) + return; + + if (tcgetattr(0, &stored_tty) < 0) + die("tcgetattr: %m"); + + if (signal(SIGINT, sig_handler) == SIG_IGN) + signal(SIGINT, SIG_IGN); + if (signal(SIGTERM, sig_handler) == SIG_IGN) + signal(SIGTERM, SIG_IGN); + + struct winsize tws; + if (ioctl(0, TIOCGWINSZ, &tws) == 0) + { + term_lns = tws.ws_row; + term_cls = tws.ws_col; + } + else + { + term_lns = 25; + term_cls = 80; + } +} + +void +cleanup(void) +{ + if (more_active) + more_end(); +} diff --git a/client/client.c b/client/client.c new file mode 100644 index 00000000..61caf38b --- /dev/null +++ b/client/client.c @@ -0,0 +1,436 @@ +/* + * BIRD Client + * + * (c) 1999--2004 Martin Mares + * (c) 2013 Tomas Hlavacek + * + * Can be freely distributed and used under the terms of the GNU 
GPL. + */ + +/** + * DOC: BIRD client + * + * There are two variants of BIRD client: regular and light. regular + * variant depends on readline and ncurses libraries, while light + * variant uses just libc. Most of the code and the main() is common + * for both variants (in client.c file) and just a few functions are + * different (in birdc.c for regular and birdcl.c for light). Two + * binaries are generated by linking common object files like client.o + * (which is compiled from client.c just once) with either birdc.o or + * birdcl.o for each variant. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nest/bird.h" +#include "lib/resource.h" +#include "lib/string.h" +#include "client/client.h" +#include "sysdep/unix/unix.h" + +#define SERVER_READ_BUF_LEN 4096 + +static char *opt_list = "s:vr"; +static int verbose, restricted, once; +static char *init_cmd; + +static char *server_path = PATH_CONTROL_SOCKET; +static int server_fd; +static byte server_read_buf[SERVER_READ_BUF_LEN]; +static byte *server_read_pos = server_read_buf; + +int init = 1; /* During intial sequence */ +int busy = 1; /* Executing BIRD command */ +int interactive; /* Whether stdin is terminal */ + +static int num_lines, skip_input; +int term_lns, term_cls; + + +/*** Parsing of arguments ***/ + +static void +usage(char *name) +{ + fprintf(stderr, "Usage: %s [-s ] [-v] [-r]\n", name); + exit(1); +} + +static void +parse_args(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, opt_list)) >= 0) + switch (c) + { + case 's': + server_path = optarg; + break; + case 'v': + verbose++; + break; + case 'r': + restricted = 1; + break; + default: + usage(argv[0]); + } + + /* If some arguments are not options, we take it as commands */ + if (optind < argc) + { + char *tmp; + int i; + int len = 0; + + for (i = optind; i < argc; i++) + len += strlen(argv[i]) + 1; + + tmp = init_cmd = malloc(len); + for (i = optind; i < argc; i++) + { + 
strcpy(tmp, argv[i]); + tmp += strlen(tmp); + *tmp++ = ' '; + } + tmp[-1] = 0; + + once = 1; + interactive = 0; + } +} + + +/*** Input ***/ + +static void server_send(char *cmd); + +static int +handle_internal_command(char *cmd) +{ + if (!strncmp(cmd, "exit", 4) || !strncmp(cmd, "quit", 4)) + { + cleanup(); + exit(0); + } + if (!strncmp(cmd, "help", 4)) + { + puts("Press `?' for context sensitive help."); + return 1; + } + return 0; +} + +static void +submit_server_command(char *cmd) +{ + busy = 1; + num_lines = 2; + server_send(cmd); +} + +void +submit_command(char *cmd_raw) +{ + char *cmd = cmd_expand(cmd_raw); + + if (!cmd) + return; + + if (!handle_internal_command(cmd)) + submit_server_command(cmd); + + free(cmd); +} + +static void +init_commands(void) +{ + if (restricted) + { + submit_server_command("restrict"); + restricted = 0; + return; + } + + if (init_cmd) + { + /* First transition - client received hello from BIRD + and there is waiting initial command */ + submit_server_command(init_cmd); + init_cmd = NULL; + return; + } + + if (once) + { + /* Initial command is finished and we want to exit */ + cleanup(); + exit(0); + } + + input_init(); + init = 0; +} + + +/*** Output ***/ + +void +more(void) +{ + more_begin(); + printf("--More--\015"); + fflush(stdout); + + redo: + switch (getchar()) + { + case ' ': + num_lines = 2; + break; + case '\n': + case '\r': + num_lines--; + break; + case 'q': + skip_input = 1; + break; + default: + goto redo; + } + + printf(" \015"); + fflush(stdout); + more_end(); +} + + +/*** Communication with server ***/ + +static void +server_connect(void) +{ + struct sockaddr_un sa; + + server_fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (server_fd < 0) + die("Cannot create socket: %m"); + + if (strlen(server_path) >= sizeof(sa.sun_path)) + die("server_connect: path too long"); + + bzero(&sa, sizeof(sa)); + sa.sun_family = AF_UNIX; + strcpy(sa.sun_path, server_path); + if (connect(server_fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) 
+ die("Unable to connect to server control socket (%s): %m", server_path); + if (fcntl(server_fd, F_SETFL, O_NONBLOCK) < 0) + die("fcntl: %m"); +} + + +#define PRINTF(LEN, PARGS...) do { if (!skip_input) len = printf(PARGS); } while(0) + +static void +server_got_reply(char *x) +{ + int code; + int len = 0; + + if (*x == '+') /* Async reply */ + PRINTF(len, ">>> %s\n", x+1); + else if (x[0] == ' ') /* Continuation */ + PRINTF(len, "%s%s\n", verbose ? " " : "", x+1); + else if (strlen(x) > 4 && + sscanf(x, "%d", &code) == 1 && code >= 0 && code < 10000 && + (x[4] == ' ' || x[4] == '-')) + { + if (code) + PRINTF(len, "%s\n", verbose ? x : x+5); + + if (x[4] == ' ') + { + busy = 0; + skip_input = 0; + return; + } + } + else + PRINTF(len, "??? <%s>\n", x); + + if (interactive && busy && !skip_input && !init && (len > 0)) + { + num_lines += (len + term_cls - 1) / term_cls; /* Divide and round up */ + if (num_lines >= term_lns) + more(); + } +} + +static void +server_read(void) +{ + int c; + byte *start, *p; + + redo: + c = read(server_fd, server_read_pos, server_read_buf + sizeof(server_read_buf) - server_read_pos); + if (!c) + die("Connection closed by server."); + if (c < 0) + { + if (errno == EINTR) + goto redo; + else + die("Server read error: %m"); + } + + start = server_read_buf; + p = server_read_pos; + server_read_pos += c; + while (p < server_read_pos) + if (*p++ == '\n') + { + p[-1] = 0; + server_got_reply(start); + start = p; + } + if (start != server_read_buf) + { + int l = server_read_pos - start; + memmove(server_read_buf, start, l); + server_read_pos = server_read_buf + l; + } + else if (server_read_pos == server_read_buf + sizeof(server_read_buf)) + { + strcpy(server_read_buf, "?"); + server_read_pos = server_read_buf + 11; + } +} + +static void +select_loop(void) +{ + int rv; + while (1) + { + if (init && !busy) + init_commands(); + + if (!init) + input_notify(!busy); + + fd_set select_fds; + FD_ZERO(&select_fds); + + FD_SET(server_fd, &select_fds); + if 
(!busy) + FD_SET(0, &select_fds); + + rv = select(server_fd+1, &select_fds, NULL, NULL, NULL); + if (rv < 0) + { + if (errno == EINTR) + continue; + else + die("select: %m"); + } + + if (FD_ISSET(0, &select_fds)) + { + input_read(); + continue; + } + + if (FD_ISSET(server_fd, &select_fds)) + { + server_read(); + continue; + } + } +} + +static void +wait_for_write(int fd) +{ + while (1) + { + int rv; + fd_set set; + FD_ZERO(&set); + FD_SET(fd, &set); + + rv = select(fd+1, NULL, &set, NULL, NULL); + if (rv < 0) + { + if (errno == EINTR) + continue; + else + die("select: %m"); + } + + if (FD_ISSET(server_fd, &set)) + return; + } +} + +static void +server_send(char *cmd) +{ + int l = strlen(cmd); + byte *z = alloca(l + 1); + + memcpy(z, cmd, l); + z[l++] = '\n'; + while (l) + { + int cnt = write(server_fd, z, l); + + if (cnt < 0) + { + if (errno == EAGAIN) + wait_for_write(server_fd); + else if (errno == EINTR) + continue; + else + die("Server write error: %m"); + } + else + { + l -= cnt; + z += cnt; + } + } +} + + +/* XXXX + + get_term_size(); + + if (tcgetattr(0, &tty_save) != 0) + { + perror("tcgetattr error"); + return(EXIT_FAILURE); + } + } + + */ +int +main(int argc, char **argv) +{ + interactive = isatty(0); + parse_args(argc, argv); + cmd_build_tree(); + server_connect(); + select_loop(); + return 0; +} diff --git a/client/client.h b/client/client.h index 2e4e2ea3..b194a772 100644 --- a/client/client.h +++ b/client/client.h @@ -6,12 +6,23 @@ * Can be freely distributed and used under the terms of the GNU GPL. 
*/ -/* client.c callbacks */ -void cleanup(void); +extern int init, busy, interactive; +extern int term_lns, term_cls; + +/* birdc.c / birdcl.c */ + void input_start_list(void); void input_stop_list(void); -void server_got_reply(char *x); + +void input_init(void); +void input_notify(int prompt); +void input_read(void); + +void more_begin(void); +void more_end(void); + +void cleanup(void); /* commands.c */ @@ -20,16 +31,6 @@ void cmd_help(char *cmd, int len); int cmd_complete(char *cmd, int len, char *buf, int again); char *cmd_expand(char *cmd); -/* common.c */ - -#define STATE_PROMPT 0 -#define STATE_CMD_SERVER 1 -#define STATE_CMD_USER 2 - -#define SERVER_READ_BUF_LEN 4096 +/* client.c */ -int handle_internal_command(char *cmd); -void submit_server_command(char *cmd); -void server_connect(void); -void server_read(void); -void server_send(char *cmd); +void submit_command(char *cmd_raw); diff --git a/doc/bird.sgml b/doc/bird.sgml index e83cf0e1..88d35e49 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -623,7 +623,13 @@ codes along with the messages. You do not necessarily need to use -- the format of communication between BIRD and There is also lightweight variant of BIRD client called +Many commands have the Here is a brief list of supported functions: -- cgit v1.2.3 From f623ab9875cad2d129f708e95021d3a252930000 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 11 Jun 2013 12:12:11 +0200 Subject: Implements OSPF stub router option (RFC 3137). Also fixes OSPFv3 routing table calculation w.r.t. errata 2078 to RFC 5340. --- doc/bird.sgml | 10 ++++++++++ proto/ospf/config.Y | 1 + proto/ospf/ospf.c | 7 +++++-- proto/ospf/ospf.h | 2 ++ proto/ospf/rt.c | 6 +++++- proto/ospf/topology.c | 8 ++++---- 6 files changed, 27 insertions(+), 7 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 18f3601b..0681bd53 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1904,6 +1904,7 @@ on nonbroadcast networks. 
protocol ospf <name> { rfc1583compat <switch>; + stub router <switch>; tick <num>; ecmp <switch> [limit <num>]; area <id> { @@ -1983,6 +1984,15 @@ protocol ospf <name> { url="ftp://ftp.rfc-editor.org/in-notes/rfc1583.txt">. Default value is no. + stub router switch + This option configures the router to be a stub router, i.e., + a router that participates in the OSPF topology but does not + allow transit traffic. In OSPFv2, this is implemented by + advertising maximum metric for outgoing links, as suggested + by RFC 3137. + In OSPFv3, the stub router behavior is announced by clearing + the R-bit in the router LSA. Default value is no. + tick num The routing table calculation and clean-up of areas' databases is not performed when a single link state diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index 2cc0b963..ba050d85 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -158,6 +158,7 @@ ospf_proto: ospf_proto_item: proto_item | RFC1583COMPAT bool { OSPF_CFG->rfc1583 = $2; } + | STUB ROUTER bool { OSPF_CFG->stub_router = $3; } | ECMP bool { OSPF_CFG->ecmp = $2 ? DEFAULT_ECMP_LIMIT : 0; } | ECMP bool LIMIT expr { OSPF_CFG->ecmp = $2 ? $4 : 0; if ($4 < 0) cf_error("ECMP limit cannot be negative"); } | TICK expr { OSPF_CFG->tick = $2; if($2<=0) cf_error("Tick must be greater than zero"); } diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index a3b6b2e7..2fa87201 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -167,7 +167,7 @@ ospf_area_add(struct proto_ospf *po, struct ospf_area_config *ac, int reconf) #ifdef OSPFv2 oa->options = ac->type; #else /* OSPFv3 */ - oa->options = OPT_R | ac->type | OPT_V6; + oa->options = ac->type | OPT_V6 | (po->stub_router ? 
0 : OPT_R); #endif /* @@ -234,6 +234,7 @@ ospf_start(struct proto *p) po->router_id = proto_get_router_id(p->cf); po->last_vlink_id = 0x80000000; po->rfc1583 = c->rfc1583; + po->stub_router = c->stub_router; po->ebit = 0; po->ecmp = c->ecmp; po->tick = c->tick; @@ -690,7 +691,7 @@ ospf_area_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) #ifdef OSPFv2 oa->options = nac->type; #else /* OSPFv3 */ - oa->options = OPT_R | nac->type | OPT_V6; + oa->options = nac->type | OPT_V6 | (oa->po->stub_router ? 0 : OPT_R); #endif if (oa_is_nssa(oa) && (oa->po->areano > 1)) oa->po->ebit = 1; @@ -738,6 +739,7 @@ ospf_reconfigure(struct proto *p, struct proto_config *c) if (old->abr != new->abr) return 0; + po->stub_router = new->stub_router; po->ecmp = new->ecmp; po->tick = new->tick; po->disp_timer->recurrent = po->tick; @@ -831,6 +833,7 @@ ospf_sh(struct proto *p) cli_msg(-1014, "%s:", p->name); cli_msg(-1014, "RFC1583 compatibility: %s", (po->rfc1583 ? "enable" : "disabled")); + cli_msg(-1014, "Stub router: %s", (po->stub_router ? "Yes" : "No")); cli_msg(-1014, "RT scheduler tick: %d", po->tick); cli_msg(-1014, "Number of areas: %u", po->areano); cli_msg(-1014, "Number of LSAs in DB:\t%u", po->gr->hash_entries); diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index d924e657..7608225f 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -83,6 +83,7 @@ struct ospf_config struct proto_config c; unsigned tick; byte rfc1583; + byte stub_router; byte abr; int ecmp; list area_list; /* list of struct ospf_area_config */ @@ -771,6 +772,7 @@ struct proto_ospf int areano; /* Number of area I belong to */ struct fib rtf; /* Routing table */ byte rfc1583; /* RFC1583 compatibility */ + byte stub_router; /* Do not forward transit traffic */ byte ebit; /* Did I originate any ext lsa? 
*/ byte ecmp; /* Maximal number of nexthops in ECMP route, or 0 */ struct ospf_area *backbone; /* If exists */ diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 4b8de4b8..f509b896 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -501,6 +501,10 @@ ospf_rt_spfa(struct ospf_area *oa) #ifdef OSPFv2 ospf_rt_spfa_rtlinks(oa, act, act); #else /* OSPFv3 */ + /* Errata 2078 to RFC 5340 4.8.1 - skip links from non-routing nodes */ + if ((act != oa->rt) && !(rt->options & OPT_R)) + break; + for (tmp = ospf_hash_find_rt_first(po->gr, act->domain, act->lsa.rt); tmp; tmp = ospf_hash_find_rt_next(tmp)) ospf_rt_spfa_rtlinks(oa, act, tmp); @@ -1839,7 +1843,7 @@ add_cand(list * l, struct top_hash_entry *en, struct top_hash_entry *par, if (en->lsa.type == LSA_T_RT) { struct ospf_lsa_rt *rt = en->lsa_body; - if (!(rt->options & OPT_V6) || !(rt->options & OPT_R)) + if (!(rt->options & OPT_V6)) return; } #endif diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 5f4d1d54..5d93c0e9 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -233,6 +233,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) WALK_LIST(ifa, po->iface_list) { int net_lsa = 0; + u32 link_cost = po->stub_router ? 0xffff : ifa->cost; if ((ifa->type == OSPF_IT_VLINK) && (ifa->voa == oa) && (!EMPTY_LIST(ifa->neigh_list))) @@ -268,8 +269,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) * this address as a next-hop. 
*/ ln->data = ipa_to_u32(ifa->addr->ip); - - ln->metric = ifa->cost; + ln->metric = link_cost; ln->padding = 0; i++; } @@ -283,7 +283,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) ln->type = LSART_NET; ln->id = ipa_to_u32(ifa->drip); ln->data = ipa_to_u32(ifa->addr->ip); - ln->metric = ifa->cost; + ln->metric = link_cost; ln->padding = 0; i++; net_lsa = 1; @@ -298,7 +298,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) ln->type = LSART_VLNK; ln->id = neigh->rid; ln->data = ipa_to_u32(ifa->addr->ip); - ln->metric = ifa->cost; + ln->metric = link_cost; ln->padding = 0; i++; } -- cgit v1.2.3 From ef4a50be10c6dd0abffd957132cd146029c3d79d Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Mon, 24 Jun 2013 16:37:30 +0200 Subject: Better packet priority and traffic class handling. Implements support for IPv6 traffic class, sets higher priority for OSPF and RIP outgoing packets by default and allows to configure ToS/DS/TClass IP header field and the local priority of outgoing packets. --- doc/bird.sgml | 48 +++++++++++++++++++++++++++++++++++++++++------- lib/ipv6.h | 7 +------ lib/socket.h | 4 +++- nest/config.Y | 8 ++++++-- proto/ospf/config.Y | 6 +++++- proto/ospf/iface.c | 8 ++++++-- proto/ospf/ospf.h | 2 ++ proto/rip/config.Y | 6 +++++- proto/rip/rip.c | 7 +++++-- proto/rip/rip.h | 2 ++ sysdep/bsd/sysio.h | 9 +++++++++ sysdep/linux/sysio.h | 19 +++++++++++++++++++ sysdep/unix/io.c | 11 +++++++++-- 13 files changed, 113 insertions(+), 24 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 0681bd53..7277b2b9 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -567,6 +567,22 @@ to zero to disable it. An empty is equivalent to interface "eth*" 192.168.1.0/24; - start the protocol on all ethernet interfaces that have address from 192.168.1.0/24. +

There are two options that can be specified per-interface. First is metric, with -default one. Second is mode multicast|broadcast|quiet|nolisten|version1, it selects mode for -rip to work in. If nothing is specified, rip runs in multicast mode. version1 is -currently equivalent to broadcast, and it makes RIP talk to a broadcast address even -through multicast mode is possible. quiet option means that RIP will not transmit -any periodic messages to this interface and nolisten means that RIP will send to this -interface but not listen to it. +

There are some options that can be specified per-interface: + + + metric + This option specifies the metric of the interface. Valid + + mode multicast|broadcast|quiet|nolisten|version1 + This option selects the mode for RIP to work in. If nothing is + specified, RIP runs in multicast mode. tx class|dscp|priority + These options specify the ToS/DiffServ/Traffic class/Priority + of the outgoing RIP packets. See common option for detailed description. +

The following options generally override behavior specified in RFC. If you use any of these options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything diff --git a/lib/ipv6.h b/lib/ipv6.h index 6f8e7b3c..2247d3fd 100644 --- a/lib/ipv6.h +++ b/lib/ipv6.h @@ -128,11 +128,6 @@ static inline byte * ipv6_put_addr(byte *buf, ip_addr a) return buf+16; } -/* - * RFC 1883 defines packet precendece, but RFC 2460 replaces it - * by generic Traffic Class ID with no defined semantics. Better - * not use it yet. - */ -#define IP_PREC_INTERNET_CONTROL -1 +#define IP_PREC_INTERNET_CONTROL 0xc0 #endif diff --git a/lib/socket.h b/lib/socket.h index 0ee43b52..fbddfb4c 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -20,7 +20,8 @@ typedef struct birdsock { void *data; /* User data */ ip_addr saddr, daddr; /* IPA_NONE = unspecified */ unsigned sport, dport; /* 0 = unspecified (for IP: protocol type) */ - int tos; /* TOS and priority, -1 = default */ + int tos; /* TOS / traffic class, -1 = default */ + int priority; /* Local socket priority, -1 = default */ int ttl; /* Time To Live, -1 = default */ u32 flags; struct iface *iface; /* Interface; specify this for broad/multicast sockets */ @@ -81,6 +82,7 @@ sk_send_buffer_empty(sock *sk) return sk->tbuf == sk->tpos; } +extern int sk_priority_control; /* Suggested priority for control traffic, should be sysdep define */ /* Socket flags */ diff --git a/nest/config.Y b/nest/config.Y index 183059e8..b85a5733 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -48,7 +48,7 @@ CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERE CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES) CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH) CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED) -CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC) +CF_KEYWORDS(RELOAD, 
IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP) CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT, RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE) @@ -65,7 +65,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID) %type roa_args %type roa_table_arg %type sym_args -%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted +%type proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted tos %type proto_patt proto_patt2 %type limit_spec @@ -277,6 +277,10 @@ iface_patt: iface_patt_init iface_patt_list ; +tos: + CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); } + | DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); } + ; /* Direct device route protocol */ diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index ba050d85..d9379a7c 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -131,7 +131,7 @@ CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC) CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) -CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK) +CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY) %type opttext %type lsadb_args @@ -305,6 +305,8 @@ ospf_iface_item: | RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; } | RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; } | RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); } + | TX tos { 
OSPF_PATT->tx_tos = $2; } + | TX PRIORITY expr { OSPF_PATT->tx_priority = $3; } | password_list ; @@ -367,6 +369,8 @@ ospf_iface_start: init_list(&OSPF_PATT->nbma_list); OSPF_PATT->autype = OSPF_AUTH_NONE; OSPF_PATT->ptp_netmask = 2; /* not specified */ + OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL; + OSPF_PATT->tx_priority = sk_priority_control; reset_passwords(); } ; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 3da8f56c..bc3b1ef6 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -77,7 +77,8 @@ ospf_sk_open(struct ospf_iface *ifa) sk->dport = OSPF_PROTO; sk->saddr = IPA_NONE; - sk->tos = IP_PREC_INTERNET_CONTROL; + sk->tos = ifa->cf->tx_tos; + sk->priority = ifa->cf->tx_priority; sk->rx_hook = ospf_rx_hook; sk->tx_hook = ospf_tx_hook; sk->err_hook = ospf_err_hook; @@ -659,7 +660,10 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) if (ifa->stub != new_stub) return 0; - if (new->real_bcast != ifa->cf->real_bcast) + /* Change of these options would require to reset the iface socket */ + if ((new->real_bcast != ifa->cf->real_bcast) || + (new->tx_tos != ifa->cf->tx_tos) || + (new->tx_priority != ifa->cf->tx_priority)) return 0; ifa->cf = new; diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 7608225f..56ebcd31 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -800,6 +800,8 @@ struct ospf_iface_patt u32 priority; u32 voa; u32 vid; + int tx_tos; + int tx_priority; u16 rxbuf; #define OSPF_RXBUF_NORMAL 0 #define OSPF_RXBUF_LARGE 1 diff --git a/proto/rip/config.Y b/proto/rip/config.Y index cd4f30e7..ec82aa3d 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -27,7 +27,7 @@ CF_DECLS CF_KEYWORDS(RIP, INFINITY, METRIC, PORT, PERIOD, GARBAGE, TIMEOUT, MODE, BROADCAST, MULTICAST, QUIET, NOLISTEN, VERSION1, AUTHENTICATION, NONE, PLAINTEXT, MD5, - HONOR, NEVER, NEIGHBOR, ALWAYS, + HONOR, NEVER, NEIGHBOR, ALWAYS, TX, PRIORITY, RIP_METRIC, RIP_TAG) %type rip_mode rip_auth @@ -76,6 +76,8 @@ rip_mode: 
rip_iface_item: | METRIC expr { RIP_IPATT->metric = $2; } | MODE rip_mode { RIP_IPATT->mode |= $2; } + | TX tos { RIP_IPATT->tx_tos = $2; } + | TX PRIORITY expr { RIP_IPATT->tx_priority = $3; } ; rip_iface_opts: @@ -94,6 +96,8 @@ rip_iface_init: add_tail(&RIP_CFG->iface_list, NODE this_ipatt); init_list(&this_ipatt->ipn_list); RIP_IPATT->metric = 1; + RIP_IPATT->tx_tos = IP_PREC_INTERNET_CONTROL; + RIP_IPATT->tx_priority = sk_priority_control; } ; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 341df7eb..c09eae79 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -707,7 +707,8 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_ if (new) { rif->sock->ttl = 1; - rif->sock->tos = IP_PREC_INTERNET_CONTROL; + rif->sock->tos = PATT->tx_tos; + rif->sock->priority = PATT->tx_priority; rif->sock->flags = SKF_LADDR_RX; } @@ -1007,7 +1008,9 @@ static int rip_pat_compare(struct rip_patt *a, struct rip_patt *b) { return ((a->metric == b->metric) && - (a->mode == b->mode)); + (a->mode == b->mode) && + (a->tx_tos == b->tx_tos) && + (a->tx_priority == b->tx_priority)); } static int diff --git a/proto/rip/rip.h b/proto/rip/rip.h index e0816d0e..2cce8c81 100644 --- a/proto/rip/rip.h +++ b/proto/rip/rip.h @@ -128,6 +128,8 @@ struct rip_patt { #define IM_QUIET 4 #define IM_NOLISTEN 8 #define IM_VERSION1 16 + int tx_tos; + int tx_priority; }; struct rip_proto_config { diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index 4f91def5..085f16fa 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -284,3 +284,12 @@ sk_set_min_ttl6(sock *s, int ttl) #endif + +int sk_priority_control = -1; + +static int +sk_set_priority(sock *s, int prio UNUSED) +{ + log(L_WARN "Socket priority not supported"); + return -1; +} diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index 90b3ebd9..41287e71 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -310,3 +310,22 @@ sk_set_min_ttl6(sock *s, int ttl) } #endif + + +#ifndef IPV6_TCLASS 
+#define IPV6_TCLASS 67 +#endif + +int sk_priority_control = 7; + +static int +sk_set_priority(sock *s, int prio) +{ + if (setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)) < 0) + { + log(L_WARN "sk_set_priority: setsockopt: %m"); + return -1; + } + + return 0; +} diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 80914afe..434a05be 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -598,7 +598,7 @@ sock_new(pool *p) sock *s = ralloc(p, &sk_class); s->pool = p; // s->saddr = s->daddr = IPA_NONE; - s->tos = s->ttl = -1; + s->tos = s->priority = s->ttl = -1; s->fd = -1; return s; } @@ -783,11 +783,18 @@ sk_setup(sock *s) ERR("fcntl(O_NONBLOCK)"); if (s->type == SK_UNIX) return NULL; -#ifndef IPV6 + +#ifdef IPV6 + if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0) + WARN("IPV6_TCLASS"); +#else if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0) WARN("IP_TOS"); #endif + if (s->priority >= 0) + sk_set_priority(s, s->priority); + #ifdef IPV6 int v = 1; if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0) -- cgit v1.2.3 From 6ac4f87a2d661c739e55a63577e7bccf696c7abd Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Wed, 26 Jun 2013 14:35:39 +0200 Subject: Documentation for TTL security. --- doc/bird.sgml | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 7277b2b9..aa8a53ec 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -470,7 +470,7 @@ to zero to disable it. An empty is equivalent to import keep filtered + import keep filtered Usually, if an import filter rejects a route, the route is forgotten. 
When this option is active, these routes are kept in the routing table, but they are hidden and not @@ -1966,6 +1966,9 @@ protocol ospf <name> { ptp netmask <switch>; check link <switch>; ecmp weight <num>; + ttl security [<switch>; | tx only] + tx class|dscp <num>; + tx priority <num>; authentication [none|simple|cryptographic]; password "<text>"; password "<text>" { @@ -2236,6 +2239,20 @@ protocol ospf <name> { prefix) is propagated. It is possible that some hardware drivers or platforms do not implement this feature. Default value is no. + ttl security [ + TTL security is a feature that protects routing protocols + from remote spoofed packets by using TTL 255 instead of TTL 1 + for protocol packets destined to neighbors. Because TTL is + decremented when packets are forwarded, it is non-trivial to + spoof packets with TTL 255 from remote locations. Note that + this option would interfere with OSPF virtual links. + + If this option is enabled, the router will send OSPF packets + with TTL 255 and drop received packets with TTL less than + 255. If this option si set to tx class|dscp|priority These options specify the ToS/DiffServ/Traffic class/Priority of the outgoing OSPF packets. See There is one operator related to ROA infrastructure - data; + u8 *q = p+path->length; + int i, n; + + while (p Date: Thu, 25 Jul 2013 13:15:32 +0200 Subject: Implements eval command and minor CLI cleanups. Implemented eval command can be used to evaluate expressions. The patch also documents echo command and allows to use log classes instead of integer as a mask for echo. 
--- doc/bird.sgml | 16 ++++++++++++---- doc/reply_codes | 2 ++ filter/filter.c | 32 ++++++++++++++++++++++---------- filter/filter.h | 3 +++ nest/cmds.c | 20 ++++++++++++++++++++ nest/cmds.h | 3 +++ nest/config.Y | 8 ++++++-- sysdep/unix/log.c | 2 ++ 8 files changed, 70 insertions(+), 16 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 27f8b869..a1d3dc2c 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -282,7 +282,7 @@ protocol rip { include " This statement causes inclusion of a new file. The maximal depth is set to 5. - log " +

There are several options that give sense only with certain protocols: -- cgit v1.2.3 From f8e8fcfabeb206287065f48e800743b0aa797cc2 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Mon, 29 Jul 2013 13:07:15 +0200 Subject: Test commit. --- doc/bird.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 162ab43d..6bf443e1 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -408,8 +408,8 @@ protocol rip { entries. The option may be used multiple times. Other entries can be added dynamically by eval Evaluates given filter expression. It - is used by us for testing of filters. + eval + Evaluates given filter expression. It is used by us for testing of filters. Protocol options -- cgit v1.2.3 From 00192d5ab88ff9eeccbc1bc10cb534976a56963d Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Tue, 13 Aug 2013 20:25:05 +0200 Subject: Implements proper setting of 'gw' route attribute. Thanks to Sergey Popovich for the bugreport. --- doc/bird.sgml | 2 +- filter/config.Y | 1 - filter/filter.c | 23 ++++++++++++++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 6bf443e1..7db9fad2 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1169,7 +1169,7 @@ undefined value is regarded as empty clist for most purposes. Preference of the route. Valid values are 0-65535. (See the chapter about routing tables.) - The router which the route has originated from. Read-only. + The router which the route has originated from. Next hop packets routed using this route should be forwarded to. 
diff --git a/filter/config.Y b/filter/config.Y index 7f73b895..66234050 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -681,7 +681,6 @@ symbol: static_attr: FROM { $$ = f_new_inst(); $$->aux = T_IP; $$->a2.i = OFFSETOF(struct rta, from); $$->a1.i = 1; } - | GW { $$ = f_new_inst(); $$->aux = T_IP; $$->a2.i = OFFSETOF(struct rta, gw); $$->a1.i = 1; } | NET { $$ = f_new_inst(); $$->aux = T_PREFIX; $$->a2.i = 0x12345678; /* This is actually ok - T_PREFIX is special-cased. */ } | PROTO { $$ = f_new_inst(); $$->aux = T_STRING; $$->a2.i = 0x12345678; /* T_STRING is also special-cased. */ } diff --git a/filter/filter.c b/filter/filter.c index d784c253..98bae331 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -853,10 +853,29 @@ interpret(struct f_inst *what) f_rta_cow(); { struct rta *rta = (*f_rte)->attrs; + ip_addr ip; + switch (what->aux) { case T_IP: - * (ip_addr *) ((char *) rta + what->a2.i) = v1.val.px.ip; + ip = v1.val.px.ip; + + /* "gw" attribute? */ + if (what->a2.i == OFFSETOF(struct rta, gw)) + { + neighbor *n = neigh_find(rta->proto, &ip, 0); + if (!n || (n->scope == SCOPE_HOST)) + runtime( "Invalid gw address" ); + + rta->dest = RTD_ROUTER; + rta->gw = ip; + rta->iface = n->iface; + rta->nexthops = NULL; + rta->hostentry = NULL; + } + else /* or "from" attribute? */ + rta->from = ip; + break; case T_ENUM_SCOPE: @@ -867,10 +886,12 @@ interpret(struct f_inst *what) i = v1.val.i; if ((i != RTD_BLACKHOLE) && (i != RTD_UNREACHABLE) && (i != RTD_PROHIBIT)) runtime( "Destination can be changed only to blackhole, unreachable or prohibit" ); + rta->dest = i; rta->gw = IPA_NONE; rta->iface = NULL; rta->nexthops = NULL; + rta->hostentry = NULL; break; default: -- cgit v1.2.3 From bff9ce5130d16af2fd802d42bdb2bff00980c9ae Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Thu, 15 Aug 2013 01:06:47 +0200 Subject: Extends delete/filter operators to work no bgp_paths. 
--- doc/bird.sgml | 17 +++++++++++++-- filter/filter.c | 29 +++++++++++++++++++++++++- filter/test.conf | 4 ++++ nest/a-path.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ nest/attrs.h | 2 ++ 5 files changed, 112 insertions(+), 3 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 7db9fad2..3cd80c32 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1033,10 +1033,23 @@ incompatible with each other (that is to prevent you from shooting in the foot). returns the length of path prepend( prepends ASN prepend( prepends ASN delete( deletes all instances of ASN + filter( deletes all ASNs from path + can be shortened to if from.type == T_INT)) + set = v2.val.t; + else + runtime("Can't delete non-integer (set)"); + + switch (what->aux) + { + case 'a': runtime("Can't add to path"); + case 'd': pos = 0; break; + case 'f': pos = 1; break; + default: bug("unknown Ca operation"); + } + + if (pos && !set) + runtime("Can't filter integer"); + + res.type = T_PATH; + res.val.ad = as_path_filter(f_pool, v1.val.ad, set, key, pos); + } + else if (v1.type == T_CLIST) { /* Community (or cluster) list */ struct f_val dummy; diff --git a/filter/test.conf b/filter/test.conf index 4f40abff..048983b5 100644 --- a/filter/test.conf +++ b/filter/test.conf @@ -104,6 +104,8 @@ eclist el2; print "Should be true: ", p2 ~ [= (3+2) (2*2) 3 2 1 =], " ", p2 ~ mkpath(5, 4); print "Should be true: ", p2.len = 5, " ", p2.first = 5, " ", p2.last = 1; print "5 = ", p2.len; + print "Delete 3: ", delete(p2, 3); + print "Filter 1-3: ", filter(p2, [1..3]); pm1 = [= 1 2 * 3 4 5 =]; p2 = prepend( + empty +, 5 ); @@ -113,6 +115,8 @@ eclist el2; p2 = prepend( p2, 2 ); p2 = prepend( p2, 1 ); print "Should be true: ", p2 ~ pm1, " ", p2, " ", pm1; + print "Delete 3: ", delete(p2, 3); + print "Delete 4-5: ", delete(p2, [4..5]); l = - empty -; print "Should be false in this special case: ", l ~ [(*,*)]; diff --git a/nest/a-path.c b/nest/a-path.c index 712e77a3..b1812981 
100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -287,6 +287,69 @@ as_path_match_set(struct adata *path, struct f_tree *set) return 0; } +struct adata * +as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 key, int pos) +{ + if (!path) + return NULL; + + int len = path->length; + u8 *p = path->data; + u8 *q = path->data + len; + u8 *d, *d2; + int i, bt, sn, dn; + u8 buf[len]; + + d = buf; + while (p 0) + { + /* Nonempty block, set block header and advance */ + d[0] = bt; + d[1] = dn; + d = d2; + } + } + + int nl = d - buf; + if (nl == path->length) + return path; + + struct adata *res = lp_alloc(pool, sizeof(struct adata) + nl); + res->length = nl; + memcpy(res->data, buf, nl); + + return res; +} + struct pm_pos { diff --git a/nest/attrs.h b/nest/attrs.h index 12f2fcf4..44a23e18 100644 --- a/nest/attrs.h +++ b/nest/attrs.h @@ -37,6 +37,8 @@ int as_path_get_first(struct adata *path, u32 *orig_as); int as_path_get_last(struct adata *path, u32 *last_as); int as_path_is_member(struct adata *path, u32 as); int as_path_match_set(struct adata *path, struct f_tree *set); +struct adata *as_path_filter(struct linpool *pool, struct adata *path, struct f_tree *set, u32 key, int pos); + #define PM_ASN 0 #define PM_QUESTION 1 -- cgit v1.2.3 From 6d90e57332e102e261d69a1a05dfaa19fb31d933 Mon Sep 17 00:00:00 2001 From: Ondrej Filip Date: Thu, 15 Aug 2013 19:54:18 +0200 Subject: Typo in documentation fixed. --- doc/bird.sgml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'doc') diff --git a/doc/bird.sgml b/doc/bird.sgml index 6bf443e1..a2266424 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -1496,8 +1496,8 @@ for each neighbor using the following configuration parameters: route limit The maximal number of routes that may be imported from the protocol. If the route limit is - exceeded, the connection is closed with error. 
Limit is currently implemented as - disable after error When an error is encountered (either locally or by the other side), disable the instance automatically -- cgit v1.2.3 From e628cad0ca9eb7d9bf4141e57201169c46faa661 Mon Sep 17 00:00:00 2001 From: Ondrej Filip Date: Thu, 15 Aug 2013 20:20:05 +0200 Subject: BGP option 'route limit' is marked as obsolete. 'import limit' should be used instead. --- doc/bird-6.html | 1731 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1731 insertions(+) create mode 100644 doc/bird-6.html (limited to 'doc') diff --git a/doc/bird-6.html b/doc/bird-6.html new file mode 100644 index 00000000..d21209ee --- /dev/null +++ b/doc/bird-6.html @@ -0,0 +1,1731 @@ + + + + + BIRD User's Guide: Protocols + + + + + +Next +Previous +Contents +


+

6. Protocols

+ +

6.1 BGP +

+ +

The Border Gateway Protocol is the routing protocol used for backbone +level routing in today's Internet. Contrary to the other protocols, its convergence +doesn't rely on all routers following the same rules for route selection, +making it possible to implement any routing policy at any router in the +network, the only restriction being that if a router advertises a route, +it must accept and forward packets according to it. +

+

BGP works in terms of autonomous systems (often abbreviated as +AS). Each AS is a part of the network with common management and +common routing policy. It is identified by a unique 16-bit number +(ASN). Routers within each AS usually exchange AS-internal routing +information with each other using an interior gateway protocol (IGP, +such as OSPF or RIP). Boundary routers at the border of +the AS communicate global (inter-AS) network reachability information with +their neighbors in the neighboring AS'es via exterior BGP (eBGP) and +redistribute received information to other routers in the AS via +interior BGP (iBGP). +

+

Each BGP router sends to its neighbors updates of the parts of its +routing table it wishes to export along with complete path information +(a list of AS'es the packet will travel through if it uses the particular +route) in order to avoid routing loops. +

+

BIRD supports all requirements of the BGP4 standard as defined in +RFC 4271 +ftp://ftp.rfc-editor.org/in-notes/rfc4271.txt +It also supports the community attributes +(RFC 1997 +ftp://ftp.rfc-editor.org/in-notes/rfc1997.txt), +capability negotiation +(RFC 3392 +ftp://ftp.rfc-editor.org/in-notes/rfc3392.txt), +MD5 password authentication +(RFC 2385 +ftp://ftp.rfc-editor.org/in-notes/rfc2385.txt), +extended communities +(RFC 4360 +ftp://ftp.rfc-editor.org/in-notes/rfc4360.txt), +route reflectors +(RFC 4456 +ftp://ftp.rfc-editor.org/in-notes/rfc4456.txt), +multiprotocol extensions +(RFC 4760 +ftp://ftp.rfc-editor.org/in-notes/rfc4760.txt), +4B AS numbers +(RFC 4893 +ftp://ftp.rfc-editor.org/in-notes/rfc4893.txt), +and 4B AS numbers in extended communities +(RFC 5668 +ftp://ftp.rfc-editor.org/in-notes/rfc5668.txt). +

+

For IPv6, it uses the standard multiprotocol extensions defined in +RFC 2283 +ftp://ftp.rfc-editor.org/in-notes/rfc2283.txt +including changes described in the +latest draft +ftp://ftp.rfc-editor.org/internet-drafts/draft-ietf-idr-bgp4-multiprotocol-v2-05.txt +and applied to IPv6 according to +RFC 2545 +ftp://ftp.rfc-editor.org/in-notes/rfc2545.txt. +

+

Route selection rules

+ +

BGP doesn't have any simple metric, so the rules for selection of an optimal +route among multiple BGP routes with the same preference are a bit more complex +and they are implemented according to the following algorithm. It starts with the first +rule; if there are more "best" routes, then it uses the second rule to choose +among them, and so on. +

+

    +
  • Prefer route with the highest Local Preference attribute.
  • +
  • Prefer route with the shortest AS path.
  • +
  • Prefer IGP origin over EGP and EGP origin over incomplete.
  • +
  • Prefer the lowest value of the Multiple Exit Discriminator.
  • +
  • Prefer routes received via eBGP over ones received via iBGP.
  • +
  • Prefer routes with lower internal distance to a boundary router.
  • +
  • Prefer the route with the lowest value of router ID of the +advertising router.
  • +
+

+

IGP routing table

+ +

BGP is mainly concerned with global network reachability and with +routes to other autonomous systems. When such routes are redistributed +to routers in the AS via BGP, they contain IP addresses of boundary +routers (in route attribute NEXT_HOP). BGP depends on an existing IGP +routing table with AS-internal routes to determine immediate next hops +for routes and to know their internal distances to boundary routers +for the purpose of BGP route selection. In BIRD, there is usually +one routing table used for both IGP routes and BGP routes. +

+

Configuration

+ +

Each instance of the BGP corresponds to one neighboring router. +This allows to set routing policy and all the other parameters differently +for each neighbor using the following configuration parameters: +

+

+
local [ip] as number

Define which AS we +are part of. (Note that contrary to other IP routers, BIRD is +able to act as a router located in multiple AS'es +simultaneously, but in such cases you need to tweak the BGP +paths manually in the filters to get consistent behavior.) +Optional ip argument specifies a source address, +equivalent to the source address option (see below). +This parameter is mandatory. +

+

neighbor ip as number

Define neighboring router +this instance will be talking to and what AS it's located in. Unless +you use the multihop clause, it must be directly connected to one +of your router's interfaces. In case the neighbor is in the same AS +as we are, we automatically switch to iBGP. This parameter is mandatory. +

+

multihop [number]

Configure multihop BGP +session to a neighbor that isn't directly connected. +More precisely, this option should be used if the configured +neighbor IP address does not match with any local network +subnets. Such an IP address has to be reachable through the system +routing table. For multihop BGP it is recommended to +explicitly configure source address to have it +stable. Optional number argument can be used to specify +the number of hops (used for TTL). Note that the number of +networks (edges) in a path is counted, i.e. if two BGP +speakers are separated by one router, the number of hops is +2. Default: switched off. +

+

source address ip

Define local address we +should use for next hop calculation and as a source address +for the BGP session. Default: the address of the local +end of the interface our neighbor is connected to. +

+

next hop self

Avoid calculation of the Next Hop +attribute and always advertise our own source address as a +next hop. This needs to be used only occasionally to +circumvent misconfigurations of other routers. Default: +disabled. +

+

next hop keep

Forward the received Next Hop +attribute even in situations where the local address should be +used instead, like when the route is sent to an interface with +a different subnet. Default: disabled. +

+

missing lladdr self|drop|ignore

Next Hop attribute +in BGP-IPv6 sometimes contains just the global IPv6 address, +but sometimes it has to contain both global and link-local +IPv6 addresses. This option specifies what to do if BIRD has +to send both addresses but does not know the link-local address. +This situation might happen when routes from other protocols +are exported to BGP, or when improper updates are received +from BGP peers. self means that BIRD advertises its own +local address instead. drop means that BIRD skips those +prefixes and logs an error. ignore means that BIRD ignores +the problem and sends just the global address (and therefore +forms improper BGP update). Default: self, unless BIRD +is configured as a route server (option rs client), in +that case default is ignore, because route servers usually +do not forward packets themselves. +

+

gateway direct|recursive

For received routes, their +gw (immediate next hop) attribute is computed from +received bgp_next_hop attribute. This option specifies +how it is computed. Direct mode means that the IP address from +bgp_next_hop is used if it is directly reachable, +otherwise the neighbor IP address is used. Recursive mode +means that the gateway is computed by an IGP routing table +lookup for the IP address from bgp_next_hop. Recursive +mode is the behavior specified by the BGP standard. Direct +mode is simpler, does not require any routes in a routing +table, and was used in older versions of BIRD, but does not +handle well nontrivial iBGP setups and multihop. Recursive +mode is incompatible with +sorted tables. Default: direct for singlehop eBGP, +recursive otherwise. +

+

igp table name

Specifies a table that is used +as an IGP routing table. Default: the same as the table BGP is +connected to. +

+

ttl security switch

Use GTSM (RFC 5082 - the +generalized TTL security mechanism). GTSM protects against +spoofed packets by ignoring received packets with a smaller +than expected TTL. To work properly, GTSM has to be enabled +on both sides of a BGP session. If both ttl security and +multihop options are enabled, multihop option should +specify proper hop value to compute expected TTL. Kernel +support required: Linux: 2.6.34+ (IPv4), 2.6.35+ (IPv6), BSD: +since long ago, IPv4 only. Note that full (ICMP protection, +for example) RFC 5082 support is provided by Linux +only. Default: disabled. +

+

password string

Use this password for MD5 authentication +of BGP sessions. Default: no authentication. Password has to be set by +external utility (e.g. setkey(8)) on BSD systems. +

+

passive switch

Standard BGP behavior is both +initiating outgoing connections and accepting incoming +connections. In passive mode, outgoing connections are not +initiated. Default: off. +

+

rr client

Be a route reflector and treat the neighbor as +a route reflection client. Default: disabled. +

+

rr cluster id IPv4 address

Route reflectors use cluster id +to avoid route reflection loops. When there is one route reflector in a cluster +it usually uses its router id as a cluster id, but when there are more route +reflectors in a cluster, these need to be configured (using this option) to +use a common cluster id. Clients in a cluster need not know their cluster +id and this option is not allowed for them. Default: the same as router id. +

+

rs client

Be a route server and treat the neighbor +as a route server client. A route server is used as a +replacement for full mesh EBGP routing in Internet exchange +points in a similar way to route reflectors used in IBGP routing. +BIRD does not implement obsoleted RFC 1863, but uses ad-hoc implementation, +which behaves like plain EBGP but reduces modifications to advertised route +attributes to be transparent (for example does not prepend its AS number to +AS PATH attribute and keeps MED attribute). Default: disabled. +

+

secondary switch

Usually, if an import filter +rejects a selected route, no other route is propagated for +that network. This option allows to try the next route in +order until one that is accepted is found or all routes for +that network are rejected. This can be used for route servers +that need to propagate different tables to each client but do +not want to have these tables explicitly (to conserve memory). +This option requires that the connected routing table is +sorted. Default: off. +

+

enable route refresh switch

When BGP speaker +changes its import filter, it has to re-examine all routes +received from its neighbor against the new filter. As these +routes might not be available, there is a BGP protocol +extension Route Refresh (specified in RFC 2918) that allows +BGP speaker to request re-advertisement of all routes from its +neighbor. This option specifies whether BIRD advertises this +capability and accepts such requests. Even when disabled, BIRD +can send route refresh requests. Default: on. +

+

interpret communities switch

RFC 1997 demands +that BGP speaker should process well-known communities like +no-export (65535, 65281) or no-advertise (65535, 65282). For +example, received route carrying a no-advertise community +should not be advertised to any of its neighbors. If this +option is enabled (which is by default), BIRD has such +behavior automatically (it is evaluated when a route is +exported to the BGP protocol just before the export filter). +Otherwise, this integrated processing of well-known +communities is disabled. In that case, similar behavior can be +implemented in the export filter. Default: on. +

+

enable as4 switch

BGP protocol was designed to use 2B AS numbers +and was extended later to allow 4B AS number. BIRD supports 4B AS extension, +but by disabling this option it can be persuaded not to advertise it and +to maintain old-style sessions with its neighbors. This might be useful for +circumventing bugs in neighbor's implementation of 4B AS extension. +Even when disabled (off), BIRD behaves internally as AS4-aware BGP router. +Default: on. +

+

capabilities switch

Use capability advertisement +to advertise optional capabilities. This is standard behavior +for newer BGP implementations, but there might be some older +BGP implementations that reject such connection attempts. +When disabled (off), features that request it (4B AS support) +are also disabled. Default: on, with automatic fallback to +off when received capability-related error. +

+

advertise ipv4 switch

Advertise IPv4 multiprotocol capability. +This is not a correct behavior according to the strict interpretation +of RFC 4760, but it is widespread and required by some BGP +implementations (Cisco and Quagga). This option is relevant +to IPv4 mode with enabled capability advertisement only. Default: on. +

+

route limit number

The maximal number of routes +that may be imported from the protocol. If the route limit is +exceeded, the connection is closed with an error. Limit is currently implemented as +import limit number action restart. This option is obsolete and it is +replaced by +import limit option. Default: no limit. +

+

disable after error switch

When an error is encountered (either +locally or by the other side), disable the instance automatically +and wait for an administrator to fix the problem manually. Default: off. +

+

hold time number

Time in seconds to wait for a Keepalive +message from the other side before considering the connection stale. +Default: depends on agreement with the neighboring router, we prefer +240 seconds if the other side is willing to accept it. +

+

startup hold time number

Value of the hold timer used +before the routers have a chance to exchange open messages and agree +on the real value. Default: 240 seconds. +

+

keepalive time number

Delay in seconds between sending +of two consecutive Keepalive messages. Default: One third of the hold time. +

+

connect retry time number

Time in seconds to wait before +retrying a failed attempt to connect. Default: 120 seconds. +

+

start delay time number

Delay in seconds between protocol +startup and the first attempt to connect. Default: 5 seconds. +

+

error wait time number,number

Minimum and maximum delay in seconds between a protocol +failure (either local or reported by the peer) and automatic restart. +Doesn't apply when disable after error is configured. If consecutive +errors happen, the delay is increased exponentially until it reaches the maximum. Default: 60, 300. +

+

error forget time number

Maximum time in seconds between two protocol +failures to treat them as an error sequence which makes the error wait time +increase exponentially. Default: 300 seconds. +

+

path metric switch

Enable comparison of path lengths +when deciding which BGP route is the best one. Default: on. +

+

med metric switch

Enable comparison of MED +attributes (during best route selection) even between routes +received from different ASes. This may be useful if all MED +attributes contain some consistent metric, perhaps enforced in +import filters of AS boundary routers. If this option is +disabled, MED attributes are compared only if routes are +received from the same AS (which is the standard behavior). +Default: off. +

+

deterministic med switch

BGP route selection +algorithm is often viewed as a comparison between individual +routes (e.g. if a new route appears and is better than the +current best one, it is chosen as the new best one). But the +proper route selection, as specified by RFC 4271, cannot be +fully implemented in that way. The problem is mainly in +handling the MED attribute. BIRD, by default, uses a +simplification based on individual route comparison, which in +some cases may lead to temporally dependent behavior (i.e. the +selection is dependent on the order in which routes appeared). +This option enables a different (and slower) algorithm +implementing proper RFC 4271 route selection, which is +deterministic. An alternative way to get deterministic +behavior is to use the med metric option. This option is +incompatible with +sorted tables. +Default: off. +

+

igp metric switch

Enable comparison of internal +distances to boundary routers during best route selection. Default: on. +

+

prefer older switch

Standard route selection algorithm +breaks ties by comparing router IDs. This changes the behavior +to prefer older routes (when both are external and from different +peer). For details, see RFC 5004. Default: off. +

+

default bgp_med number

Value of the Multiple Exit +Discriminator to be used during route selection when the MED attribute +is missing. Default: 0. +

+

default bgp_local_pref number

A default value +for the Local Preference attribute. It is used when a new +Local Preference attribute is attached to a route by the BGP +protocol itself (for example, if a route is received through +eBGP and therefore does not have such attribute). Default: 100 +(0 in pre-1.2.0 versions of BIRD). +

+

+

Attributes

+ +

BGP defines several route attributes. Some of them (those marked with `I' in the +table below) are available on internal BGP connections only, some of them (marked +with `O') are optional. +

+

+
bgppath bgp_path

Sequence of AS numbers describing the AS path +the packet will travel through when forwarded according to the particular route. +In case of internal BGP it doesn't contain the number of the local AS. +

+

int bgp_local_pref [I]

Local preference value used for +selection among multiple BGP routes (see the selection rules above). It's +used as an additional metric which is propagated through the whole local AS. +

+

int bgp_med [O]

The Multiple Exit Discriminator of the route +is an optional attribute which is used on external (inter-AS) links to +convey to an adjacent AS the optimal entry point into the local AS. +The received attribute is also propagated over internal BGP links. +The attribute value is zeroed when a route is exported to an external BGP +instance to ensure that the attribute received from a neighboring AS is +not propagated to other neighboring ASes. A new value might be set in +the export filter of an external BGP instance. +See RFC 4451 +ftp://ftp.rfc-editor.org/in-notes/rfc4451.txt +for further discussion of BGP MED attribute. +

+

enum bgp_origin

Origin of the route: either ORIGIN_IGP +if the route has originated in an interior routing protocol or +ORIGIN_EGP if it's been imported from the EGP protocol +(nowadays it seems to be obsolete) or ORIGIN_INCOMPLETE if the origin +is unknown. +

+

ip bgp_next_hop

Next hop to be used for forwarding of packets +to this destination. On internal BGP connections, it's an address of the +originating router if it's inside the local AS or a boundary router the +packet will leave the AS through if it's an exterior route, so each BGP +speaker within the AS has a chance to use the shortest interior path +possible to this point. +

+

void bgp_atomic_aggr [O]

This is an optional attribute +which carries no value, but the sole presence of which indicates that the route +has been aggregated from multiple routes by some router on the path from +the originator. +

+

clist bgp_community [O]

List of community values associated +with the route. Each such value is a pair (represented as a pair data +type inside the filters) of 16-bit integers, the first of them containing the number of the AS which defines +the community and the second one being a per-AS identifier. There are lots +of uses of the community mechanism, but generally they are used to carry +policy information like "don't export to USA peers". As each AS can define +its own routing policy, it also has a complete freedom about which community +attributes it defines and what will their semantics be. +

+

eclist bgp_ext_community [O]

List of extended community +values associated with the route. Extended communities have similar usage +as plain communities, but they have an extended range (to allow 4B ASNs) +and a nontrivial structure with a type field. Individual community values are +represented using an ec data type inside the filters. +

+

quad bgp_originator_id [I, O]

This attribute is created by the +route reflector when reflecting the route and contains the router ID of the +originator of the route in the local AS. +

+

clist bgp_cluster_list [I, O]

This attribute contains a list +of cluster IDs of route reflectors. Each route reflector prepends its +cluster ID when reflecting the route. +

+

+

Example

+ +

+


+
+protocol bgp {
+        local as 65000;                      # Use a private AS number
+        neighbor 198.51.100.130 as 64496;    # Our neighbor ...
+        multihop;                            # ... which is connected indirectly
+        export filter {                      # We use non-trivial export rules
+                if source = RTS_STATIC then { # Export only static routes
+                        # Assign our community
+                        bgp_community.add((65000,64501));
+                        # Artificially increase path length
+                        # by advertising local AS number twice
+                        if bgp_path ~ [= 65000 =] then
+                                bgp_path.prepend(65000);
+                        accept;
+                }
+                reject;
+        };
+        import all;
+        source address 198.51.100.14;   # Use a non-standard source address
+}
+
+
+

+

6.2 Device +

+ +

The Device protocol is not a real routing protocol. It doesn't generate +any routes and it only serves as a module for getting information about network +interfaces from the kernel. +

+

Except for very unusual circumstances, you probably should include +this protocol in the configuration since almost all other protocols +require network interfaces to be defined for them to work with. +

+

Configuration

+ +

+

+
scan time number

Time in seconds between two scans +of the network interface list. On systems where we are notified about +interface status changes asynchronously (such as newer versions of +Linux), we need to scan the list only in order to avoid confusion by lost +notification messages, so the default time is set to a large value. +

+

primary [ "mask" ] prefix

If a network interface has more than one network address, BIRD +has to choose one of them as a primary one. By default, BIRD +chooses the lexicographically smallest address as the primary +one. +

This option allows to specify which network address should be +chosen as a primary one. Network addresses that match +prefix are preferred to non-matching addresses. If more +primary options are used, the first one has the highest +preference. If "mask" is specified, then such +primary option is relevant only to matching network +interfaces. +

In all cases, an address marked by operating system as +secondary cannot be chosen as the primary one. +

+

+

As the Device protocol doesn't generate any routes, it cannot have +any attributes. Example configuration looks like this: +

+

+


+
+protocol device {
+        scan time 10;           # Scan the interfaces often
+        primary "eth0" 192.168.1.1;
+        primary 192.168.0.0/16;
+}
+
+
+

+

6.3 Direct +

+ +

The Direct protocol is a simple generator of device routes for all the +directly connected networks according to the list of interfaces provided +by the kernel via the Device protocol. +

+

The question is whether it is a good idea to have such device +routes in BIRD routing table. OS kernel usually handles device routes +for directly connected networks by itself so we don't need (and don't +want) to export these routes to the kernel protocol. OSPF protocol +creates device routes for its interfaces itself and BGP protocol is +usually used for exporting aggregate routes. Although there are some +use cases that use the direct protocol (like abusing eBGP as an IGP +routing protocol), in most cases it is not needed to have these device +routes in BIRD routing table and to use the direct protocol. +

+

There is one notable case when you definitely want to use the +direct protocol -- running BIRD on BSD systems. Having high priority +device routes for directly connected networks from the direct protocol +protects kernel device routes from being overwritten or removed by IGP +routes during some transient network conditions, because a lower +priority IGP route for the same network is not exported to the kernel +routing table. This is an issue on BSD systems only, as on Linux +systems BIRD cannot change non-BIRD route in the kernel routing table. +

+

The only configurable thing about direct is what interfaces it watches: +

+

+

+
interface pattern [, ...]

By default, the Direct +protocol will generate device routes for all the interfaces +available. If you want to restrict it to some subset of interfaces +(for example if you're using multiple routing tables for policy +routing and some of the policy domains don't contain all interfaces), +just use this clause. +

+

+

Direct device routes don't contain any specific attributes. +

+

Example config might look like this: +

+

+


+
+protocol direct {
+        interface "-arc*", "*";         # Exclude the ARCnets
+}
+
+
+

+

6.4 Kernel +

+ +

The Kernel protocol is not a real routing protocol. Instead of communicating +with other routers in the network, it performs synchronization of BIRD's routing +tables with the OS kernel. Basically, it sends all routing table updates to the kernel +and from time to time it scans the kernel tables to see whether some routes have +disappeared (for example due to unnoticed up/down transition of an interface) +or whether an `alien' route has been added by someone else (depending on the +learn switch, such routes are either ignored or accepted to our +table). +

+

Unfortunately, there is one thing that makes the routing table +synchronization a bit more complicated. In the kernel routing table +there are also device routes for directly connected networks. These +routes are usually managed by OS itself (as a part of IP address +configuration) and we don't want to touch that. They are completely +ignored during the scan of the kernel tables and also the export of +device routes from BIRD tables to kernel routing tables is restricted +to prevent accidental interference. This restriction can be disabled using +device routes switch. +

+

If your OS supports only a single routing table, you can configure +only one instance of the Kernel protocol. If it supports multiple +tables (in order to allow policy routing; such an OS is for example +Linux), you can run as many instances as you want, but each of them +must be connected to a different BIRD routing table and to a different +kernel table. +

+

Because the kernel protocol is partially integrated with the +connected routing table, there are two limitations - it is not +possible to connect more kernel protocols to the same routing table +and changing route destination/gateway in an export +filter of a kernel protocol does not work. Both limitations can be +overcome using another routing table and the pipe protocol. +

+

Configuration

+ +

+

+
persist switch

Tell BIRD to leave all its routes in the +routing tables when it exits (instead of cleaning them up). +

scan time number

Time in seconds between two consecutive scans of the +kernel routing table. +

learn switch

Enable learning of routes added to the kernel +routing tables by other routing daemons or by the system administrator. +This is possible only on systems which support identification of route +authorship. +

+

device routes switch

Enable export of device +routes to the kernel routing table. By default, such routes +are rejected (with the exception of explicitly configured +device routes from the static protocol) regardless of the +export filter to protect device routes in kernel routing table +(managed by OS itself) from accidental overwriting or erasing. +

+

kernel table number

Select which kernel table should +this particular instance of the Kernel protocol work with. Available +only on systems supporting multiple routing tables. +

+

+

Attributes

+ +

The Kernel protocol defines several attributes. These attributes +are translated to appropriate system (and OS-specific) route attributes. +We support these attributes: +

+

+
int krt_source

The original source of the imported +kernel route. The value is system-dependent. On Linux, it is +a value of the protocol field of the route. See +/etc/iproute2/rt_protos for common values. On BSD, it is +based on STATIC and PROTOx flags. The attribute is read-only. +

+

int krt_metric

The kernel metric of +the route. When multiple same routes are in a kernel routing +table, the Linux kernel chooses one with lower metric. +

+

ip krt_prefsrc

(Linux) The preferred source address. +Used in source address selection for outgoing packets. It has to +be one of the IP addresses of the router. +

+

int krt_realm

(Linux) The realm of the route. Can be +used for traffic classification. +

+

+

Example

+ +

A simple configuration can look this way: +

+

+


+
+protocol kernel {
+        export all;
+}
+
+
+

+

Or for a system with two routing tables: +

+

+


+
+protocol kernel {               # Primary routing table
+        learn;                  # Learn alien routes from the kernel
+        persist;                # Don't remove routes on bird shutdown
+        scan time 10;           # Scan kernel routing table every 10 seconds
+        import all;
+        export all;
+}
+
+protocol kernel {               # Secondary routing table
+        table auxtable;
+        kernel table 100;
+        export all;
+}
+
+
+

+

6.5 OSPF +

+ +

Introduction

+ +

Open Shortest Path First (OSPF) is a quite complex interior gateway +protocol. The current IPv4 version (OSPFv2) is defined in RFC +2328 +ftp://ftp.rfc-editor.org/in-notes/rfc2328.txt and +the current IPv6 version (OSPFv3) is defined in RFC 5340 +ftp://ftp.rfc-editor.org/in-notes/rfc5340.txt. It's a link state +(a.k.a. shortest path first) protocol -- each router maintains a +database describing the autonomous system's topology. Each participating +router has an identical copy of the database and all routers run the +same algorithm calculating a shortest path tree with themselves as a +root. OSPF chooses the least cost path as the best path. +

+

In OSPF, the autonomous system can be split to several areas in order +to reduce the amount of resources consumed for exchanging the routing +information and to protect the other areas from incorrect routing data. +Topology of the area is hidden to the rest of the autonomous system. +

+

Another very important feature of OSPF is that +it can keep routing information from other protocols (like Static or BGP) +in its link state database as external routes. Each external route can +be tagged by the advertising router, making it possible to pass additional +information between routers on the boundary of the autonomous system. +

+

OSPF quickly detects topological changes in the autonomous system (such +as router interface failures) and calculates new loop-free routes after a short +period of convergence. Only a minimal amount of +routing traffic is involved. +

+

Each router participating in OSPF routing periodically sends Hello messages +to all its interfaces. This allows neighbors to be discovered dynamically. +Then the neighbors exchange their parts of the link state database and keep it +identical by flooding updates. The flooding process is reliable and ensures +that each router detects all changes. +

+

Configuration

+ +

In the main part of configuration, there can be multiple definitions of +OSPF areas, each with a different id. These definitions include many other +switches and multiple definitions of interfaces. Definition of interface +may contain many switches and constant definitions and list of neighbors +on nonbroadcast networks. +

+


+
+protocol ospf <name> {
+        rfc1583compat <switch>;
+        stub router <switch>;
+        tick <num>;
+        ecmp <switch> [limit <num>];
+        area <id> {
+                stub;
+                nssa;
+                summary <switch>;
+                default nssa <switch>;
+                default cost <num>;
+                default cost2 <num>;
+                translator <switch>;
+                translator stability <num>;
+
+                networks {
+                        <prefix>;
+                        <prefix> hidden;
+                }
+                external {
+                        <prefix>;
+                        <prefix> hidden;
+                        <prefix> tag <num>;
+                }
+                stubnet <prefix>;
+                stubnet <prefix> {
+                        hidden <switch>;
+                        summary <switch>;
+                        cost <num>;
+                }
+                interface <interface pattern> [instance <num>] {
+                        cost <num>;
+                        stub <switch>;
+                        hello <num>;
+                        poll <num>;
+                        retransmit <num>;
+                        priority <num>;
+                        wait <num>;
+                        dead count <num>;
+                        dead <num>;
+                        rx buffer [normal|large|<num>];
+                        type [broadcast|bcast|pointopoint|ptp|
+                                nonbroadcast|nbma|pointomultipoint|ptmp];
+                        strict nonbroadcast <switch>;
+                        real broadcast <switch>;
+                        ptp netmask <switch>;
+                        check link <switch>;
+                        ecmp weight <num>;
+                        ttl security [<switch>; | tx only]
+                        tx class|dscp <num>;
+                        tx priority <num>;
+                        authentication [none|simple|cryptographic];
+                        password "<text>";
+                        password "<text>" {
+                                id <num>;
+                                generate from "<date>";
+                                generate to "<date>";
+                                accept from "<date>";
+                                accept to "<date>";
+                        };
+                        neighbors {
+                                <ip>;
+                                <ip> eligible;
+                        };
+                };
+                virtual link <id> [instance <num>] {
+                        hello <num>;
+                        retransmit <num>;
+                        wait <num>;
+                        dead count <num>;
+                        dead <num>;
+                        authentication [none|simple|cryptographic];
+                        password "<text>";
+                };
+        };
+}
+
+
+

+

+
rfc1583compat switch

This option controls compatibility of routing table +calculation with RFC 1583 +ftp://ftp.rfc-editor.org/in-notes/rfc1583.txt. Default +value is no. +

+

stub router switch

This option configures the router to be a stub router, i.e., +a router that participates in the OSPF topology but does not +allow transit traffic. In OSPFv2, this is implemented by +advertising maximum metric for outgoing links, as suggested +by RFC 3137 +ftp://ftp.rfc-editor.org/in-notes/rfc3137.txt. +In OSPFv3, the stub router behavior is announced by clearing +the R-bit in the router LSA. Default value is no. +

+

tick num

The routing table calculation and clean-up of areas' databases +is not performed when a single link state +change arrives. To lower the CPU utilization, it's processed later +at periodical intervals of num seconds. The default value is 1. +

+

ecmp switch [limit number]

This option specifies whether OSPF is allowed to generate +ECMP (equal-cost multipath) routes. Such routes are used when +there are several directions to the destination, each with +the same (computed) cost. This option also allows to specify +a limit on maximal number of nexthops in one route. By +default, ECMP is disabled. If enabled, default value of the +limit is 16. +

+

area id

This defines an OSPF area with given area ID (an integer or an IPv4 +address, similarly to a router ID). The most important area is +the backbone (ID 0) to which every other area must be connected. +

+

stub

This option configures the area to be a stub area. External +routes are not flooded into stub areas. Also summary LSAs can be +limited in stub areas (see option summary). +By default, the area is not a stub area. +

+

nssa

This option configures the area to be a NSSA (Not-So-Stubby +Area). NSSA is a variant of a stub area which allows a +limited way of external route propagation. Global external +routes are not propagated into a NSSA, but an external route +can be imported into NSSA as a (area-wide) NSSA-LSA (and +possibly translated and/or aggregated on area boundary). +By default, the area is not NSSA. +

+

summary switch

This option controls propagation of summary LSAs into stub or +NSSA areas. If enabled, summary LSAs are propagated as usual, +otherwise just the default summary route (0.0.0.0/0) is +propagated (this is sometimes called totally stubby area). If +a stub area has more area boundary routers, propagating +summary LSAs could lead to more efficient routing at the cost +of larger link state database. Default value is no. +

+

default nssa switch

When summary option is enabled, default summary route is +no longer propagated to the NSSA. In that case, this option +allows to originate default route as NSSA-LSA to the NSSA. +Default value is no. +

+

default cost num

This option controls the cost of a default route propagated to +stub and NSSA areas. Default value is 1000. +

+

default cost2 num

When a default route is originated as NSSA-LSA, its cost +can use either type 1 or type 2 metric. This option allows +to specify the cost of a default route in type 2 metric. +By default, type 1 metric (option default cost) is used. +

+

translator switch

This option controls translation of NSSA-LSAs into external +LSAs. By default, one translator per NSSA is automatically +elected from area boundary routers. If enabled, this area +boundary router would unconditionally translate all NSSA-LSAs +regardless of translator election. Default value is no. +

+

translator stability num

This option controls the translator stability interval (in +seconds). When the new translator is elected, the old one +keeps translating until the interval is over. Default value +is 40. +

+

networks { set }

Definition of area IP ranges. This is used in summary LSA origination. +Hidden networks are not propagated into other areas. +

+

external { set }

Definition of external area IP ranges for NSSAs. This is used +for NSSA-LSA translation. Hidden networks are not translated +into external LSAs. Networks can have configured route tag. +

+

stubnet prefix { options }

Stub networks are networks that are not transit networks +between OSPF routers. They are also propagated through an +OSPF area as a part of a link state database. By default, +BIRD generates a stub network record for each primary network +address on each OSPF interface that does not have any OSPF +neighbors, and also for each non-primary network address on +each OSPF interface. This option allows to alter a set of +stub networks propagated by this router. +

Each instance of this option adds a stub network with given +network prefix to the set of propagated stub network, unless +option hidden is used. It also suppresses default stub +networks for given network prefix. When option +summary is used, also default stub networks that are +subnetworks of given stub network are suppressed. This might +be used, for example, to aggregate generated stub networks. +

+

interface pattern [instance num]

Defines that the specified interfaces belong to the area being defined. +See +interface common option for detailed description. +In OSPFv3, you can specify instance ID for that interface +description, so it is possible to have several instances of +that interface with different options or even in different areas. +

+

virtual link id [instance num]

Virtual link to router with the router id. Virtual link acts +as a point-to-point interface belonging to backbone. The +actual area is used as transport area. This item cannot be in +the backbone. In OSPFv3, you could also use several virtual +links to one destination with different instance IDs. +

+

cost num

Specifies output cost (metric) of an interface. Default value is 10. +

+

stub switch

If set, the interface does not listen for any packets and does not send +any Hello messages. Default value is no. +

+

hello num

Specifies interval in seconds between sending of Hello messages. Beware, all +routers on the same network need to have the same hello interval. +Default value is 10. +

+

poll num

Specifies interval in seconds between sending of Hello messages for +some neighbors on NBMA network. Default value is 20. +

+

retransmit num

Specifies interval in seconds between retransmissions of unacknowledged updates. +Default value is 5. +

+

priority num

On every multiple access network (e.g., the Ethernet) Designated Router +and Backup Designated Router are elected. These routers have some +special functions in the flooding process. Higher priority increases +preferences in this election. Routers with priority 0 are not +eligible. Default value is 1. +

+

wait num

After start, router waits for the specified number of seconds between starting +election and building adjacency. Default value is 40. +

+

dead count num

When the router does not receive any messages from a neighbor in +dead count*hello seconds, it will consider the neighbor down. +

+

dead num

When the router does not receive any messages from a neighbor in +dead seconds, it will consider the neighbor down. If both directives +dead count and dead are used, dead has precedence. +

+

rx buffer num

This sets the size of buffer used for receiving packets. The buffer should +be bigger than maximal size of any packets. Value NORMAL (default) +means 2*MTU, value LARGE means maximal allowed packet - 65535. +

+

type broadcast|bcast

BIRD detects a type of a connected network automatically, but +sometimes it's convenient to force use of a different type +manually. On broadcast networks (like ethernet), flooding +and Hello messages are sent using multicasts (a single packet +for all the neighbors). A designated router is elected and it +is responsible for synchronizing the link-state databases and +originating network LSAs. This network type cannot be used on +physically NBMA networks and on unnumbered networks (networks +without proper IP prefix). +

+

type pointopoint|ptp

Point-to-point networks connect just 2 routers together. No +election is performed and no network LSA is originated, which +makes it simpler and faster to establish. This network type +is useful not only for physically PtP ifaces (like PPP or +tunnels), but also for broadcast networks used as PtP links. +This network type cannot be used on physically NBMA networks. +

+

type nonbroadcast|nbma

On NBMA networks, the packets are sent to each neighbor +separately because of lack of multicast capabilities. +Like on broadcast networks, a designated router is elected, +which plays a central role in propagation of LSAs. +This network type cannot be used on unnumbered networks. +

+

type pointomultipoint|ptmp

This is another network type designed to handle NBMA +networks. In this case the NBMA network is treated as a +collection of PtP links. This is useful if not every pair of +routers on the NBMA network has direct communication, or if +the NBMA network is used as a (possibly unnumbered) PtP +link. +

+

strict nonbroadcast switch

If set, don't send hello to any undefined neighbor. This switch +is ignored on other than NBMA or PtMP networks. Default value is no. +

+

real broadcast switch

In type broadcast or type ptp network +configuration, OSPF packets are sent as IP multicast +packets. This option changes the behavior to using +old-fashioned IP broadcast packets. This may be useful as a +workaround if IP multicast for some reason does not work or +does not work reliably. This is a non-standard option and +probably is not interoperable with other OSPF +implementations. Default value is no. +

+

ptp netmask switch

In type ptp network configurations, OSPFv2 +implementations should ignore received netmask field in hello +packets and should send hello packets with zero netmask field +on unnumbered PtP links. But some OSPFv2 implementations +perform netmask checking even for PtP links. This option +specifies whether real netmask will be used in hello packets +on type ptp interfaces. You should ignore this option +unless you meet some compatibility problems related to this +issue. Default value is no for unnumbered PtP links, yes +otherwise. +

+

check link switch

If set, a hardware link state (reported by OS) is taken into +consideration. When a link disappears (e.g. an ethernet cable is +unplugged), neighbors are immediately considered unreachable +and only the address of the iface (instead of whole network +prefix) is propagated. It is possible that some hardware +drivers or platforms do not implement this feature. Default value is no. +

+

ttl security [switch | tx only]

TTL security is a feature that protects routing protocols +from remote spoofed packets by using TTL 255 instead of TTL 1 +for protocol packets destined to neighbors. Because TTL is +decremented when packets are forwarded, it is non-trivial to +spoof packets with TTL 255 from remote locations. Note that +this option would interfere with OSPF virtual links. +

If this option is enabled, the router will send OSPF packets +with TTL 255 and drop received packets with TTL less than +255. If this option is set to tx only, TTL 255 is used +for sent packets, but is not checked for received +packets. Default value is no. +

+

tx class|dscp|priority num

These options specify the ToS/DiffServ/Traffic class/Priority +of the outgoing OSPF packets. See +tx class common option for detailed description. +

+

ecmp weight num

When ECMP (multipath) routes are allowed, this value specifies +a relative weight used for nexthops going through the iface. +Allowed values are 1-256. Default value is 1. +

+

authentication none

No passwords are sent in OSPF packets. This is the default value. +

+

authentication simple

Every packet carries 8 bytes of password. Received packets +lacking this password are ignored. This authentication mechanism is +very weak. +

+

authentication cryptographic

16-byte long MD5 digest is appended to every packet. For the digest +generation 16-byte long passwords are used. Those passwords are +not sent via network, so this mechanism is quite secure. +Packets can still be read by an attacker. +

+

password "text"

An 8-byte or 16-byte password used for authentication. +See +password common option for detailed description. +

+

neighbors { set }

A set of neighbors to which Hello messages on NBMA or PtMP +networks are to be sent. For NBMA networks, some of them +could be marked as eligible. In OSPFv3, link-local addresses +should be used, using global ones is possible, but it is +nonstandard and might be problematic. And definitely, +link-local and global addresses should not be mixed. +

+

+

+

Attributes

+ +

OSPF defines four route attributes. Each internal route has a metric. +The metric ranges from 1 to infinity (65535). +External routes use metric type 1 or metric type 2. +A metric of type 1 is comparable with internal metric, a +metric of type 2 is always longer +than any metric of type 1 or any internal metric. +Internal metric or metric of type 1 is stored in attribute +ospf_metric1, metric type 2 is stored in attribute ospf_metric2. +If you specify both metrics only metric1 is used. +

Each external route can also carry attribute ospf_tag which is a +32-bit integer which is used when exporting routes to other protocols; +otherwise, it doesn't affect routing inside the OSPF domain at all. +The fourth attribute ospf_router_id is a router ID of the router +advertising that route/network. This attribute is read-only. Default +is ospf_metric2 = 10000 and ospf_tag = 0. +

+

Example

+ +

+

+


+
+protocol ospf MyOSPF {
+        rfc1583compat yes;
+        tick 2;
+        export filter {
+                if source = RTS_BGP then {
+                        ospf_metric1 = 100;
+                        accept;
+                }
+                reject;
+        };
+        area 0.0.0.0 {
+                interface "eth*" {
+                        cost 11;
+                        hello 15;
+                        priority 100;
+                        retransmit 7;
+                        authentication simple;
+                        password "aaa";
+                };
+                interface "ppp*" {
+                        cost 100;
+                        authentication cryptographic;
+                        password "abc" {
+                                id 1;
+                                generate to "22-04-2003 11:00:06";
+                                accept from "17-01-2001 12:01:05";
+                        };
+                        password "def" {
+                                id 2;
+                                generate to "22-07-2005 17:03:21";
+                                accept from "22-02-2001 11:34:06";
+                        };
+                };
+                interface "arc0" {
+                        cost 10;
+                        stub yes;
+                };
+                interface "arc1";
+        };
+        area 120 {
+                stub yes;
+                networks {
+                        172.16.1.0/24;
+                        172.16.2.0/24 hidden;
+                }
+                interface "-arc0" , "arc*" {
+                        type nonbroadcast;
+                        authentication none;
+                        strict nonbroadcast yes;
+                        wait 120;
+                        poll 40;
+                        dead count 8;
+                        neighbors {
+                                192.168.120.1 eligible;
+                                192.168.120.2;
+                                192.168.120.10;
+                        };
+                };
+        };
+}
+
+
+

+

6.6 Pipe +

+ +

Introduction

+ +

The Pipe protocol serves as a link between two routing tables, allowing routes to be +passed from a table declared as primary (i.e., the one the pipe is connected to using the +table configuration keyword) to the secondary one (declared using peer table) +and vice versa, depending on what's allowed by the filters. Export filters control export +of routes from the primary table to the secondary one, import filters control the opposite +direction. +

+

The Pipe protocol may work in the transparent mode or in the opaque mode. +In the transparent mode, the Pipe protocol retransmits all routes from +one table to the other table, retaining their original source and +attributes. If import and export filters are set to accept, then both +tables would have the same content. The transparent mode is the default mode. +

+

In the opaque mode, the Pipe protocol retransmits optimal route +from one table to the other table in a similar way like other +protocols send and receive routes. Retransmitted route will have the +source set to the Pipe protocol, which may limit access to protocol +specific route attributes. This mode is mainly for compatibility, it +is not suggested for new configs. The mode can be changed by +mode option. +

+

The primary use of multiple routing tables and the Pipe protocol is for policy routing, +where handling of a single packet doesn't depend only on its destination address, but also +on its source address, source interface, protocol type and other similar parameters. +In many systems (Linux being a good example), the kernel allows to enforce routing policies +by defining routing rules which choose one of several routing tables to be used for a packet +according to its parameters. Setting of these rules is outside the scope of BIRD's work +(on Linux, you can use the ip command), but you can create several routing tables in BIRD, +connect them to the kernel ones, use filters to control which routes appear in which tables +and also you can employ the Pipe protocol for exporting a selected subset of one table to +another one. +

+

Configuration

+ +

+

+
peer table table

Defines secondary routing table to connect to. The +primary one is selected by the table keyword. +

+

mode opaque|transparent

Specifies the mode for the pipe to work in. Default is transparent. +

+

+

Attributes

+ +

The Pipe protocol doesn't define any route attributes. +

+

Example

+ +

Let's consider a router which serves as a boundary router of two different autonomous +systems, each of them connected to a subset of interfaces of the router, having its own +exterior connectivity and wishing to use the other AS as a backup connectivity in case +of outage of its own exterior line. +

+

Probably the simplest solution to this situation is to use two routing tables (we'll +call them as1 and as2) and set up kernel routing rules, so that packets having +arrived from interfaces belonging to the first AS will be routed according to as1 +and similarly for the second AS. Thus we have split our router to two logical routers, +each one acting on its own routing table, having its own routing protocols on its own +interfaces. In order to use the other AS's routes for backup purposes, we can pass +the routes between the tables through a Pipe protocol while decreasing their preferences +and correcting their BGP paths to reflect the AS boundary crossing. +

+


+
+table as1;                              # Define the tables
+table as2;
+
+protocol kernel kern1 {                 # Synchronize them with the kernel
+        table as1;
+        kernel table 1;
+}
+
+protocol kernel kern2 {
+        table as2;
+        kernel table 2;
+}
+
+protocol bgp bgp1 {                     # The outside connections
+        table as1;
+        local as 1;
+        neighbor 192.168.0.1 as 1001;
+        export all;
+        import all;
+}
+
+protocol bgp bgp2 {
+        table as2;
+        local as 2;
+        neighbor 10.0.0.1 as 1002;
+        export all;
+        import all;
+}
+
+protocol pipe {                         # The Pipe
+        table as1;
+        peer table as2;
+        export filter {
+                if net ~ [ 1.0.0.0/8+] then {   # Only AS1 networks
+                        if preference>10 then preference = preference-10;
+                        if source=RTS_BGP then bgp_path.prepend(1);
+                        accept;
+                }
+                reject;
+        };
+        import filter {
+                if net ~ [ 2.0.0.0/8+] then {   # Only AS2 networks
+                        if preference>10 then preference = preference-10;
+                        if source=RTS_BGP then bgp_path.prepend(2);
+                        accept;
+                }
+                reject;
+        };
+}
+
+
+

+

6.7 RAdv +

+ +

Introduction

+ +

The RAdv protocol is an implementation of Router Advertisements, +which are used in the IPv6 stateless autoconfiguration. IPv6 routers +send (in irregular time intervals or as an answer to a request) +advertisement packets to connected networks. These packets contain +basic information about a local network (e.g. a list of network +prefixes), which allows network hosts to autoconfigure network +addresses and choose a default route. BIRD implements router behavior +as defined in +RFC 4861 +ftp://ftp.rfc-editor.org/in-notes/rfc4861.txt +and also the DNS extensions from +RFC 6106 +ftp://ftp.rfc-editor.org/in-notes/rfc6106.txt. +

+

Configuration

+ +

There are several classes of definitions in RAdv configuration -- +interface definitions, prefix definitions and DNS definitions: +

+

+
interface pattern [, ...] { options }

Interface definitions specify a set of interfaces on which the +protocol is activated and contain interface specific options. +See +interface common options for +detailed description. +

+

prefix prefix { options }

Prefix definitions allow to modify a list of advertised +prefixes. By default, the advertised prefixes are the same as +the network prefixes assigned to the interface. For each +network prefix, the matching prefix definition is found and +its options are used. If no matching prefix definition is +found, the prefix is used with default options. +

Prefix definitions can be either global or interface-specific. +The second ones are part of interface options. The prefix +definition matching is done in the first-match style, when +interface-specific definitions are processed before global +definitions. As expected, the prefix definition is matching if +the network prefix is a subnet of the prefix in prefix +definition. +

+

rdnss { options }

RDNSS definitions allow to specify a list of advertised +recursive DNS servers together with their options. As options +are seldom necessary, there is also a short variant rdnss +address that just specifies one DNS server. Multiple +definitions are cumulative. RDNSS definitions may also be +interface-specific when used inside interface options. By +default, interface uses both global and interface-specific +options, but that can be changed by rdnss local option. +

+

dnssl { options }

DNSSL definitions allow to specify a list of advertised DNS +search domains together with their options. Like rdnss +above, multiple definitions are cumulative, they can be used +also as interface-specific options and there is a short +variant dnssl domain that just specifies one DNS +search domain. +

+ +

trigger prefix

RAdv protocol could be configured to change its behavior based +on availability of routes. When this option is used, the +protocol waits in suppressed state until a trigger route +(for the specified network) is exported to the protocol, the +protocol also returns to suppressed state if the +trigger route disappears. Note that route export depends +on specified export filter, as usual. This option could be +used, e.g., for handling failover in multihoming scenarios. +

During suppressed state, router advertisements are generated, +but with some fields zeroed. Exact behavior depends on which +fields are zeroed, this can be configured by +sensitive option for appropriate fields. By default, just +default lifetime (also called router lifetime) is +zeroed, which means hosts cannot use the router as a default +router. preferred lifetime and valid lifetime could +also be configured as sensitive for a prefix, which would +cause autoconfigured IPs to be deprecated or even removed. +

+

+

Interface specific options: +

+

+
max ra interval expr

Unsolicited router advertisements are sent in irregular time +intervals. This option specifies the maximum length of these +intervals, in seconds. Valid values are 4-1800. Default: 600 +

+

min ra interval expr

This option specifies the minimum length of these intervals, in +seconds. Must be at least 3 and at most 3/4 * max ra interval. +Default: about 1/3 * max ra interval. +

+

min delay expr

The minimum delay between two consecutive router advertisements, +in seconds. Default: 3 +

+

managed switch

This option specifies whether hosts should use DHCPv6 for +IP address configuration. Default: no +

+

other config switch

This option specifies whether hosts should use DHCPv6 to +receive other configuration information. Default: no +

+

link mtu expr

This option specifies which value of MTU should be used by +hosts. 0 means unspecified. Default: 0 +

+

reachable time expr

This option specifies the time (in milliseconds) how long +hosts should assume a neighbor is reachable (from the last +confirmation). Maximum is 3600000, 0 means unspecified. +Default 0. +

+

retrans timer expr

This option specifies the time (in milliseconds) how long +hosts should wait before retransmitting Neighbor Solicitation +messages. 0 means unspecified. Default 0. +

+

current hop limit expr

This option specifies which value of Hop Limit should be used +by hosts. Valid values are 0-255, 0 means unspecified. Default: 64 +

+

default lifetime expr [sensitive switch]

This option specifies the time (in seconds) how long (after +the receipt of RA) hosts may use the router as a default +router. 0 means do not use as a default router. For +sensitive option, see +trigger. +Default: 3 * max ra interval, sensitive yes. +

+

rdnss local switch

Use only local (interface-specific) RDNSS definitions for this +interface. Otherwise, both global and local definitions are +used. Could also be used to disable RDNSS for given interface +if no local definitions are specified. Default: no. +

+

dnssl local switch

Use only local DNSSL definitions for this interface. See +rdnss local option above. Default: no. +

+

+

+

Prefix specific options: +

+

+
skip switch

This option allows to specify that given prefix should not be +advertised. This is useful for making exceptions from a +default policy of advertising all prefixes. Note that for +withdrawing an already advertised prefix it is more useful to +advertise it with zero valid lifetime. Default: no +

+

onlink switch

This option specifies whether hosts may use the advertised +prefix for onlink determination. Default: yes +

+

autonomous switch

This option specifies whether hosts may use the advertised +prefix for stateless autoconfiguration. Default: yes +

+

valid lifetime expr [sensitive switch]

This option specifies the time (in seconds) how long (after +the receipt of RA) the prefix information is valid, i.e., +autoconfigured IP addresses can be assigned and hosts with +that IP addresses are considered directly reachable. 0 means +the prefix is no longer valid. For sensitive option, see +trigger. Default: 86400 (1 day), sensitive no. +

+

preferred lifetime expr [sensitive switch]

This option specifies the time (in seconds) how long (after +the receipt of RA) IP addresses generated from the prefix +using stateless autoconfiguration remain preferred. For +sensitive option, see +trigger. +Default: 14400 (4 hours), sensitive no. +

+

+

+

RDNSS specific options: +

+

+
ns address

This option specifies one recursive DNS server. Can be used +multiple times for multiple servers. It is mandatory to have +at least one ns option in rdnss definition. +

+

lifetime [mult] expr

This option specifies the time how long the RDNSS information +may be used by clients after the receipt of RA. It is +expressed either in seconds or (when mult is used) in +multiples of max ra interval. Note that RDNSS information +is also invalidated when default lifetime expires. 0 +means these addresses are no longer valid DNS servers. +Default: 3 * max ra interval. +

+

+

+

DNSSL specific options: +

+

+
domain address

This option specifies one DNS search domain. Can be used +multiple times for multiple domains. It is mandatory to have +at least one domain option in dnssl definition. +

+

lifetime [mult] expr

This option specifies the time how long the DNSSL information +may be used by clients after the receipt of RA. Details are +the same as for RDNSS lifetime option above. +Default: 3 * max ra interval. +

+

+

+

Example

+ +

+


+
+protocol radv {
+        interface "eth2" {
+                max ra interval 5;      # Fast failover with more routers
+                managed yes;            # Using DHCPv6 on eth2
+                prefix ::/0 {
+                        autonomous off; # So do not autoconfigure any IP
+                };
+        };
+
+        interface "eth*";               # No need for any other options
+
+        prefix 2001:0DB8:1234::/48 {
+                preferred lifetime 0;   # Deprecated address range
+        };
+
+        prefix 2001:0DB8:2000::/48 {
+                autonomous off;         # Do not autoconfigure
+        };
+
+        rdnss 2001:0DB8:1234::10;       # Short form of RDNSS
+
+        rdnss {
+                lifetime mult 10;
+                ns 2001:0DB8:1234::11;
+                ns 2001:0DB8:1234::12;
+        };
+
+        dnssl {
+                lifetime 3600;
+                domain "abc.com";
+                domain "xyz.com";
+        };
+}
+
+
+

+

6.8 RIP +

+ +

Introduction

+ +

The RIP protocol (also sometimes called Rest In Pieces) is a simple protocol, where each router broadcasts (to all its neighbors) +distances to all networks it can reach. When a router hears distance to another network, it increments +it and broadcasts it back. Broadcasts are done in regular intervals. Therefore, if some network goes +unreachable, routers keep telling each other that its distance is the original distance plus 1 (actually, plus +interface metric, which is usually one). After some time, the distance reaches infinity (that's 16 in +RIP) and all routers know that network is unreachable. RIP tries to minimize situations where +counting to infinity is necessary, because it is slow. Due to infinity being 16, you can't use +RIP on networks where maximal distance is higher than 15 hosts. You can read more about RIP at +http://www.ietf.org/html.charters/rip-charter.html. Both IPv4 +(RFC 1723 +ftp://ftp.rfc-editor.org/in-notes/rfc1723.txt) +and IPv6 (RFC 2080 +ftp://ftp.rfc-editor.org/in-notes/rfc2080.txt) versions of RIP are supported by BIRD, historical RIPv1 (RFC 1058 +ftp://ftp.rfc-editor.org/in-notes/rfc1058.txt) is +not currently supported. RIPv4 MD5 authentication (RFC 2082 +ftp://ftp.rfc-editor.org/in-notes/rfc2082.txt) is supported. +

+

RIP is a very simple protocol, and it has a lot of shortcomings. Slow +convergence, big network load and inability to handle larger networks +make it pretty much obsolete. (It is still usable on very small networks.) +

+

Configuration

+ +

In addition to the options common to all protocols, RIP supports the following ones: +

+

+
authentication none|plaintext|md5

selects authentication method to be used. none means that +packets are not authenticated at all, plaintext means that a plaintext password is embedded +into each packet, and md5 means that packets are authenticated using an MD5 cryptographic +hash. If you set authentication to anything other than none, it is a good idea to add a password +section. Default: none. +

+

honor always|neighbor|never

specifies when should requests for dumping routing table +be honored. (Always, when sent from a host on a directly connected +network or never.) Routing table updates are honored only from +neighbors, that is not configurable. Default: never. +

+

+

There are some options that can be specified per-interface: +

+

+
metric num

This option specifies the metric of the interface. +

+

mode multicast|broadcast|quiet|nolisten|version1

This option selects the mode for RIP to work in. If nothing is +specified, RIP runs in multicast mode. version1 is +currently equivalent to broadcast, and it makes RIP talk +to a broadcast address even though multicast mode is +possible. quiet option means that RIP will not transmit +any periodic messages to this interface and nolisten +means that RIP will send to this interface but not listen to it. +

+

ttl security [switch | tx only]

TTL security is a feature that protects routing protocols +from remote spoofed packets by using TTL 255 instead of TTL 1 +for protocol packets destined to neighbors. Because TTL is +decremented when packets are forwarded, it is non-trivial to +spoof packets with TTL 255 from remote locations. +

If this option is enabled, the router will send RIP packets +with TTL 255 and drop received packets with TTL less than +255. If this option is set to tx only, TTL 255 is used +for sent packets, but is not checked for received +packets. Such setting does not offer protection, but offers +compatibility with neighbors regardless of whether they use +ttl security. +

Note that for RIPng, TTL security is a standard behavior +(required by RFC 2080), but BIRD uses tx only by +default, for compatibility with older versions. For IPv4 RIP, +default value is no. +

+

tx class|dscp|priority num

These options specify the ToS/DiffServ/Traffic class/Priority +of the outgoing RIP packets. See +tx class common option for detailed description. +

+

+

The following options generally override behavior specified in RFC. If you use any of these +options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything +other than equally configured BIRD. I have warned you. +

+

+
port number

selects IP port to operate on, default 520. (This is useful when testing BIRD, if you +set this to a port >1024, you will not need to run bird with UID==0). +

+

infinity number

selects the value of infinity, default is 16. Bigger values will make protocol convergence +even slower. +

+

period number

specifies the number of seconds between periodic updates. Default is 30 seconds. A lower +number will mean faster convergence but bigger network +load. Do not use values lower than 12. +

+

timeout time number

specifies how old route has to be to be considered unreachable. Default is 4*period. +

+

garbage time number

specifies how old route has to be to be discarded. Default is 10*period. +

+

+

Attributes

+ +

RIP defines two route attributes: +

+

+
int rip_metric

RIP metric of the route (ranging from 0 to infinity). +When routes from different RIP instances are available and all of them have the same +preference, BIRD prefers the route with lowest rip_metric. +When importing a non-RIP route, the metric defaults to 5. +

+

int rip_tag

RIP route tag: a 16-bit number which can be used +to carry additional information with the route (for example, an originating AS number +in case of external routes). When importing a non-RIP route, the tag defaults to 0. +

+

+

Example

+ +

+


+
+protocol rip MyRIP_test {
+        debug all;
+        port 1520;
+        period 12;
+        garbage time 60;
+        interface "eth0" { metric 3; mode multicast; };
+        interface "eth*" { metric 2; mode broadcast; };
+        honor neighbor;
+        authentication none;
+        import filter { print "importing"; accept; };
+        export filter { print "exporting"; accept; };
+}
+
+
+

+

6.9 Static +

+ +

The Static protocol doesn't communicate with other routers in the network, +but instead it allows you to define routes manually. This is often used for +specifying how to forward packets to parts of the network which don't use +dynamic routing at all and also for defining sink routes (i.e., those +telling to return packets as undeliverable if they are in your IP block, +you don't have any specific destination for them and you don't want to send +them out through the default route to prevent routing loops). +

+

There are five types of static routes: `classical' routes telling +to forward packets to a neighboring router, multipath routes +specifying several (possibly weighted) neighboring routers, device +routes specifying forwarding to hosts on a directly connected network, +recursive routes computing their nexthops by doing route table lookups +for a given IP and special routes (sink, blackhole etc.) which specify +a special action to be done instead of forwarding the packet. +

+

When the particular destination is not available (the interface is down or +the next hop of the route is not a neighbor at the moment), Static just +uninstalls the route from the table it is connected to and adds it again as soon +as the destination becomes adjacent again. +

+

The Static protocol does not have many configuration options. The +definition of the protocol contains mainly a list of static routes: +

+

+
route prefix via ip

Static route through +a neighboring router. +

route prefix multipath via ip [weight num] [via ...]

Static multipath route. Contains several nexthops (gateways), possibly +with their weights. +

route prefix via "interface"

Static device +route through an interface to hosts on a directly connected network. +

route prefix recursive ip

Static recursive route, +its nexthop depends on a route table lookup for given IP address. +

route prefix blackhole|unreachable|prohibit

Special routes +specifying to silently drop the packet, return it as unreachable or return +it as administratively prohibited. First two targets are also known +as drop and reject. +

+

check link switch

If set, hardware link states of network interfaces are taken +into consideration. When link disappears (e.g. ethernet cable +is unplugged), static routes directing to that interface are +removed. It is possible that some hardware drivers or +platforms do not implement this feature. Default: off. +

+

igp table name

Specifies a table that is used +for route table lookups of recursive routes. Default: the +same table as the protocol is connected to. +

+

+

Static routes have no specific attributes. +

+

Example static config might look like this: +

+

+


+
+protocol static {
+        table testable;                  # Connect to a non-default routing table
+        route 0.0.0.0/0 via 198.51.100.130; # Default route
+        route 10.0.0.0/8 multipath       # Multipath route
+                via 198.51.100.10 weight 2
+                via 198.51.100.20
+                via 192.0.2.1;
+        route 203.0.113.0/24 unreachable; # Sink route
+        route 10.2.0.0/24 via "arc0";    # Secondary network
+}
+
+
+

+


+Next +Previous +Contents + + -- cgit v1.2.3 From b0a8c7fc8547eef21ede33887580b5e867ee742c Mon Sep 17 00:00:00 2001 From: Ondrej Filip Date: Thu, 15 Aug 2013 20:26:50 +0200 Subject: Wrong change commited - 'route limit' marked as obsolete. --- doc/bird-6.html | 1731 ------------------------------------------------------- doc/bird.sgml | 5 +- 2 files changed, 3 insertions(+), 1733 deletions(-) delete mode 100644 doc/bird-6.html (limited to 'doc') diff --git a/doc/bird-6.html b/doc/bird-6.html deleted file mode 100644 index d21209ee..00000000 --- a/doc/bird-6.html +++ /dev/null @@ -1,1731 +0,0 @@ - - - - - BIRD User's Guide: Protocols - - - - - -Next -Previous -Contents -
-

6. Protocols

- -

6.1 BGP -

- -

The Border Gateway Protocol is the routing protocol used for backbone -level routing in the today's Internet. Contrary to the other protocols, its convergence -doesn't rely on all routers following the same rules for route selection, -making it possible to implement any routing policy at any router in the -network, the only restriction being that if a router advertises a route, -it must accept and forward packets according to it. -

-

BGP works in terms of autonomous systems (often abbreviated as -AS). Each AS is a part of the network with common management and -common routing policy. It is identified by a unique 16-bit number -(ASN). Routers within each AS usually exchange AS-internal routing -information with each other using an interior gateway protocol (IGP, -such as OSPF or RIP). Boundary routers at the border of -the AS communicate global (inter-AS) network reachability information with -their neighbors in the neighboring AS'es via exterior BGP (eBGP) and -redistribute received information to other routers in the AS via -interior BGP (iBGP). -

-

Each BGP router sends to its neighbors updates of the parts of its -routing table it wishes to export along with complete path information -(a list of AS'es the packet will travel through if it uses the particular -route) in order to avoid routing loops. -

-

BIRD supports all requirements of the BGP4 standard as defined in -RFC 4271 -ftp://ftp.rfc-editor.org/in-notes/rfc4271.txt -It also supports the community attributes -(RFC 1997 -ftp://ftp.rfc-editor.org/in-notes/rfc1997.txt), -capability negotiation -(RFC 3392 -ftp://ftp.rfc-editor.org/in-notes/rfc3392.txt), -MD5 password authentication -(RFC 2385 -ftp://ftp.rfc-editor.org/in-notes/rfc2385.txt), -extended communities -(RFC 4360 -ftp://ftp.rfc-editor.org/in-notes/rfc4360.txt), -route reflectors -(RFC 4456 -ftp://ftp.rfc-editor.org/in-notes/rfc4456.txt), -multiprotocol extensions -(RFC 4760 -ftp://ftp.rfc-editor.org/in-notes/rfc4760.txt), -4B AS numbers -(RFC 4893 -ftp://ftp.rfc-editor.org/in-notes/rfc4893.txt), -and 4B AS numbers in extended communities -(RFC 5668 -ftp://ftp.rfc-editor.org/in-notes/rfc5668.txt). -

-

For IPv6, it uses the standard multiprotocol extensions defined in -RFC 2283 -ftp://ftp.rfc-editor.org/in-notes/rfc2283.txt -including changes described in the -latest draft -ftp://ftp.rfc-editor.org/internet-drafts/draft-ietf-idr-bgp4-multiprotocol-v2-05.txt -and applied to IPv6 according to -RFC 2545 -ftp://ftp.rfc-editor.org/in-notes/rfc2545.txt. -

-

Route selection rules

- -

BGP doesn't have any simple metric, so the rules for selection of an optimal -route among multiple BGP routes with the same preference are a bit more complex -and they are implemented according to the following algorithm. It starts the first -rule, if there are more "best" routes, then it uses the second rule to choose -among them and so on. -

-

    -
  • Prefer route with the highest Local Preference attribute.
  • -
  • Prefer route with the shortest AS path.
  • -
  • Prefer IGP origin over EGP and EGP origin over incomplete.
  • -
  • Prefer the lowest value of the Multiple Exit Discriminator.
  • -
  • Prefer routes received via eBGP over ones received via iBGP.
  • -
  • Prefer routes with lower internal distance to a boundary router.
  • -
  • Prefer the route with the lowest value of router ID of the -advertising router.
  • -
-

-

IGP routing table

- -

BGP is mainly concerned with global network reachability and with -routes to other autonomous systems. When such routes are redistributed -to routers in the AS via BGP, they contain IP addresses of a boundary -routers (in route attribute NEXT_HOP). BGP depends on existing IGP -routing table with AS-internal routes to determine immediate next hops -for routes and to know their internal distances to boundary routers -for the purpose of BGP route selection. In BIRD, there is usually -one routing table used for both IGP routes and BGP routes. -

-

Configuration

- -

Each instance of the BGP corresponds to one neighboring router. -This allows to set routing policy and all the other parameters differently -for each neighbor using the following configuration parameters: -

-

-
local [ip] as number

Define which AS we -are part of. (Note that contrary to other IP routers, BIRD is -able to act as a router located in multiple AS'es -simultaneously, but in such cases you need to tweak the BGP -paths manually in the filters to get consistent behavior.) -Optional ip argument specifies a source address, -equivalent to the source address option (see below). -This parameter is mandatory. -

-

neighbor ip as number

Define neighboring router -this instance will be talking to and what AS it's located in. Unless -you use the multihop clause, it must be directly connected to one -of your router's interfaces. In case the neighbor is in the same AS -as we are, we automatically switch to iBGP. This parameter is mandatory. -

-

multihop [number]

Configure multihop BGP -session to a neighbor that isn't directly connected. -Accurately, this option should be used if the configured -neighbor IP address does not match with any local network -subnets. Such IP address have to be reachable through system -routing table. For multihop BGP it is recommended to -explicitly configure source address to have it -stable. Optional number argument can be used to specify -the number of hops (used for TTL). Note that the number of -networks (edges) in a path is counted, i.e. if two BGP -speakers are separated by one router, the number of hops is -2. Default: switched off. -

-

source address ip

Define local address we -should use for next hop calculation and as a source address -for the BGP session. Default: the address of the local -end of the interface our neighbor is connected to. -

-

next hop self

Avoid calculation of the Next Hop -attribute and always advertise our own source address as a -next hop. This needs to be used only occasionally to -circumvent misconfigurations of other routers. Default: -disabled. -

-

next hop keep

Forward the received Next Hop -attribute even in situations where the local address should be -used instead, like when the route is sent to an interface with -a different subnet. Default: disabled. -

-

missing lladdr self|drop|ignore

Next Hop attribute -in BGP-IPv6 sometimes contains just the global IPv6 address, -but sometimes it has to contain both global and link-local -IPv6 addresses. This option specifies what to do if BIRD have -to send both addresses but does not know link-local address. -This situation might happen when routes from other protocols -are exported to BGP, or when improper updates are received -from BGP peers. self means that BIRD advertises its own -local address instead. drop means that BIRD skips that -prefixes and logs error. ignore means that BIRD ignores -the problem and sends just the global address (and therefore -forms improper BGP update). Default: self, unless BIRD -is configured as a route server (option rs client), in -that case default is ignore, because route servers usually -do not forward packets themselves. -

-

gateway direct|recursive

For received routes, their -gw (immediate next hop) attribute is computed from -received bgp_next_hop attribute. This option specifies -how it is computed. Direct mode means that the IP address from -bgp_next_hop is used if it is directly reachable, -otherwise the neighbor IP address is used. Recursive mode -means that the gateway is computed by an IGP routing table -lookup for the IP address from bgp_next_hop. Recursive -mode is the behavior specified by the BGP standard. Direct -mode is simpler, does not require any routes in a routing -table, and was used in older versions of BIRD, but does not -handle well nontrivial iBGP setups and multihop. Recursive -mode is incompatible with -sorted tables. Default: direct for singlehop eBGP, -recursive otherwise. -

-

igp table name

Specifies a table that is used -as an IGP routing table. Default: the same as the table BGP is -connected to. -

-

ttl security switch

Use GTSM (RFC 5082 - the -generalized TTL security mechanism). GTSM protects against -spoofed packets by ignoring received packets with a smaller -than expected TTL. To work properly, GTSM has to be enabled -on both sides of a BGP session. If both ttl security and -multihop options are enabled, multihop option should -specify proper hop value to compute expected TTL. Kernel -support required: Linux: 2.6.34+ (IPv4), 2.6.35+ (IPv6), BSD: -since long ago, IPv4 only. Note that full (ICMP protection, -for example) RFC 5082 support is provided by Linux -only. Default: disabled. -

-

password string

Use this password for MD5 authentication -of BGP sessions. Default: no authentication. Password has to be set by -external utility (e.g. setkey(8)) on BSD systems. -

-

passive switch

Standard BGP behavior is both -initiating outgoing connections and accepting incoming -connections. In passive mode, outgoing connections are not -initiated. Default: off. -

-

rr client

Be a route reflector and treat the neighbor as -a route reflection client. Default: disabled. -

-

rr cluster id IPv4 address

Route reflectors use cluster id -to avoid route reflection loops. When there is one route reflector in a cluster -it usually uses its router id as a cluster id, but when there are more route -reflectors in a cluster, these need to be configured (using this option) to -use a common cluster id. Clients in a cluster need not know their cluster -id and this option is not allowed for them. Default: the same as router id. -

-

rs client

Be a route server and treat the neighbor -as a route server client. A route server is used as a -replacement for full mesh EBGP routing in Internet exchange -points in a similar way to route reflectors used in IBGP routing. -BIRD does not implement obsoleted RFC 1863, but uses ad-hoc implementation, -which behaves like plain EBGP but reduces modifications to advertised route -attributes to be transparent (for example does not prepend its AS number to -AS PATH attribute and keeps MED attribute). Default: disabled. -

-

secondary switch

Usually, if an import filter -rejects a selected route, no other route is propagated for -that network. This option allows to try the next route in -order until one that is accepted is found or all routes for -that network are rejected. This can be used for route servers -that need to propagate different tables to each client but do -not want to have these tables explicitly (to conserve memory). -This option requires that the connected routing table is -sorted. Default: off. -

-

enable route refresh switch

When BGP speaker -changes its import filter, it has to re-examine all routes -received from its neighbor against the new filter. As these -routes might not be available, there is a BGP protocol -extension Route Refresh (specified in RFC 2918) that allows -BGP speaker to request re-advertisement of all routes from its -neighbor. This option specifies whether BIRD advertises this -capability and accepts such requests. Even when disabled, BIRD -can send route refresh requests. Default: on. -

-

interpret communities switch

RFC 1997 demands -that a BGP speaker should process well-known communities like -no-export (65535, 65281) or no-advertise (65535, 65282). For -example, a received route carrying a no-advertise community -should not be advertised to any of its neighbors. If this -option is enabled (which is by default), BIRD has such -behavior automatically (it is evaluated when a route is -exported to the BGP protocol just before the export filter). -Otherwise, this integrated processing of well-known -communities is disabled. In that case, similar behavior can be -implemented in the export filter. Default: on. -

-

enable as4 switch

BGP protocol was designed to use 2B AS numbers -and was extended later to allow 4B AS number. BIRD supports 4B AS extension, -but by disabling this option it can be persuaded not to advertise it and -to maintain old-style sessions with its neighbors. This might be useful for -circumventing bugs in neighbor's implementation of 4B AS extension. -Even when disabled (off), BIRD behaves internally as AS4-aware BGP router. -Default: on. -

-

capabilities switch

Use capability advertisement -to advertise optional capabilities. This is standard behavior -for newer BGP implementations, but there might be some older -BGP implementations that reject such connection attempts. -When disabled (off), features that request it (4B AS support) -are also disabled. Default: on, with automatic fallback to -off when received capability-related error. -

-

advertise ipv4 switch

Advertise IPv4 multiprotocol capability. -This is not a correct behavior according to the strict interpretation -of RFC 4760, but it is widespread and required by some BGP -implementations (Cisco and Quagga). This option is relevant -to IPv4 mode with enabled capability advertisement only. Default: on. -

-

route limit number

The maximal number of routes -that may be imported from the protocol. If the route limit is -exceeded, the connection is closed with an error. Limit is currently implemented as -import limit number action restart. This option is obsolete and it is -replaced by -import limit option. Default: no limit. -

-

disable after error switch

When an error is encountered (either -locally or by the other side), disable the instance automatically -and wait for an administrator to fix the problem manually. Default: off. -

-

hold time number

Time in seconds to wait for a Keepalive -message from the other side before considering the connection stale. -Default: depends on agreement with the neighboring router, we prefer -240 seconds if the other side is willing to accept it. -

-

startup hold time number

Value of the hold timer used -before the routers have a chance to exchange open messages and agree -on the real value. Default: 240 seconds. -

-

keepalive time number

Delay in seconds between sending -of two consecutive Keepalive messages. Default: One third of the hold time. -

-

connect retry time number

Time in seconds to wait before -retrying a failed attempt to connect. Default: 120 seconds. -

-

start delay time number

Delay in seconds between protocol -startup and the first attempt to connect. Default: 5 seconds. -

-

error wait time number,number

Minimum and maximum delay in seconds between a protocol -failure (either local or reported by the peer) and automatic restart. -Doesn't apply when disable after error is configured. If consecutive -errors happen, the delay is increased exponentially until it reaches the maximum. Default: 60, 300. -

-

error forget time number

Maximum time in seconds between two protocol -failures to treat them as an error sequence which makes the error wait time -increase exponentially. Default: 300 seconds. -

-

path metric switch

Enable comparison of path lengths -when deciding which BGP route is the best one. Default: on. -

-

med metric switch

Enable comparison of MED -attributes (during best route selection) even between routes -received from different ASes. This may be useful if all MED -attributes contain some consistent metric, perhaps enforced in -import filters of AS boundary routers. If this option is -disabled, MED attributes are compared only if routes are -received from the same AS (which is the standard behavior). -Default: off. -

-

deterministic med switch

BGP route selection -algorithm is often viewed as a comparison between individual -routes (e.g. if a new route appears and is better than the -current best one, it is chosen as the new best one). But the -proper route selection, as specified by RFC 4271, cannot be -fully implemented in that way. The problem is mainly in -handling the MED attribute. BIRD, by default, uses a -simplification based on individual route comparison, which in -some cases may lead to temporally dependent behavior (i.e. the -selection is dependent on the order in which routes appeared). -This option enables a different (and slower) algorithm -implementing proper RFC 4271 route selection, which is -deterministic. An alternative way to get deterministic -behavior is to use the med metric option. This option is -incompatible with -sorted tables. -Default: off. -

-

igp metric switch

Enable comparison of internal -distances to boundary routers during best route selection. Default: on. -

-

prefer older switch

Standard route selection algorithm -breaks ties by comparing router IDs. This changes the behavior -to prefer older routes (when both are external and from different -peer). For details, see RFC 5004. Default: off. -

-

default bgp_med number

Value of the Multiple Exit -Discriminator to be used during route selection when the MED attribute -is missing. Default: 0. -

-

default bgp_local_pref number

A default value -for the Local Preference attribute. It is used when a new -Local Preference attribute is attached to a route by the BGP -protocol itself (for example, if a route is received through -eBGP and therefore does not have such attribute). Default: 100 -(0 in pre-1.2.0 versions of BIRD). -

-

-

Attributes

- -

BGP defines several route attributes. Some of them (those marked with `I' in the -table below) are available on internal BGP connections only, some of them (marked -with `O') are optional. -

-

-
bgppath bgp_path

Sequence of AS numbers describing the AS path -the packet will travel through when forwarded according to the particular route. -In case of internal BGP it doesn't contain the number of the local AS. -

-

int bgp_local_pref [I]

Local preference value used for -selection among multiple BGP routes (see the selection rules above). It's -used as an additional metric which is propagated through the whole local AS. -

-

int bgp_med [O]

The Multiple Exit Discriminator of the route -is an optional attribute which is used on external (inter-AS) links to -convey to an adjacent AS the optimal entry point into the local AS. -The received attribute is also propagated over internal BGP links. -The attribute value is zeroed when a route is exported to an external BGP -instance to ensure that the attribute received from a neighboring AS is -not propagated to other neighboring ASes. A new value might be set in -the export filter of an external BGP instance. -See RFC 4451 -ftp://ftp.rfc-editor.org/in-notes/rfc4451.txt -for further discussion of BGP MED attribute. -

-

enum bgp_origin

Origin of the route: either ORIGIN_IGP -if the route has originated in an interior routing protocol or -ORIGIN_EGP if it's been imported from the EGP protocol -(nowadays it seems to be obsolete) or ORIGIN_INCOMPLETE if the origin -is unknown. -

-

ip bgp_next_hop

Next hop to be used for forwarding of packets -to this destination. On internal BGP connections, it's an address of the -originating router if it's inside the local AS or a boundary router the -packet will leave the AS through if it's an exterior route, so each BGP -speaker within the AS has a chance to use the shortest interior path -possible to this point. -

-

void bgp_atomic_aggr [O]

This is an optional attribute -which carries no value, but the sole presence of which indicates that the route -has been aggregated from multiple routes by some router on the path from -the originator. -

-

clist bgp_community [O]

List of community values associated -with the route. Each such value is a pair (represented as a pair data -type inside the filters) of 16-bit integers, the first of them containing the number of the AS which defines -the community and the second one being a per-AS identifier. There are lots -of uses of the community mechanism, but generally they are used to carry -policy information like "don't export to USA peers". As each AS can define -its own routing policy, it also has a complete freedom about which community -attributes it defines and what will their semantics be. -

-

eclist bgp_ext_community [O]

List of extended community -values associated with the route. Extended communities have similar usage -as plain communities, but they have an extended range (to allow 4B ASNs) -and a nontrivial structure with a type field. Individual community values are -represented using an ec data type inside the filters. -

-

quad bgp_originator_id [I, O]

This attribute is created by the -route reflector when reflecting the route and contains the router ID of the -originator of the route in the local AS. -

-

clist bgp_cluster_list [I, O]

This attribute contains a list -of cluster IDs of route reflectors. Each route reflector prepends its -cluster ID when reflecting the route. -

-

-

Example

- -

-


-
-protocol bgp {
-        local as 65000;                      # Use a private AS number
-        neighbor 198.51.100.130 as 64496;    # Our neighbor ...
-        multihop;                            # ... which is connected indirectly
-        export filter {                      # We use non-trivial export rules
-                if source = RTS_STATIC then { # Export only static routes
-                        # Assign our community
-                        bgp_community.add((65000,64501));
-                        # Artificially increase path length
-                        # by advertising local AS number twice
-                        if bgp_path ~ [= 65000 =] then
-                                bgp_path.prepend(65000);
-                        accept;
-                }
-                reject;
-        };
-        import all;
-        source address 198.51.100.14;   # Use a non-standard source address
-}
-
-
-

-

6.2 Device -

- -

The Device protocol is not a real routing protocol. It doesn't generate -any routes and it only serves as a module for getting information about network -interfaces from the kernel. -

-

Except for very unusual circumstances, you probably should include -this protocol in the configuration since almost all other protocols -require network interfaces to be defined for them to work with. -

-

Configuration

- -

-

-
scan time number

Time in seconds between two scans -of the network interface list. On systems where we are notified about -interface status changes asynchronously (such as newer versions of -Linux), we need to scan the list only in order to avoid confusion by lost -notification messages, so the default time is set to a large value. -

-

primary [ "mask" ] prefix

If a network interface has more than one network address, BIRD -has to choose one of them as a primary one. By default, BIRD -chooses the lexicographically smallest address as the primary -one. -

This option allows to specify which network address should be -chosen as a primary one. Network addresses that match -prefix are preferred to non-matching addresses. If more -primary options are used, the first one has the highest -preference. If "mask" is specified, then such -primary option is relevant only to matching network -interfaces. -

In all cases, an address marked by operating system as -secondary cannot be chosen as the primary one. -

-

-

As the Device protocol doesn't generate any routes, it cannot have -any attributes. Example configuration looks like this: -

-

-


-
-protocol device {
-        scan time 10;           # Scan the interfaces often
-        primary "eth0" 192.168.1.1;
-        primary 192.168.0.0/16;
-}
-
-
-

-

6.3 Direct -

- -

The Direct protocol is a simple generator of device routes for all the -directly connected networks according to the list of interfaces provided -by the kernel via the Device protocol. -

-

The question is whether it is a good idea to have such device -routes in BIRD routing table. OS kernel usually handles device routes -for directly connected networks by itself so we don't need (and don't -want) to export these routes to the kernel protocol. OSPF protocol -creates device routes for its interfaces itself and BGP protocol is -usually used for exporting aggregate routes. Although there are some -use cases that use the direct protocol (like abusing eBGP as an IGP -routing protocol), in most cases it is not needed to have these device -routes in BIRD routing table and to use the direct protocol. -

-

There is one notable case when you definitely want to use the -direct protocol -- running BIRD on BSD systems. Having high priority -device routes for directly connected networks from the direct protocol -protects kernel device routes from being overwritten or removed by IGP -routes during some transient network conditions, because a lower -priority IGP route for the same network is not exported to the kernel -routing table. This is an issue on BSD systems only, as on Linux -systems BIRD cannot change non-BIRD route in the kernel routing table. -

-

The only configurable thing about direct is what interfaces it watches: -

-

-

-
interface pattern [, ...]

By default, the Direct -protocol will generate device routes for all the interfaces -available. If you want to restrict it to some subset of interfaces -(for example if you're using multiple routing tables for policy -routing and some of the policy domains don't contain all interfaces), -just use this clause. -

-

-

Direct device routes don't contain any specific attributes. -

-

Example config might look like this: -

-

-


-
-protocol direct {
-        interface "-arc*", "*";         # Exclude the ARCnets
-}
-
-
-

-

6.4 Kernel -

- -

The Kernel protocol is not a real routing protocol. Instead of communicating -with other routers in the network, it performs synchronization of BIRD's routing -tables with the OS kernel. Basically, it sends all routing table updates to the kernel -and from time to time it scans the kernel tables to see whether some routes have -disappeared (for example due to unnoticed up/down transition of an interface) -or whether an `alien' route has been added by someone else (depending on the -learn switch, such routes are either ignored or accepted to our -table). -

-

Unfortunately, there is one thing that makes the routing table -synchronization a bit more complicated. In the kernel routing table -there are also device routes for directly connected networks. These -routes are usually managed by OS itself (as a part of IP address -configuration) and we don't want to touch that. They are completely -ignored during the scan of the kernel tables and also the export of -device routes from BIRD tables to kernel routing tables is restricted -to prevent accidental interference. This restriction can be disabled using -device routes switch. -

-

If your OS supports only a single routing table, you can configure -only one instance of the Kernel protocol. If it supports multiple -tables (in order to allow policy routing; such an OS is for example -Linux), you can run as many instances as you want, but each of them -must be connected to a different BIRD routing table and to a different -kernel table. -

-

Because the kernel protocol is partially integrated with the -connected routing table, there are two limitations - it is not -possible to connect more kernel protocols to the same routing table -and changing route destination/gateway in an export -filter of a kernel protocol does not work. Both limitations can be -overcome using another routing table and the pipe protocol. -

-

Configuration

- -

-

-
persist switch

Tell BIRD to leave all its routes in the -routing tables when it exits (instead of cleaning them up). -

scan time number

Time in seconds between two consecutive scans of the -kernel routing table. -

learn switch

Enable learning of routes added to the kernel -routing tables by other routing daemons or by the system administrator. -This is possible only on systems which support identification of route -authorship. -

-

device routes switch

Enable export of device -routes to the kernel routing table. By default, such routes -are rejected (with the exception of explicitly configured -device routes from the static protocol) regardless of the -export filter to protect device routes in kernel routing table -(managed by OS itself) from accidental overwriting or erasing. -

-

kernel table number

Select which kernel table should -this particular instance of the Kernel protocol work with. Available -only on systems supporting multiple routing tables. -

-

-

Attributes

- -

The Kernel protocol defines several attributes. These attributes -are translated to appropriate system (and OS-specific) route attributes. -We support these attributes: -

-

-
int krt_source

The original source of the imported -kernel route. The value is system-dependent. On Linux, it is -a value of the protocol field of the route. See -/etc/iproute2/rt_protos for common values. On BSD, it is -based on STATIC and PROTOx flags. The attribute is read-only. -

-

int krt_metric

The kernel metric of -the route. When multiple routes for the same network are in a kernel routing -table, the Linux kernel chooses the one with the lowest metric. -

-

ip krt_prefsrc

(Linux) The preferred source address. -Used in source address selection for outgoing packets. Has to -be one of the IP addresses of the router. -

-

int krt_realm

(Linux) The realm of the route. Can be -used for traffic classification. -

-

-

Example

- -

A simple configuration can look this way: -

-

-


-
-protocol kernel {
-        export all;
-}
-
-
-

-

Or for a system with two routing tables: -

-

-


-
-protocol kernel {               # Primary routing table
-        learn;                  # Learn alien routes from the kernel
-        persist;                # Don't remove routes on bird shutdown
-        scan time 10;           # Scan kernel routing table every 10 seconds
-        import all;
-        export all;
-}
-
-protocol kernel {               # Secondary routing table
-        table auxtable;
-        kernel table 100;
-        export all;
-}
-
-
-

-

6.5 OSPF -

- -

Introduction

- -

Open Shortest Path First (OSPF) is a quite complex interior gateway -protocol. The current IPv4 version (OSPFv2) is defined in RFC -2328 -ftp://ftp.rfc-editor.org/in-notes/rfc2328.txt and -the current IPv6 version (OSPFv3) is defined in RFC 5340 -ftp://ftp.rfc-editor.org/in-notes/rfc5340.txt It's a link state -(a.k.a. shortest path first) protocol -- each router maintains a -database describing the autonomous system's topology. Each participating -router has an identical copy of the database and all routers run the -same algorithm calculating a shortest path tree with themselves as a -root. OSPF chooses the least cost path as the best path. -

-

In OSPF, the autonomous system can be split to several areas in order -to reduce the amount of resources consumed for exchanging the routing -information and to protect the other areas from incorrect routing data. -Topology of the area is hidden to the rest of the autonomous system. -

-

Another very important feature of OSPF is that -it can keep routing information from other protocols (like Static or BGP) -in its link state database as external routes. Each external route can -be tagged by the advertising router, making it possible to pass additional -information between routers on the boundary of the autonomous system. -

-

OSPF quickly detects topological changes in the autonomous system (such -as router interface failures) and calculates new loop-free routes after a short -period of convergence. Only a minimal amount of -routing traffic is involved. -

-

Each router participating in OSPF routing periodically sends Hello messages -to all its interfaces. This allows neighbors to be discovered dynamically. -Then the neighbors exchange their parts of the link state database and keep it -identical by flooding updates. The flooding process is reliable and ensures -that each router detects all changes. -

-

Configuration

- -

In the main part of the configuration, there can be multiple definitions of -OSPF areas, each with a different id. These definitions include many other -switches and multiple definitions of interfaces. A definition of an interface -may contain many switches and constant definitions and a list of neighbors -on nonbroadcast networks. -

-


-
-protocol ospf <name> {
-        rfc1583compat <switch>;
-        stub router <switch>;
-        tick <num>;
-        ecmp <switch> [limit <num>];
-        area <id> {
-                stub;
-                nssa;
-                summary <switch>;
-                default nssa <switch>;
-                default cost <num>;
-                default cost2 <num>;
-                translator <switch>;
-                translator stability <num>;
-
-                networks {
-                        <prefix>;
-                        <prefix> hidden;
-                }
-                external {
-                        <prefix>;
-                        <prefix> hidden;
-                        <prefix> tag <num>;
-                }
-                stubnet <prefix>;
-                stubnet <prefix> {
-                        hidden <switch>;
-                        summary <switch>;
-                        cost <num>;
-                }
-                interface <interface pattern> [instance <num>] {
-                        cost <num>;
-                        stub <switch>;
-                        hello <num>;
-                        poll <num>;
-                        retransmit <num>;
-                        priority <num>;
-                        wait <num>;
-                        dead count <num>;
-                        dead <num>;
-                        rx buffer [normal|large|<num>];
-                        type [broadcast|bcast|pointopoint|ptp|
-                                nonbroadcast|nbma|pointomultipoint|ptmp];
-                        strict nonbroadcast <switch>;
-                        real broadcast <switch>;
-                        ptp netmask <switch>;
-                        check link <switch>;
-                        ecmp weight <num>;
-                        ttl security [<switch>; | tx only]
-                        tx class|dscp <num>;
-                        tx priority <num>;
-                        authentication [none|simple|cryptographic];
-                        password "<text>";
-                        password "<text>" {
-                                id <num>;
-                                generate from "<date>";
-                                generate to "<date>";
-                                accept from "<date>";
-                                accept to "<date>";
-                        };
-                        neighbors {
-                                <ip>;
-                                <ip> eligible;
-                        };
-                };
-                virtual link <id> [instance <num>] {
-                        hello <num>;
-                        retransmit <num>;
-                        wait <num>;
-                        dead count <num>;
-                        dead <num>;
-                        authentication [none|simple|cryptographic];
-                        password "<text>";
-                };
-        };
-}
-
-
-

-

-
rfc1583compat switch

This option controls compatibility of routing table -calculation with RFC 1583 -ftp://ftp.rfc-editor.org/in-notes/rfc1583.txt. Default -value is no. -

-

stub router switch

This option configures the router to be a stub router, i.e., -a router that participates in the OSPF topology but does not -allow transit traffic. In OSPFv2, this is implemented by -advertising maximum metric for outgoing links, as suggested -by RFC 3137 -ftp://ftp.rfc-editor.org/in-notes/rfc3137.txt. -In OSPFv3, the stub router behavior is announced by clearing -the R-bit in the router LSA. Default value is no. -

-

tick num

The routing table calculation and clean-up of areas' databases -is not performed when a single link state -change arrives. To lower the CPU utilization, it's processed later -at periodical intervals of num seconds. The default value is 1. -

-

ecmp switch [limit number]

This option specifies whether OSPF is allowed to generate -ECMP (equal-cost multipath) routes. Such routes are used when -there are several directions to the destination, each with -the same (computed) cost. This option also allows to specify -a limit on maximal number of nexthops in one route. By -default, ECMP is disabled. If enabled, default value of the -limit is 16. -

-

area id

This defines an OSPF area with given area ID (an integer or an IPv4 -address, similarly to a router ID). The most important area is -the backbone (ID 0) to which every other area must be connected. -

-

stub

This option configures the area to be a stub area. External -routes are not flooded into stub areas. Also summary LSAs can be -limited in stub areas (see option summary). -By default, the area is not a stub area. -

-

nssa

This option configures the area to be a NSSA (Not-So-Stubby -Area). NSSA is a variant of a stub area which allows a -limited way of external route propagation. Global external -routes are not propagated into a NSSA, but an external route -can be imported into NSSA as a (area-wide) NSSA-LSA (and -possibly translated and/or aggregated on area boundary). -By default, the area is not NSSA. -

-

summary switch

This option controls propagation of summary LSAs into stub or -NSSA areas. If enabled, summary LSAs are propagated as usual, -otherwise just the default summary route (0.0.0.0/0) is -propagated (this is sometimes called totally stubby area). If -a stub area has more area boundary routers, propagating -summary LSAs could lead to more efficient routing at the cost -of larger link state database. Default value is no. -

-

default nssa switch

When summary option is enabled, default summary route is -no longer propagated to the NSSA. In that case, this option -allows to originate default route as NSSA-LSA to the NSSA. -Default value is no. -

-

default cost num

This option controls the cost of a default route propagated to -stub and NSSA areas. Default value is 1000. -

-

default cost2 num

When a default route is originated as NSSA-LSA, its cost -can use either type 1 or type 2 metric. This option allows -to specify the cost of a default route in type 2 metric. -By default, type 1 metric (option default cost) is used. -

-

translator switch

This option controls translation of NSSA-LSAs into external -LSAs. By default, one translator per NSSA is automatically -elected from area boundary routers. If enabled, this area -boundary router would unconditionally translate all NSSA-LSAs -regardless of translator election. Default value is no. -

-

translator stability num

This option controls the translator stability interval (in -seconds). When the new translator is elected, the old one -keeps translating until the interval is over. Default value -is 40. -

-

networks { set }

Definition of area IP ranges. This is used in summary LSA origination. -Hidden networks are not propagated into other areas. -

-

external { set }

Definition of external area IP ranges for NSSAs. This is used -for NSSA-LSA translation. Hidden networks are not translated -into external LSAs. Networks can have configured route tag. -

-

stubnet prefix { options }

Stub networks are networks that are not transit networks -between OSPF routers. They are also propagated through an -OSPF area as a part of a link state database. By default, -BIRD generates a stub network record for each primary network -address on each OSPF interface that does not have any OSPF -neighbors, and also for each non-primary network address on -each OSPF interface. This option allows to alter a set of -stub networks propagated by this router. -

Each instance of this option adds a stub network with given -network prefix to the set of propagated stub network, unless -option hidden is used. It also suppresses default stub -networks for given network prefix. When option -summary is used, also default stub networks that are -subnetworks of given stub network are suppressed. This might -be used, for example, to aggregate generated stub networks. -

-

interface pattern [instance num]

Defines that the specified interfaces belong to the area being defined. -See -interface common option for detailed description. -In OSPFv3, you can specify instance ID for that interface -description, so it is possible to have several instances of -that interface with different options or even in different areas. -

-

virtual link id [instance num]

Virtual link to router with the router id. Virtual link acts -as a point-to-point interface belonging to backbone. The -actual area is used as transport area. This item cannot be in -the backbone. In OSPFv3, you could also use several virtual -links to one destination with different instance IDs. -

-

cost num

Specifies output cost (metric) of an interface. Default value is 10. -

-

stub switch

If set, the interface does not listen to any packets and does not send -any hello. Default value is no. -

-

hello num

Specifies interval in seconds between sending of Hello messages. Beware, all -routers on the same network need to have the same hello interval. -Default value is 10. -

-

poll num

Specifies interval in seconds between sending of Hello messages for -some neighbors on NBMA network. Default value is 20. -

-

retransmit num

Specifies interval in seconds between retransmissions of unacknowledged updates. -Default value is 5. -

-

priority num

On every multiple access network (e.g., the Ethernet) Designated Router -and Backup Designated Router are elected. These routers have some -special functions in the flooding process. Higher priority increases -preferences in this election. Routers with priority 0 are not -eligible. Default value is 1. -

-

wait num

After start, router waits for the specified number of seconds between starting -election and building adjacency. Default value is 40. -

-

dead count num

When the router does not receive any messages from a neighbor in -dead count*hello seconds, it will consider the neighbor down. -

-

dead num

When the router does not receive any messages from a neighbor in -dead seconds, it will consider the neighbor down. If both directives -dead count and dead are used, dead has precedence. -

-

rx buffer num

This sets the size of buffer used for receiving packets. The buffer should -be bigger than maximal size of any packets. Value NORMAL (default) -means 2*MTU, value LARGE means maximal allowed packet - 65535. -

-

type broadcast|bcast

BIRD detects a type of a connected network automatically, but -sometimes it's convenient to force use of a different type -manually. On broadcast networks (like ethernet), flooding -and Hello messages are sent using multicasts (a single packet -for all the neighbors). A designated router is elected and it -is responsible for synchronizing the link-state databases and -originating network LSAs. This network type cannot be used on -physically NBMA networks and on unnumbered networks (networks -without proper IP prefix). -

-

type pointopoint|ptp

Point-to-point networks connect just 2 routers together. No -election is performed and no network LSA is originated, which -makes it simpler and faster to establish. This network type -is useful not only for physically PtP ifaces (like PPP or -tunnels), but also for broadcast networks used as PtP links. -This network type cannot be used on physically NBMA networks. -

-

type nonbroadcast|nbma

On NBMA networks, the packets are sent to each neighbor -separately because of lack of multicast capabilities. -Like on broadcast networks, a designated router is elected, -which plays a central role in propagation of LSAs. -This network type cannot be used on unnumbered networks. -

-

type pointomultipoint|ptmp

This is another network type designed to handle NBMA -networks. In this case the NBMA network is treated as a -collection of PtP links. This is useful if not every pair of -routers on the NBMA network has direct communication, or if -the NBMA network is used as an (possibly unnumbered) PtP -link. -

-

strict nonbroadcast switch

If set, don't send hello to any undefined neighbor. This switch -is ignored on other than NBMA or PtMP networks. Default value is no. -

-

real broadcast switch

In type broadcast or type ptp network -configuration, OSPF packets are sent as IP multicast -packets. This option changes the behavior to using -old-fashioned IP broadcast packets. This may be useful as a -workaround if IP multicast for some reason does not work or -does not work reliably. This is a non-standard option and -probably is not interoperable with other OSPF -implementations. Default value is no. -

-

ptp netmask switch

In type ptp network configurations, OSPFv2 -implementations should ignore received netmask field in hello -packets and should send hello packets with zero netmask field -on unnumbered PtP links. But some OSPFv2 implementations -perform netmask checking even for PtP links. This option -specifies whether real netmask will be used in hello packets -on type ptp interfaces. You should ignore this option -unless you meet some compatibility problems related to this -issue. Default value is no for unnumbered PtP links, yes -otherwise. -

-

check link switch

If set, a hardware link state (reported by OS) is taken into -consideration. When a link disappears (e.g. an ethernet cable is -unplugged), neighbors are immediately considered unreachable -and only the address of the iface (instead of whole network -prefix) is propagated. It is possible that some hardware -drivers or platforms do not implement this feature. Default value is no. -

-

ttl security [switch | tx only]

TTL security is a feature that protects routing protocols -from remote spoofed packets by using TTL 255 instead of TTL 1 -for protocol packets destined to neighbors. Because TTL is -decremented when packets are forwarded, it is non-trivial to -spoof packets with TTL 255 from remote locations. Note that -this option would interfere with OSPF virtual links. -

If this option is enabled, the router will send OSPF packets -with TTL 255 and drop received packets with TTL less than -255. If this option is set to tx only, TTL 255 is used -for sent packets, but is not checked for received -packets. Default value is no. -

-

tx class|dscp|priority num

These options specify the ToS/DiffServ/Traffic class/Priority -of the outgoing OSPF packets. See -tx class common option for detailed description. -

-

ecmp weight num

When ECMP (multipath) routes are allowed, this value specifies -a relative weight used for nexthops going through the iface. -Allowed values are 1-256. Default value is 1. -

-

authentication none

No passwords are sent in OSPF packets. This is the default value. -

-

authentication simple

Every packet carries 8 bytes of password. Received packets -lacking this password are ignored. This authentication mechanism is -very weak. -

-

authentication cryptographic

16-byte long MD5 digest is appended to every packet. For the digest -generation 16-byte long passwords are used. Those passwords are -not sent via network, so this mechanism is quite secure. -Packets can still be read by an attacker. -

-

password "text"

An 8-byte or 16-byte password used for authentication. -See -password common option for detailed description. -

-

neighbors { set }

A set of neighbors to which Hello messages on NBMA or PtMP -networks are to be sent. For NBMA networks, some of them -could be marked as eligible. In OSPFv3, link-local addresses -should be used, using global ones is possible, but it is -nonstandard and might be problematic. And definitely, -link-local and global addresses should not be mixed. -

-

-

-

Attributes

- -

OSPF defines four route attributes. Each internal route has a metric. -Metric is ranging from 1 to infinity (65535). -External routes use metric type 1 or metric type 2. -A metric of type 1 is comparable with internal metric, a -metric of type 2 is always longer -than any metric of type 1 or any internal metric. -Internal metric or metric of type 1 is stored in attribute -ospf_metric1, metric type 2 is stored in attribute ospf_metric2. -If you specify both metrics only metric1 is used. -

Each external route can also carry attribute ospf_tag which is a -32-bit integer which is used when exporting routes to other protocols; -otherwise, it doesn't affect routing inside the OSPF domain at all. -The fourth attribute ospf_router_id is a router ID of the router -advertising that route/network. This attribute is read-only. Default -is ospf_metric2 = 10000 and ospf_tag = 0. -

-

Example

- -

-

-


-
-protocol ospf MyOSPF {
-        rfc1583compat yes;
-        tick 2;
-        export filter {
-                if source = RTS_BGP then {
-                        ospf_metric1 = 100;
-                        accept;
-                }
-                reject;
-        };
-        area 0.0.0.0 {
-                interface "eth*" {
-                        cost 11;
-                        hello 15;
-                        priority 100;
-                        retransmit 7;
-                        authentication simple;
-                        password "aaa";
-                };
-                interface "ppp*" {
-                        cost 100;
-                        authentication cryptographic;
-                        password "abc" {
-                                id 1;
-                                generate to "22-04-2003 11:00:06";
-                                accept from "17-01-2001 12:01:05";
-                        };
-                        password "def" {
-                                id 2;
-                                generate to "22-07-2005 17:03:21";
-                                accept from "22-02-2001 11:34:06";
-                        };
-                };
-                interface "arc0" {
-                        cost 10;
-                        stub yes;
-                };
-                interface "arc1";
-        };
-        area 120 {
-                stub yes;
-                networks {
-                        172.16.1.0/24;
-                        172.16.2.0/24 hidden;
-                }
-                interface "-arc0" , "arc*" {
-                        type nonbroadcast;
-                        authentication none;
-                        strict nonbroadcast yes;
-                        wait 120;
-                        poll 40;
-                        dead count 8;
-                        neighbors {
-                                192.168.120.1 eligible;
-                                192.168.120.2;
-                                192.168.120.10;
-                        };
-                };
-        };
-}
-
-
-

-

6.6 Pipe -

- -

Introduction

- -

The Pipe protocol serves as a link between two routing tables, allowing routes to be -passed from a table declared as primary (i.e., the one the pipe is connected to using the -table configuration keyword) to the secondary one (declared using peer table) -and vice versa, depending on what's allowed by the filters. Export filters control export -of routes from the primary table to the secondary one, import filters control the opposite -direction. -

-

The Pipe protocol may work in the transparent mode or in the opaque mode. -In the transparent mode, the Pipe protocol retransmits all routes from -one table to the other table, retaining their original source and -attributes. If import and export filters are set to accept, then both -tables would have the same content. The transparent mode is the default mode. -

-

In the opaque mode, the Pipe protocol retransmits optimal route -from one table to the other table in a similar way like other -protocols send and receive routes. Retransmitted route will have the -source set to the Pipe protocol, which may limit access to protocol -specific route attributes. This mode is mainly for compatibility, it -is not suggested for new configs. The mode can be changed by -mode option. -

-

The primary use of multiple routing tables and the Pipe protocol is for policy routing, -where handling of a single packet doesn't depend only on its destination address, but also -on its source address, source interface, protocol type and other similar parameters. -In many systems (Linux being a good example), the kernel allows to enforce routing policies -by defining routing rules which choose one of several routing tables to be used for a packet -according to its parameters. Setting of these rules is outside the scope of BIRD's work -(on Linux, you can use the ip command), but you can create several routing tables in BIRD, -connect them to the kernel ones, use filters to control which routes appear in which tables -and also you can employ the Pipe protocol for exporting a selected subset of one table to -another one. -

-

Configuration

- -

-

-
peer table table

Defines secondary routing table to connect to. The -primary one is selected by the table keyword. -

-

mode opaque|transparent

Specifies the mode for the pipe to work in. Default is transparent. -

-

-

Attributes

- -

The Pipe protocol doesn't define any route attributes. -

-

Example

- -

Let's consider a router which serves as a boundary router of two different autonomous -systems, each of them connected to a subset of interfaces of the router, having its own -exterior connectivity and wishing to use the other AS as a backup connectivity in case -of outage of its own exterior line. -

-

Probably the simplest solution to this situation is to use two routing tables (we'll -call them as1 and as2) and set up kernel routing rules, so that packets having -arrived from interfaces belonging to the first AS will be routed according to as1 -and similarly for the second AS. Thus we have split our router to two logical routers, -each one acting on its own routing table, having its own routing protocols on its own -interfaces. In order to use the other AS's routes for backup purposes, we can pass -the routes between the tables through a Pipe protocol while decreasing their preferences -and correcting their BGP paths to reflect the AS boundary crossing. -

-


-
-table as1;                              # Define the tables
-table as2;
-
-protocol kernel kern1 {                 # Synchronize them with the kernel
-        table as1;
-        kernel table 1;
-}
-
-protocol kernel kern2 {
-        table as2;
-        kernel table 2;
-}
-
-protocol bgp bgp1 {                     # The outside connections
-        table as1;
-        local as 1;
-        neighbor 192.168.0.1 as 1001;
-        export all;
-        import all;
-}
-
-protocol bgp bgp2 {
-        table as2;
-        local as 2;
-        neighbor 10.0.0.1 as 1002;
-        export all;
-        import all;
-}
-
-protocol pipe {                         # The Pipe
-        table as1;
-        peer table as2;
-        export filter {
-                if net ~ [ 1.0.0.0/8+] then {   # Only AS1 networks
-                        if preference>10 then preference = preference-10;
-                        if source=RTS_BGP then bgp_path.prepend(1);
-                        accept;
-                }
-                reject;
-        };
-        import filter {
-                if net ~ [ 2.0.0.0/8+] then {   # Only AS2 networks
-                        if preference>10 then preference = preference-10;
-                        if source=RTS_BGP then bgp_path.prepend(2);
-                        accept;
-                }
-                reject;
-        };
-}
-
-
-

-

6.7 RAdv -

- -

Introduction

- -

The RAdv protocol is an implementation of Router Advertisements, -which are used in the IPv6 stateless autoconfiguration. IPv6 routers -send (in irregular time intervals or as an answer to a request) -advertisement packets to connected networks. These packets contain -basic information about a local network (e.g. a list of network -prefixes), which allows network hosts to autoconfigure network -addresses and choose a default route. BIRD implements router behavior -as defined in -RFC 4861 -ftp://ftp.rfc-editor.org/in-notes/rfc4861.txt -and also the DNS extensions from -RFC 6106 -ftp://ftp.rfc-editor.org/in-notes/rfc6106.txt. -

-

Configuration

- -

There are several classes of definitions in RAdv configuration -- -interface definitions, prefix definitions and DNS definitions: -

-

-
interface pattern [, ...] { options }

Interface definitions specify a set of interfaces on which the -protocol is activated and contain interface specific options. -See -interface common options for -detailed description. -

-

prefix prefix { options }

Prefix definitions allow to modify a list of advertised -prefixes. By default, the advertised prefixes are the same as -the network prefixes assigned to the interface. For each -network prefix, the matching prefix definition is found and -its options are used. If no matching prefix definition is -found, the prefix is used with default options. -

Prefix definitions can be either global or interface-specific. -The second ones are part of interface options. The prefix -definition matching is done in the first-match style, when -interface-specific definitions are processed before global -definitions. As expected, the prefix definition is matching if -the network prefix is a subnet of the prefix in prefix -definition. -

-

rdnss { options }

RDNSS definitions allow to specify a list of advertised -recursive DNS servers together with their options. As options -are seldom necessary, there is also a short variant rdnss -address that just specifies one DNS server. Multiple -definitions are cumulative. RDNSS definitions may also be -interface-specific when used inside interface options. By -default, interface uses both global and interface-specific -options, but that can be changed by rdnss local option. -

-

dnssl { options }

DNSSL definitions allow to specify a list of advertised DNS -search domains together with their options. Like rdnss -above, multiple definitions are cumulative, they can be used -also as interface-specific options and there is a short -variant dnssl domain that just specifies one DNS -search domain. -

- -

trigger prefix

RAdv protocol could be configured to change its behavior based -on availability of routes. When this option is used, the -protocol waits in suppressed state until a trigger route -(for the specified network) is exported to the protocol, the -protocol also returns to suppressed state if the -trigger route disappears. Note that route export depends -on specified export filter, as usual. This option could be -used, e.g., for handling failover in multihoming scenarios. -

During suppressed state, router advertisements are generated, -but with some fields zeroed. Exact behavior depends on which -fields are zeroed, this can be configured by -sensitive option for appropriate fields. By default, just -default lifetime (also called router lifetime) is -zeroed, which means hosts cannot use the router as a default -router. preferred lifetime and valid lifetime could -also be configured as sensitive for a prefix, which would -cause autoconfigured IPs to be deprecated or even removed. -

-

-

Interface specific options: -

-

-
max ra interval expr

Unsolicited router advertisements are sent in irregular time -intervals. This option specifies the maximum length of these -intervals, in seconds. Valid values are 4-1800. Default: 600 -

-

min ra interval expr

This option specifies the minimum length of those intervals, in -seconds. Must be at least 3 and at most 3/4 * max ra interval. -Default: about 1/3 * max ra interval. -

-

min delay expr

The minimum delay between two consecutive router advertisements, -in seconds. Default: 3 -

-

managed switch

This option specifies whether hosts should use DHCPv6 for -IP address configuration. Default: no -

-

other config switch

This option specifies whether hosts should use DHCPv6 to -receive other configuration information. Default: no -

-

link mtu expr

This option specifies which value of MTU should be used by -hosts. 0 means unspecified. Default: 0 -

-

reachable time expr

This option specifies the time (in milliseconds) how long -hosts should assume a neighbor is reachable (from the last -confirmation). Maximum is 3600000, 0 means unspecified. -Default 0. -

-

retrans timer expr

This option specifies the time (in milliseconds) how long -hosts should wait before retransmitting Neighbor Solicitation -messages. 0 means unspecified. Default 0. -

-

current hop limit expr

This option specifies which value of Hop Limit should be used -by hosts. Valid values are 0-255, 0 means unspecified. Default: 64 -

-

default lifetime expr [sensitive switch]

This option specifies the time (in seconds) how long (after -the receipt of RA) hosts may use the router as a default -router. 0 means do not use as a default router. For -sensitive option, see -trigger. -Default: 3 * max ra interval, sensitive yes. -

-

rdnss local switch

Use only local (interface-specific) RDNSS definitions for this -interface. Otherwise, both global and local definitions are -used. Could also be used to disable RDNSS for given interface -if no local definitions are specified. Default: no. -

-

dnssl local switch

Use only local DNSSL definitions for this interface. See -rdnss local option above. Default: no. -

-

-

-

Prefix specific options: -

-

-
skip switch

This option allows to specify that given prefix should not be -advertised. This is useful for making exceptions from a -default policy of advertising all prefixes. Note that for -withdrawing an already advertised prefix it is more useful to -advertise it with zero valid lifetime. Default: no -

-

onlink switch

This option specifies whether hosts may use the advertised -prefix for onlink determination. Default: yes -

-

autonomous switch

This option specifies whether hosts may use the advertised -prefix for stateless autoconfiguration. Default: yes -

-

valid lifetime expr [sensitive switch]

This option specifies the time (in seconds) how long (after -the receipt of RA) the prefix information is valid, i.e., -autoconfigured IP addresses can be assigned and hosts with -those IP addresses are considered directly reachable. 0 means -the prefix is no longer valid. For sensitive option, see -trigger. Default: 86400 (1 day), sensitive no. -

-

preferred lifetime expr [sensitive switch]

This option specifies the time (in seconds) how long (after -the receipt of RA) IP addresses generated from the prefix -using stateless autoconfiguration remain preferred. For -sensitive option, see -trigger. -Default: 14400 (4 hours), sensitive no. -

-

-

-

RDNSS specific options: -

-

-
ns address

This option specifies one recursive DNS server. Can be used -multiple times for multiple servers. It is mandatory to have -at least one ns option in rdnss definition. -

-

lifetime [mult] expr

This option specifies the time how long the RDNSS information -may be used by clients after the receipt of RA. It is -expressed either in seconds or (when mult is used) in -multiples of max ra interval. Note that RDNSS information -is also invalidated when default lifetime expires. 0 -means these addresses are no longer valid DNS servers. -Default: 3 * max ra interval. -

-

-

-

DNSSL specific options: -

-

-
domain address

This option specifies one DNS search domain. Can be used -multiple times for multiple domains. It is mandatory to have -at least one domain option in dnssl definition. -

-

lifetime [mult] expr

This option specifies the time how long the DNSSL information -may be used by clients after the receipt of RA. Details are -the same as for RDNSS lifetime option above. -Default: 3 * max ra interval. -

-

-

-

Example

- -

-


-
-protocol radv {
-        interface "eth2" {
-                max ra interval 5;      # Fast failover with more routers
-                managed yes;            # Using DHCPv6 on eth2
-                prefix ::/0 {
-                        autonomous off; # So do not autoconfigure any IP
-                };
-        };
-
-        interface "eth*";               # No need for any other options
-
-        prefix 2001:0DB8:1234::/48 {
-                preferred lifetime 0;   # Deprecated address range
-        };
-
-        prefix 2001:0DB8:2000::/48 {
-                autonomous off;         # Do not autoconfigure
-        };
-
-        rdnss 2001:0DB8:1234::10;       # Short form of RDNSS
-
-        rdnss {
-                lifetime mult 10;
-                ns 2001:0DB8:1234::11;
-                ns 2001:0DB8:1234::12;
-        };
-
-        dnssl {
-                lifetime 3600;
-                domain "abc.com";
-                domain "xyz.com";
-        };
-}
-
-
-

-

6.8 RIP -

- -

Introduction

- -

The RIP protocol (also sometimes called Rest In Pieces) is a simple protocol, where each router broadcasts (to all its neighbors) -distances to all networks it can reach. When a router hears distance to another network, it increments -it and broadcasts it back. Broadcasts are done in regular intervals. Therefore, if some network goes -unreachable, routers keep telling each other that its distance is the original distance plus 1 (actually, plus -interface metric, which is usually one). After some time, the distance reaches infinity (that's 16 in -RIP) and all routers know that network is unreachable. RIP tries to minimize situations where -counting to infinity is necessary, because it is slow. Due to infinity being 16, you can't use -RIP on networks where maximal distance is higher than 15 hosts. You can read more about RIP at -http://www.ietf.org/html.charters/rip-charter.html. Both IPv4 -(RFC 1723 -ftp://ftp.rfc-editor.org/in-notes/rfc1723.txt) -and IPv6 (RFC 2080 -ftp://ftp.rfc-editor.org/in-notes/rfc2080.txt) versions of RIP are supported by BIRD, historical RIPv1 (RFC 1058 -ftp://ftp.rfc-editor.org/in-notes/rfc1058.txt) is -not currently supported. RIPv4 MD5 authentication (RFC 2082 -ftp://ftp.rfc-editor.org/in-notes/rfc2082.txt) is supported. -

-

RIP is a very simple protocol, and it has a lot of shortcomings. Slow -convergence, big network load and inability to handle larger networks -makes it pretty much obsolete. (It is still usable on very small networks.) -

-

Configuration

- -

In addition to options common to all other protocols, RIP supports the following ones: -

-

-
authentication none|plaintext|md5

selects authentication method to be used. none means that -packets are not authenticated at all, plaintext means that a plaintext password is embedded -into each packet, and md5 means that packets are authenticated using a MD5 cryptographic -hash. If you set authentication to not-none, it is a good idea to add password -section. Default: none. -

-

honor always|neighbor|never

specifies when should requests for dumping routing table -be honored. (Always, when sent from a host on a directly connected -network or never.) Routing table updates are honored only from -neighbors, that is not configurable. Default: never. -

-

-

There are some options that can be specified per-interface: -

-

-
metric num

This option specifies the metric of the interface. Valid -

-

mode multicast|broadcast|quiet|nolisten|version1

This option selects the mode for RIP to work in. If nothing is -specified, RIP runs in multicast mode. version1 is -currently equivalent to broadcast, and it makes RIP talk -to a broadcast address even though multicast mode is -possible. quiet option means that RIP will not transmit -any periodic messages to this interface and nolisten -means that RIP will send to this interface but not listen to it. -

-

ttl security [switch | tx only]

TTL security is a feature that protects routing protocols -from remote spoofed packets by using TTL 255 instead of TTL 1 -for protocol packets destined to neighbors. Because TTL is -decremented when packets are forwarded, it is non-trivial to -spoof packets with TTL 255 from remote locations. -

If this option is enabled, the router will send RIP packets -with TTL 255 and drop received packets with TTL less than -255. If this option is set to tx only, TTL 255 is used -for sent packets, but is not checked for received -packets. Such setting does not offer protection, but offers -compatibility with neighbors regardless of whether they use -ttl security. -

Note that for RIPng, TTL security is a standard behavior -(required by RFC 2080), but BIRD uses tx only by -default, for compatibility with older versions. For IPv4 RIP, -default value is no. -

-

tx class|dscp|priority num

These options specify the ToS/DiffServ/Traffic class/Priority -of the outgoing RIP packets. See -tx class common option for detailed description. -

-

-

The following options generally override behavior specified in RFC. If you use any of these -options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything -other than equally configured BIRD. I have warned you. -

-

-
port number

selects IP port to operate on, default 520. (This is useful when testing BIRD, if you -set this to a port >1024, you will not need to run bird with UID==0). -

-

infinity number

selects the value of infinity, default is 16. Bigger values will make protocol convergence -even slower. -

-

period number

specifies the number of seconds between periodic updates. Default is 30 seconds. A lower -number will mean faster convergence but bigger network -load. Do not use values lower than 12. -

-

timeout time number

specifies how old route has to be to be considered unreachable. Default is 4*period. -

-

garbage time number

specifies how old route has to be to be discarded. Default is 10*period. -

-

-

Attributes

- -

RIP defines two route attributes: -

-

-
int rip_metric

RIP metric of the route (ranging from 0 to infinity). -When routes from different RIP instances are available and all of them have the same -preference, BIRD prefers the route with lowest rip_metric. -When importing a non-RIP route, the metric defaults to 5. -

-

int rip_tag

RIP route tag: a 16-bit number which can be used -to carry additional information with the route (for example, an originating AS number -in case of external routes). When importing a non-RIP route, the tag defaults to 0. -

-

-

Example

- -

-


-
-protocol rip MyRIP_test {
-        debug all;
-        port 1520;
-        period 12;
-        garbage time 60;
-        interface "eth0" { metric 3; mode multicast; };
-        interface "eth*" { metric 2; mode broadcast; };
-        honor neighbor;
-        authentication none;
-        import filter { print "importing"; accept; };
-        export filter { print "exporting"; accept; };
-}
-
-
-

-

6.9 Static -

- -

The Static protocol doesn't communicate with other routers in the network, -but instead it allows you to define routes manually. This is often used for -specifying how to forward packets to parts of the network which don't use -dynamic routing at all and also for defining sink routes (i.e., those -telling to return packets as undeliverable if they are in your IP block, -you don't have any specific destination for them and you don't want to send -them out through the default route to prevent routing loops). -

-

There are five types of static routes: `classical' routes telling -to forward packets to a neighboring router, multipath routes -specifying several (possibly weighted) neighboring routers, device -routes specifying forwarding to hosts on a directly connected network, -recursive routes computing their nexthops by doing route table lookups -for a given IP and special routes (sink, blackhole etc.) which specify -a special action to be done instead of forwarding the packet. -

-

When the particular destination is not available (the interface is down or -the next hop of the route is not a neighbor at the moment), Static just -uninstalls the route from the table it is connected to and adds it again as soon -as the destination becomes adjacent again. -

-

The Static protocol does not have many configuration options. The -definition of the protocol contains mainly a list of static routes: -

-

-
route prefix via ip

Static route through -a neighboring router. -

route prefix multipath via ip [weight num] [via ...]

Static multipath route. Contains several nexthops (gateways), possibly -with their weights. -

route prefix via "interface"

Static device -route through an interface to hosts on a directly connected network. -

route prefix recursive ip

Static recursive route, -its nexthop depends on a route table lookup for given IP address. -

route prefix blackhole|unreachable|prohibit

Special routes -specifying to silently drop the packet, return it as unreachable or return -it as administratively prohibited. First two targets are also known -as drop and reject. -

-

check link switch

If set, hardware link states of network interfaces are taken -into consideration. When link disappears (e.g. ethernet cable -is unplugged), static routes directing to that interface are -removed. It is possible that some hardware drivers or -platforms do not implement this feature. Default: off. -

-

igp table name

Specifies a table that is used -for route table lookups of recursive routes. Default: the -same table as the protocol is connected to. -

-

-

Static routes have no specific attributes. -

-

Example static config might look like this: -

-

-


-
-protocol static {
-        table testable;                  # Connect to a non-default routing table
-        route 0.0.0.0/0 via 198.51.100.130; # Default route
-        route 10.0.0.0/8 multipath       # Multipath route
-                via 198.51.100.10 weight 2
-                via 198.51.100.20
-                via 192.0.2.1;
-        route 203.0.113.0/24 unreachable; # Sink route
-        route 10.2.0.0/24 via "arc0";    # Secondary network
-}
-
-
-

-


-Next -Previous -Contents - - diff --git a/doc/bird.sgml b/doc/bird.sgml index a2266424..5ceb8ab9 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -477,7 +477,7 @@ to zero to disable it. An empty is equivalent to import limit [ +

Session specific options (part of + interval + BFD ensures availability of the forwarding path associated with the + session by periodically sending BFD control packets in both + directions. The rate of such packets is controlled by two options, + min rx interval + This option specifies the minimum RX interval, which is announced to the + neighbor and used there to limit the neighbor's rate of generated BFD + control packets. Default: 10 ms. + + min tx interval + This option specifies the desired TX interval, which controls the rate + of generated BFD control packets (together with idle tx interval + In order to limit unnecessary traffic in cases where a neighbor is not + available or not running BFD, the rate of generated BFD control packets + is lower when the BFD session is not up. This option specifies the + desired TX interval in such cases instead of multiplier + Failure detection time for BFD sessions is based on established rate of + BFD control packets (min rx/tx interval) multiplied by this + multiplier, which is essentially (ignoring jitter) a number of missed + packets after which the session is declared down. Note that rates and + multipliers could be different in each direction of a BFD session. + Default: 5. + + passive + Generally, both BFD session endpoints try to establish the session by + sending control packets to the other side. This option allows to enable + passive mode, which means that the router does not send BFD packets + until it has received one from the other side. Default: disabled. + + +Example + +

+protocol bfd { + interface "eth*" { + min rx interval 20 ms; + min tx interval 50 ms; + idle tx interval 300 ms; + }; + interface "gre*" { + interval 200 ms; + multiplier 10; + passive; + }; + multihop { + interval 200 ms; + multiplier 10; + }; + + neighbor 192.168.1.10; + neighbor 192.168.2.2 dev "eth2"; + neighbor 192.168.10.1 local 192.168.1.1 multihop; +} + + BGP

The Border Gateway Protocol is the routing protocol used for backbone @@ -1258,8 +1430,8 @@ AS). Each AS is a part of the network with common management and common routing policy. It is identified by a unique 16-bit number (ASN). Routers within each AS usually exchange AS-internal routing information with each other using an interior gateway protocol (IGP, -such as OSPF or RIP). Boundary routers at the border of -the AS communicate global (inter-AS) network reachability information with +such as OSPF or RIP). Boundary routers at the border of the AS +communicate global (inter-AS) network reachability information with their neighbors in the neighboring AS'es via exterior BGP (eBGP) and redistribute received information to other routers in the AS via interior BGP (iBGP). @@ -1412,7 +1584,15 @@ for each neighbor using the following configuration parameters: igp table Specifies a table that is used as an IGP routing table. Default: the same as the table BGP is connected to. - + + bfd switch + BGP could use BFD protocol as an advisory mechanism for neighbor + liveness and failure detection. If enabled, BIRD setups a BFD session + for the BGP neighbor and tracks its liveness by it. This has an + advantage of an order of magnitude lower detection times in case of + failure. Note that BFD protocol also has to be configured, see + section for details. Default: disabled. + ttl security Use GTSM (RFC 5082 - the generalized TTL security mechanism). GTSM protects against spoofed packets by ignoring received packets with a smaller @@ -1986,6 +2166,7 @@ protocol ospf <name> { real broadcast <switch>; ptp netmask <switch>; check link <switch>; + bfd <switch>; ecmp weight <num>; ttl security [<switch>; | tx only] tx class|dscp <num>; @@ -2260,6 +2441,14 @@ protocol ospf <name> { prefix) is propagated. It is possible that some hardware drivers or platforms do not implement this feature. Default value is no. 
+ bfd switch + OSPF could use BFD protocol as an advisory mechanism for neighbor + liveness and failure detection. If enabled, BIRD setups a BFD session + for each OSPF neighbor and tracks its liveness by it. This has an + advantage of an order of magnitude lower detection times in case of + failure. Note that BFD protocol also has to be configured, see + section for details. Default value is no. + ttl security [ TTL security is a feature that protects routing protocols from remote spoofed packets by using TTL 255 instead of TTL 1 diff --git a/lib/birdlib.h b/lib/birdlib.h index 2d6849e1..a5424958 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -24,6 +24,8 @@ #define _MAX(a,b) (((a)>(b))?(a):(b)) #ifndef PARSER +#undef MIN +#undef MAX #define MIN(a,b) _MIN(a,b) #define MAX(a,b) _MAX(a,b) #endif diff --git a/lib/printf.c b/lib/printf.c index d8600b61..41e1cc0d 100644 --- a/lib/printf.c +++ b/lib/printf.c @@ -276,7 +276,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) ip_ntox(va_arg(args, ip_addr), ipbuf); else { ip_ntop(va_arg(args, ip_addr), ipbuf); - if (field_width > 0) + if (field_width == 1) field_width = STD_ADDRESS_P_LENGTH; } s = ipbuf; diff --git a/nest/proto.c b/nest/proto.c index 0c85c2d9..c15247be 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -681,6 +681,9 @@ proto_build(struct protocol *p) } } +/* FIXME: convert this call to some protocol hook */ +extern void bfd_init_all(void); + /** * protos_build - build a protocol list * @@ -718,8 +721,10 @@ protos_build(void) #ifdef CONFIG_BGP proto_build(&proto_bgp); #endif - // XXX +#ifdef CONFIG_BFD proto_build(&proto_bfd); + bfd_init_all(); +#endif proto_pool = rp_new(&root_pool, "Protocols"); proto_flush_event = ev_new(proto_pool); diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 3e2af9d5..5ebfadc1 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -4,6 +4,103 @@ * Can be freely distributed and used under the terms of the GNU GPL. 
*/ +/** + * DOC: Bidirectional Forwarding Detection + * + * The BFD protocol is implemented in three files: |bfd.c| containing the + * protocol logic and the protocol glue with BIRD core, |packets.c| handling BFD + * packet processing, RX, TX and protocol sockets. |io.c| then contains generic + * code for the event loop, threads and event sources (sockets, microsecond + * timers). This generic code will be merged to the main BIRD I/O code in the + * future. + * + * The BFD implementation uses a separate thread with an internal event loop for + * handling the protocol logic, which requires high-res and low-latency timing, + * so it is not affected by the rest of BIRD, which has several low-granularity + * hooks in the main loop, uses second-based timers and cannot offer good + * latency. The core of BFD protocol (the code related to BFD sessions, + * interfaces and packets) runs in the BFD thread, while the rest (the code + * related to BFD requests, BFD neighbors and the protocol glue) runs in the + * main thread. + * + * BFD sessions are represented by structure &bfd_session that contains a state + * related to the session and two timers (TX timer for periodic packets and hold + * timer for session timeout). These sessions are allocated from @session_slab + * and are accessible by two hash tables, @session_hash_id (by session ID) and + * @session_hash_ip (by IP addresses of neighbors). Slab and both hashes are in + * the main protocol structure &bfd_proto. The protocol logic related to BFD + * sessions is implemented in internal functions bfd_session_*(), which are + * expected to be called from the context of BFD thread, and external functions + * bfd_add_session(), bfd_remove_session() and bfd_reconfigure_session(), which + * form an interface to the BFD core for the rest and are expected to be called + * from the context of main thread. + * + * Each BFD session has an associated BFD interface, represented by structure + * &bfd_iface. 
A BFD interface contains a socket used for TX (the one for RX is + * shared in &bfd_proto), an interface configuration and reference counter. + * Compared to interface structures of other protocols, these structures are not + * created and removed based on interface notification events, but according to + * the needs of BFD sessions. When a new session is created, it requests a + * proper BFD interface by function bfd_get_iface(), which either finds an + * existing one in &iface_list (from &bfd_proto) or allocates a new one. When a + * session is removed, an associated iface is discharged by bfd_free_iface(). + * + * BFD requests are the external API for the other protocols. When a protocol + * wants a BFD session, it calls bfd_request_session(), which creates a + * structure &bfd_request containing appropriate information and a notify hook. + * This structure is a resource associated with the caller's resource pool. When + * a BFD protocol is available, a BFD request is submitted to the protocol, an + * appropriate BFD session is found or created and the request is attached to + * the session. When a session changes state, all attached requests (and related + * protocols) are notified. Note that BFD requests do not depend on BFD protocol + * running. When the BFD protocol is stopped or removed (or not available from + * beginning), related BFD requests are stored in @bfd_wait_list, where they wait + * for a new protocol. + * + * BFD neighbors are just a way to statically configure BFD sessions without + * requests from other protocols. Structures &bfd_neighbor are part of BFD + * configuration (like static routes in the static protocol). BFD neighbors are + * handled by BFD protocol like it is a BFD client -- when a BFD neighbor is + * ready, the protocol just creates a BFD request like any other protocol. + * + * The protocol uses a new generic event loop (structure &birdloop) from |io.c|, + * which supports sockets, timers and events like the main loop. 
Timers + * (structure &timer2) are new microsecond based timers, while sockets and + * events are the same. A birdloop is associated with a thread (field @thread) + * in which event hooks are executed. Most functions for setting event sources + * (like sk_start() or tm2_start()) must be called from the context of that + * thread. Birdloop allows to temporarily acquire the context of that thread for + * the main thread by calling birdloop_enter() and then birdloop_leave(), which + * also ensures mutual exclusion with all event hooks. Note that resources + * associated with a birdloop (like timers) should be attached to the + * independent resource pool, detached from the main resource tree. + * + * There are two kinds of interaction between the BFD core (running in the BFD + * thread) and the rest of BFD (running in the main thread). The first kind are + * configuration calls from main thread to the BFD thread (like bfd_add_session()). + * These calls are synchronous and use birdloop_enter() mechanism for mutual + * exclusion. The second kind is a notification about session changes from the + * BFD thread to the main thread. This is done in an asynchronous way, sessions + * with pending notifications are linked (in the BFD thread) to @notify_list in + * &bfd_proto, and then bfd_notify_hook() in the main thread is activated using + * bfd_notify_kick() and a pipe. The hook then processes scheduled sessions and + * calls hooks from associated BFD requests. This @notify_list (and state fields + * in structure &bfd_session) is protected by a spinlock in &bfd_proto and + * functions bfd_lock_sessions() / bfd_unlock_sessions(). + * + * There are a few data races (accessing @p->p.debug from TRACE() from the BFD + * thread and accessing some private fields of %bfd_session from + * bfd_show_sessions() from the main thread), but these are harmless (I hope). + * + * TODO: document functions and access restrictions for fields in BFD structures. 
+ * + * Supported standards: + * - RFC 5880 - main BFD standard + * - RFC 5881 - BFD for IP links + * - RFC 5882 - generic application of BFD + * - RFC 5883 - BFD for multihop paths + */ + #include "bfd.h" @@ -17,23 +114,34 @@ #define HASH_IP_EQ(a,b) ipa_equal(a,b) #define HASH_IP_FN(k) ipa_hash(k) +static list bfd_proto_list; +static list bfd_wait_list; const char *bfd_state_names[] = { "AdminDown", "Down", "Init", "Up" }; +static void bfd_session_set_min_tx(struct bfd_session *s, u32 val); +static struct bfd_iface *bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface); +static void bfd_free_iface(struct bfd_iface *ifa); static inline void bfd_notify_kick(struct bfd_proto *p); + +/* + * BFD sessions + */ + static void bfd_session_update_state(struct bfd_session *s, uint state, uint diag) { - struct bfd_proto *p = s->bfd; + struct bfd_proto *p = s->ifa->bfd; + uint old_state = s->loc_state; int notify; - if (s->loc_state == state) + if (state == old_state) return; - TRACE(D_EVENTS, "Session changed %I %d %d", s->addr, state, diag); - debug("STATE %I %d %d %d\n", s->addr, s->loc_state, state, diag); - + TRACE(D_EVENTS, "Session to %I changed state from %s to %s", + s->addr, bfd_state_names[old_state], bfd_state_names[state]); + bfd_lock_sessions(p); s->loc_state = state; s->loc_diag = diag; @@ -43,21 +151,14 @@ bfd_session_update_state(struct bfd_session *s, uint state, uint diag) add_tail(&p->notify_list, &s->n); bfd_unlock_sessions(p); - if (notify) - bfd_notify_kick(p); -} + if (state == BFD_STATE_UP) + bfd_session_set_min_tx(s, s->ifa->cf->min_tx_int); -static void -bfd_session_timeout(struct bfd_session *s) -{ - s->rem_state = BFD_STATE_DOWN; - s->rem_id = 0; - s->rem_min_tx_int = 0; - s->rem_min_rx_int = 1; - s->rem_demand_mode = 0; - s->rem_detect_mult = 0; + if (old_state == BFD_STATE_UP) + bfd_session_set_min_tx(s, s->ifa->cf->idle_tx_int); - bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT); + if (notify) + bfd_notify_kick(p); 
} static void @@ -93,10 +194,9 @@ bfd_session_update_detection_time(struct bfd_session *s, int kick) } static void -bfd_session_control_tx_timer(struct bfd_session *s) +bfd_session_control_tx_timer(struct bfd_session *s, int reset) { - if (!s->opened) - goto stop; + // if (!s->opened) goto stop; if (s->passive && (s->rem_id == 0)) goto stop; @@ -111,10 +211,12 @@ bfd_session_control_tx_timer(struct bfd_session *s) goto stop; /* So TX timer should run */ - if (tm2_active(s->tx_timer)) - return; + if (reset || !tm2_active(s->tx_timer)) + { + s->last_tx = 0; + tm2_start(s->tx_timer, 0); + } - tm2_start(s->tx_timer, 0); return; stop: @@ -125,6 +227,10 @@ bfd_session_control_tx_timer(struct bfd_session *s) static void bfd_session_request_poll(struct bfd_session *s, u8 request) { + /* Not sure about this, but doing poll in this case does not make sense */ + if (s->rem_id == 0) + return; + s->poll_scheduled |= request; if (s->poll_active) @@ -132,7 +238,8 @@ bfd_session_request_poll(struct bfd_session *s, u8 request) s->poll_active = s->poll_scheduled; s->poll_scheduled = 0; - bfd_send_ctl(s->bfd, s, 0); + + bfd_session_control_tx_timer(s, 1); } static void @@ -146,11 +253,10 @@ bfd_session_terminate_poll(struct bfd_session *s) if (poll_done & BFD_POLL_RX) s->req_min_rx_int = s->req_min_rx_new; - s->poll_active = 0; + s->poll_active = s->poll_scheduled; + s->poll_scheduled = 0; /* Timers are updated by caller - bfd_session_process_ctl() */ - - // xxx_restart_poll(); } void @@ -191,10 +297,32 @@ bfd_session_process_ctl(struct bfd_session *s, u8 flags, u32 old_tx_int, u32 old if (next_state) bfd_session_update_state(s, next_state, diag); - bfd_session_control_tx_timer(s); + bfd_session_control_tx_timer(s, 0); if (flags & BFD_FLAG_POLL) - bfd_send_ctl(s->bfd, s, 1); + bfd_send_ctl(s->ifa->bfd, s, 1); +} + +static void +bfd_session_timeout(struct bfd_session *s) +{ + struct bfd_proto *p = s->ifa->bfd; + + TRACE(D_EVENTS, "Session to %I expired", s->addr); + + s->rem_state = 
BFD_STATE_DOWN; + s->rem_id = 0; + s->rem_min_tx_int = 0; + s->rem_min_rx_int = 1; + s->rem_demand_mode = 0; + s->rem_detect_mult = 0; + + s->poll_active = 0; + s->poll_scheduled = 0; + + bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_TIMEOUT); + + bfd_session_control_tx_timer(s, 1); } static void @@ -255,8 +383,7 @@ bfd_tx_timer_hook(timer2 *t) struct bfd_session *s = t->data; s->last_tx = current_time(); - // debug("TX %d\n", (s32) (s->last_tx TO_MS)); - bfd_send_ctl(s->bfd, s, 0); + bfd_send_ctl(s->ifa->bfd, s, 0); } static void @@ -277,48 +404,53 @@ bfd_get_free_id(struct bfd_proto *p) } static struct bfd_session * -bfd_add_session(struct bfd_proto *p, ip_addr addr, struct bfd_session_config *opts) +bfd_add_session(struct bfd_proto *p, ip_addr addr, ip_addr local, struct iface *iface) { birdloop_enter(p->loop); + struct bfd_iface *ifa = bfd_get_iface(p, local, iface); + struct bfd_session *s = sl_alloc(p->session_slab); bzero(s, sizeof(struct bfd_session)); s->addr = addr; + s->ifa = ifa; s->loc_id = bfd_get_free_id(p); - debug("XXX INS1 %d %d %u %I\n", p->session_hash_id.count, p->session_hash_ip.count, s->loc_id, s->addr); + HASH_INSERT(p->session_hash_id, HASH_ID, s); - debug("XXX INS2 %d %d\n", p->session_hash_id.count, p->session_hash_ip.count); HASH_INSERT(p->session_hash_ip, HASH_IP, s); - debug("XXX INS3 %d %d\n", p->session_hash_id.count, p->session_hash_ip.count); - s->bfd = p; + /* Initialization of state variables - see RFC 5880 6.8.1 */ s->loc_state = BFD_STATE_DOWN; s->rem_state = BFD_STATE_DOWN; - s->des_min_tx_int = s->des_min_tx_new = opts->min_tx_int; // XXX opts->idle_tx_int; - s->req_min_rx_int = s->req_min_rx_new = opts->min_rx_int; + s->des_min_tx_int = s->des_min_tx_new = ifa->cf->idle_tx_int; + s->req_min_rx_int = s->req_min_rx_new = ifa->cf->min_rx_int; s->rem_min_rx_int = 1; - s->detect_mult = opts->multiplier; - s->passive = opts->passive; + s->detect_mult = ifa->cf->multiplier; + s->passive = ifa->cf->passive; s->tx_timer = 
tm2_new_init(p->tpool, bfd_tx_timer_hook, s, 0, 0); s->hold_timer = tm2_new_init(p->tpool, bfd_hold_timer_hook, s, 0, 0); bfd_session_update_tx_interval(s); + bfd_session_control_tx_timer(s, 1); + + init_list(&s->request_list); + s->last_state_change = now; + + TRACE(D_EVENTS, "Session to %I added", s->addr); birdloop_leave(p->loop); return s; } +/* static void bfd_open_session(struct bfd_proto *p, struct bfd_session *s, ip_addr local, struct iface *ifa) { birdloop_enter(p->loop); - s->bsock = bfd_get_socket(p, local, ifa); - // s->local = local; - // s->iface = ifa; s->opened = 1; bfd_session_control_tx_timer(s); @@ -331,10 +463,6 @@ bfd_close_session(struct bfd_proto *p, struct bfd_session *s) { birdloop_enter(p->loop); - bfd_free_socket(s->bsock); - s->bsock = NULL; - // s->local = IPA_NONE; - // s->iface = NULL; s->opened = 0; bfd_session_update_state(s, BFD_STATE_DOWN, BFD_DIAG_PATH_DOWN); @@ -342,54 +470,295 @@ bfd_close_session(struct bfd_proto *p, struct bfd_session *s) birdloop_leave(p->loop); } +*/ static void bfd_remove_session(struct bfd_proto *p, struct bfd_session *s) { + ip_addr ip = s->addr; + birdloop_enter(p->loop); - bfd_free_socket(s->bsock); + bfd_free_iface(s->ifa); rfree(s->tx_timer); rfree(s->hold_timer); - debug("XXX REM1 %d %d %u %I\n", p->session_hash_id.count, p->session_hash_ip.count, s->loc_id, s->addr); HASH_REMOVE(p->session_hash_id, HASH_ID, s); - debug("XXX REM2 %d %d\n", p->session_hash_id.count, p->session_hash_ip.count); HASH_REMOVE(p->session_hash_ip, HASH_IP, s); - debug("XXX REM3 %d %d\n", p->session_hash_id.count, p->session_hash_ip.count); sl_free(p->session_slab, s); + TRACE(D_EVENTS, "Session to %I removed", ip); + birdloop_leave(p->loop); } static void -bfd_configure_session(struct bfd_proto *p, struct bfd_session *s, - struct bfd_session_config *opts) +bfd_reconfigure_session(struct bfd_proto *p, struct bfd_session *s) { birdloop_enter(p->loop); - // XXX opts->idle_tx_int; + struct bfd_iface_config *cf = s->ifa->cf; - 
bfd_session_set_min_tx(s, opts->min_tx_int); - bfd_session_set_min_rx(s, opts->min_rx_int); - s->detect_mult = opts->multiplier; - s->passive = opts->passive; + u32 tx = (s->loc_state == BFD_STATE_UP) ? cf->min_tx_int : cf->idle_tx_int; + bfd_session_set_min_tx(s, tx); + bfd_session_set_min_rx(s, cf->min_rx_int); + s->detect_mult = cf->multiplier; + s->passive = cf->passive; - bfd_session_control_tx_timer(s); + bfd_session_control_tx_timer(s, 0); birdloop_leave(p->loop); + + TRACE(D_EVENTS, "Session to %I reconfigured", s->addr); +} + + +/* + * BFD interfaces + */ + +static struct bfd_iface_config bfd_default_iface = { + .min_rx_int = BFD_DEFAULT_MIN_RX_INT, + .min_tx_int = BFD_DEFAULT_MIN_TX_INT, + .idle_tx_int = BFD_DEFAULT_IDLE_TX_INT, + .multiplier = BFD_DEFAULT_MULTIPLIER +}; + +static inline struct bfd_iface_config * +bfd_find_iface_config(struct bfd_config *cf, struct iface *iface) +{ + struct bfd_iface_config *ic; + + ic = iface ? (void *) iface_patt_find(&cf->patt_list, iface, NULL) : cf->multihop; + + return ic ? 
ic : &bfd_default_iface; +} + +static struct bfd_iface * +bfd_get_iface(struct bfd_proto *p, ip_addr local, struct iface *iface) +{ + struct bfd_iface *ifa; + + WALK_LIST(ifa, p->iface_list) + if (ipa_equal(ifa->local, local) && (ifa->iface == iface)) + return ifa->uc++, ifa; + + struct bfd_config *cf = (struct bfd_config *) (p->p.cf); + struct bfd_iface_config *ic = bfd_find_iface_config(cf, iface); + + ifa = mb_allocz(p->tpool, sizeof(struct bfd_iface)); + ifa->local = local; + ifa->iface = iface; + ifa->cf = ic; + ifa->bfd = p; + + ifa->sk = bfd_open_tx_sk(p, local, iface); + ifa->uc = 1; + + add_tail(&p->iface_list, &ifa->n); + + return ifa; +} + +static void +bfd_free_iface(struct bfd_iface *ifa) +{ + if (!ifa || --ifa->uc) + return; + + rem_node(&ifa->n); + sk_stop(ifa->sk); + rfree(ifa->sk); + mb_free(ifa); +} + +static void +bfd_reconfigure_iface(struct bfd_proto *p, struct bfd_iface *ifa, struct bfd_config *nc) +{ + struct bfd_iface_config *nic = bfd_find_iface_config(nc, ifa->iface); + ifa->changed = !!memcmp(nic, ifa->cf, sizeof(struct bfd_iface_config)); + + /* This should be probably changed to not access ifa->cf from the BFD thread */ + birdloop_enter(p->loop); + ifa->cf = nic; + birdloop_leave(p->loop); +} + + +/* + * BFD requests + */ + +static void +bfd_request_notify(struct bfd_request *req, u8 state, u8 diag) +{ + u8 old_state = req->state; + + if (state == old_state) + return; + + req->state = state; + req->diag = diag; + req->old_state = old_state; + req->down = (old_state == BFD_STATE_UP) && (state == BFD_STATE_DOWN); + + if (req->hook) + req->hook(req); +} + +static int +bfd_add_request(struct bfd_proto *p, struct bfd_request *req) +{ + struct bfd_session *s = bfd_find_session_by_addr(p, req->addr); + u8 state, diag; + + if (!s) + s = bfd_add_session(p, req->addr, req->local, req->iface); + + rem_node(&req->n); + add_tail(&s->request_list, &req->n); + req->session = s; + + bfd_lock_sessions(p); + state = s->loc_state; + diag = s->loc_diag; + 
bfd_unlock_sessions(p); + + bfd_request_notify(req, state, diag); + + return 1; +} + +static void +bfd_submit_request(struct bfd_request *req) +{ + node *n; + + WALK_LIST(n, bfd_proto_list) + if (bfd_add_request(SKIP_BACK(struct bfd_proto, bfd_node, n), req)) + return; + + rem_node(&req->n); + add_tail(&bfd_wait_list, &req->n); + req->session = NULL; + bfd_request_notify(req, BFD_STATE_ADMIN_DOWN, 0); +} + +static void +bfd_take_requests(struct bfd_proto *p) +{ + node *n, *nn; + + WALK_LIST_DELSAFE(n, nn, bfd_wait_list) + bfd_add_request(p, SKIP_BACK(struct bfd_request, n, n)); +} + +static void +bfd_drop_requests(struct bfd_proto *p) +{ + node *n; + + HASH_WALK(p->session_hash_id, next_id, s) + { + /* We assume that p is not in bfd_proto_list */ + WALK_LIST_FIRST(n, s->request_list) + bfd_submit_request(SKIP_BACK(struct bfd_request, n, n)); + } + HASH_WALK_END; +} + +static struct resclass bfd_request_class; + +struct bfd_request * +bfd_request_session(pool *p, ip_addr addr, ip_addr local, struct iface *iface, + void (*hook)(struct bfd_request *), void *data) +{ + struct bfd_request *req = ralloc(p, &bfd_request_class); + + /* Hack: self-link req->n, we will call rem_node() on it */ + req->n.prev = req->n.next = &req->n; + + req->addr = addr; + req->local = local; + req->iface = iface; + + bfd_submit_request(req); + + req->hook = hook; + req->data = data; + + return req; +} + +static void +bfd_request_free(resource *r) +{ + struct bfd_request *req = (struct bfd_request *) r; + struct bfd_session *s = req->session; + + rem_node(&req->n); + + /* Remove the session if there is no request for it. 
Skip that if + inside notify hooks, will be handled by bfd_notify_hook() itself */ + + if (s && EMPTY_LIST(s->request_list) && !s->notify_running) + bfd_remove_session(s->ifa->bfd, s); +} + +static void +bfd_request_dump(resource *r) +{ + struct bfd_request *req = (struct bfd_request *) r; + + debug("(code %p, data %p)\n", req->hook, req->data); +} + +static struct resclass bfd_request_class = { + "BFD request", + sizeof(struct bfd_request), + bfd_request_free, + bfd_request_dump, + NULL, + NULL +}; + + +/* + * BFD neighbors + */ + +static void +bfd_neigh_notify(struct neighbor *nb) +{ + struct bfd_proto *p = (struct bfd_proto *) nb->proto; + struct bfd_neighbor *n = nb->data; + + if (!n) + return; + + if ((nb->scope > 0) && !n->req) + { + ip_addr local = ipa_nonzero(n->local) ? n->local : nb->iface->addr->ip; + n->req = bfd_request_session(p->p.pool, n->addr, local, nb->iface, NULL, NULL); + } + + if ((nb->scope <= 0) && n->req) + { + rfree(n->req); + n->req = NULL; + } } static void bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) { - n->session = bfd_add_session(p, n->addr, n->opts); + n->active = 1; - if (n->opts->multihop) + if (n->multihop) { - bfd_open_session(p, n->session, n->local, NULL); + n->req = bfd_request_session(p->p.pool, n->addr, n->local, NULL, NULL, NULL); return; } @@ -402,14 +771,15 @@ bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) if (nb->data) { - log(L_ERR "%s: Duplicate remote address %I", p->p.name, n->addr); + log(L_ERR "%s: Duplicate neighbor %I", p->p.name, n->addr); return; } - nb->data = n->session; + n->neigh = nb; + nb->data = n; if (nb->scope > 0) - bfd_open_session(p, n->session, nb->iface->addr->ip, nb->iface); + bfd_neigh_notify(nb); else TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", n->addr, n->iface); } @@ -417,33 +787,54 @@ bfd_start_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) static void bfd_stop_neighbor(struct bfd_proto *p, struct bfd_neighbor *n) { - if 
(!n->opts->multihop) - { - struct neighbor *nb = neigh_find2(&p->p, &n->addr, n->iface, 0); - if (nb) - nb->data = NULL; - } + if (n->neigh) + n->neigh->data = NULL; + n->neigh = NULL; - bfd_remove_session(p, n->session); + rfree(n->req); + n->req = NULL; } -static void -bfd_neigh_notify(struct neighbor *nb) +static inline int +bfd_same_neighbor(struct bfd_neighbor *x, struct bfd_neighbor *y) { - struct bfd_proto *p = (struct bfd_proto *) nb->proto; - struct bfd_session *s = nb->data; + return ipa_equal(x->addr, y->addr) && ipa_equal(x->local, y->local) && + (x->iface == y->iface) && (x->multihop == y->multihop); +} - if (!s) - return; +static void +bfd_reconfigure_neighbors(struct bfd_proto *p, struct bfd_config *new) +{ + struct bfd_config *old = (struct bfd_config *) (p->p.cf); + struct bfd_neighbor *on, *nn; - if ((nb->scope > 0) && !s->opened) - bfd_open_session(p, s, nb->iface->addr->ip, nb->iface); + WALK_LIST(on, old->neigh_list) + { + WALK_LIST(nn, new->neigh_list) + if (bfd_same_neighbor(nn, on)) + { + nn->neigh = on->neigh; + if (nn->neigh) + nn->neigh->data = nn; + + nn->req = on->req; + nn->active = 1; + return; + } + + bfd_stop_neighbor(p, on); + } - if ((nb->scope <= 0) && s->opened) - bfd_close_session(p, s); + WALK_LIST(nn, new->neigh_list) + if (!nn->active) + bfd_start_neighbor(p, nn); } +/* + * BFD notify socket + */ + /* This core notify code should be replaced after main loop transition to birdloop */ int pipe(int pipefd[2]); @@ -456,6 +847,8 @@ bfd_notify_hook(sock *sk, int len) struct bfd_proto *p = sk->data; struct bfd_session *s; list tmp_list; + u8 state, diag; + node *n, *nn; pipe_drain(sk->fd); @@ -469,10 +862,21 @@ bfd_notify_hook(sock *sk, int len) { bfd_lock_sessions(p); rem2_node(&s->n); + state = s->loc_state; + diag = s->loc_diag; bfd_unlock_sessions(p); - // XXX do something - TRACE(D_EVENTS, "Notify: session changed %I %d %d", s->addr, s->loc_state, s->loc_diag); + /* FIXME: convert to btime and move to 
bfd_session_update_state() */ + s->last_state_change = now; + + s->notify_running = 1; + WALK_LIST_DELSAFE(n, nn, s->request_list) + bfd_request_notify(SKIP_BACK(struct bfd_request, n, n), state, diag); + s->notify_running = 0; + + /* Remove the session if all requests were removed in notify hooks */ + if (EMPTY_LIST(s->request_list)) + bfd_remove_session(p, s); } return 0; @@ -523,6 +927,17 @@ bfd_notify_init(struct bfd_proto *p) } +/* + * BFD protocol glue + */ + +void +bfd_init_all(void) +{ + init_list(&bfd_proto_list); + init_list(&bfd_wait_list); +} + static struct proto * bfd_init(struct proto_config *c) { @@ -539,25 +954,28 @@ bfd_start(struct proto *P) struct bfd_proto *p = (struct bfd_proto *) P; struct bfd_config *cf = (struct bfd_config *) (P->cf); - p->loop = birdloop_new(P->pool); + p->loop = birdloop_new(); p->tpool = rp_new(NULL, "BFD thread root"); pthread_spin_init(&p->lock, PTHREAD_PROCESS_PRIVATE); p->session_slab = sl_new(P->pool, sizeof(struct bfd_session)); - HASH_INIT(p->session_hash_id, P->pool, 4); - HASH_INIT(p->session_hash_ip, P->pool, 4); - - init_list(&p->sock_list); + HASH_INIT(p->session_hash_id, P->pool, 8); + HASH_INIT(p->session_hash_ip, P->pool, 8); + init_list(&p->iface_list); init_list(&p->notify_list); bfd_notify_init(p); + add_tail(&bfd_proto_list, &p->bfd_node); + birdloop_enter(p->loop); p->rx_1 = bfd_open_rx_sk(p, 0); p->rx_m = bfd_open_rx_sk(p, 1); birdloop_leave(p->loop); + bfd_take_requests(p); + struct bfd_neighbor *n; WALK_LIST(n, cf->neigh_list) bfd_start_neighbor(p, n); @@ -572,76 +990,79 @@ static int bfd_shutdown(struct proto *P) { struct bfd_proto *p = (struct bfd_proto *) P; + struct bfd_config *cf = (struct bfd_config *) (P->cf); + + rem_node(&p->bfd_node); birdloop_stop(p->loop); + struct bfd_neighbor *n; + WALK_LIST(n, cf->neigh_list) + bfd_stop_neighbor(p, n); + + bfd_drop_requests(p); + /* FIXME: This is hack */ birdloop_enter(p->loop); rfree(p->tpool); birdloop_leave(p->loop); - return PS_DOWN; -} - 
-static inline int -bfd_same_neighbor(struct bfd_neighbor *x, struct bfd_neighbor *y) -{ - return ipa_equal(x->addr, y->addr) && ipa_equal(x->local, y->local) && - (x->iface == y->iface) && (x->opts->multihop == y->opts->multihop); -} - -static void -bfd_match_neighbor(struct bfd_proto *p, struct bfd_neighbor *on, struct bfd_config *new) -{ - struct bfd_neighbor *nn; - - WALK_LIST(nn, new->neigh_list) - if (bfd_same_neighbor(nn, on)) - { - nn->session = on->session; - bfd_configure_session(p, nn->session, nn->opts); - return; - } + birdloop_free(p->loop); - bfd_stop_neighbor(p, on); + return PS_DOWN; } static int bfd_reconfigure(struct proto *P, struct proto_config *c) { struct bfd_proto *p = (struct bfd_proto *) P; - struct bfd_config *old = (struct bfd_config *) (P->cf); + // struct bfd_config *old = (struct bfd_config *) (P->cf); struct bfd_config *new = (struct bfd_config *) c; - struct bfd_neighbor *n; + struct bfd_iface *ifa; birdloop_mask_wakeups(p->loop); - WALK_LIST(n, old->neigh_list) - bfd_match_neighbor(p, n, new); + WALK_LIST(ifa, p->iface_list) + bfd_reconfigure_iface(p, ifa, new); + + HASH_WALK(p->session_hash_id, next_id, s) + { + if (s->ifa->changed) + bfd_reconfigure_session(p, s); + } + HASH_WALK_END; - WALK_LIST(n, new->neigh_list) - if (!n->session) - bfd_start_neighbor(p, n); + bfd_reconfigure_neighbors(p, new); birdloop_unmask_wakeups(p->loop); return 1; } +/* Ensure one instance */ +struct bfd_config *bfd_cf; + +static void +bfd_preconfig(struct protocol *P UNUSED, struct config *c UNUSED) +{ + bfd_cf = NULL; +} + static void bfd_copy_config(struct proto_config *dest, struct proto_config *src) { struct bfd_config *d = (struct bfd_config *) dest; // struct bfd_config *s = (struct bfd_config *) src; - /* We clean up neigh_list, ifaces are non-sharable */ + /* We clean up patt_list and neigh_list, neighbors and ifaces are non-sharable */ + init_list(&d->patt_list); init_list(&d->neigh_list); - } void bfd_show_sessions(struct proto *P) { + byte 
tbuf[TM_DATETIME_BUFFER_SIZE]; struct bfd_proto *p = (struct bfd_proto *) P; uint state, diag; u32 tx_int, timeout; @@ -655,22 +1076,25 @@ bfd_show_sessions(struct proto *P) } cli_msg(-1013, "%s:", p->p.name); - cli_msg(-1013, "%-12s\t%s\t%s\t%s\t%s", "Router IP", "Iface", - "State", "TX Int", "Timeout"); + cli_msg(-1013, "%-25s %-10s %-10s %-10s %8s %8s", + "IP address", "Interface", "State", "Since", "Interval", "Timeout"); - debug("XXX WALK %d %d\n", p->session_hash_id.count, p->session_hash_ip.count); HASH_WALK(p->session_hash_id, next_id, s) { - // FIXME this is unsafe + /* FIXME: this is thread-unsafe, but perhaps harmless */ state = s->loc_state; diag = s->loc_diag; - ifname = (s->bsock && s->bsock->sk->iface) ? s->bsock->sk->iface->name : "---"; - tx_int = (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS); + ifname = (s->ifa && s->ifa->sk->iface) ? s->ifa->sk->iface->name : "---"; + tx_int = s->last_tx ? (MAX(s->des_min_tx_int, s->rem_min_rx_int) TO_MS) : 0; timeout = (MAX(s->req_min_rx_int, s->rem_min_tx_int) TO_MS) * s->rem_detect_mult; - cli_msg(-1013, "%I\t%s\t%s %d\t%u\t%u", - s->addr, ifname, bfd_state_names[state], diag, tx_int, timeout); + state = (state < 4) ? 
state : 0; + tm_format_datetime(tbuf, &config->tf_proto, s->last_state_change); + + cli_msg(-1013, "%-25I %-10s %-10s %-10s %3u.%03u %3u.%03u", + s->addr, ifname, bfd_state_names[state], tbuf, + tx_int / 1000, tx_int % 1000, timeout / 1000, timeout % 1000); } HASH_WALK_END; @@ -685,5 +1109,6 @@ struct protocol proto_bfd = { .start = bfd_start, .shutdown = bfd_shutdown, .reconfigure = bfd_reconfigure, + .preconfig = bfd_preconfig, .copy_config = bfd_copy_config, }; diff --git a/proto/bfd/bfd.h b/proto/bfd/bfd.h index 66c6ed17..f4ab3fcc 100644 --- a/proto/bfd/bfd.h +++ b/proto/bfd/bfd.h @@ -20,6 +20,7 @@ #include "lib/socket.h" #include "lib/string.h" +#include "nest/bfd.h" #include "io.h" @@ -33,19 +34,23 @@ #define BFD_DEFAULT_MULTIPLIER 5 +struct bfd_iface_config; + struct bfd_config { struct proto_config c; - list neigh_list; /* List of struct bfd_neighbor */ + list patt_list; /* List of iface configs (struct bfd_iface_config) */ + list neigh_list; /* List of configured neighbors (struct bfd_neighbor) */ + struct bfd_iface_config *multihop; /* Multihop pseudoiface config */ }; -struct bfd_session_config +struct bfd_iface_config { + struct iface_patt i; u32 min_rx_int; u32 min_tx_int; u32 idle_tx_int; u8 multiplier; - u8 multihop; u8 passive; }; @@ -55,9 +60,12 @@ struct bfd_neighbor ip_addr addr; ip_addr local; struct iface *iface; - struct bfd_session_config *opts; - struct bfd_session *session; + struct neighbor *neigh; + struct bfd_request *req; + + u8 multihop; + u8 active; }; struct bfd_proto @@ -66,6 +74,7 @@ struct bfd_proto struct birdloop *loop; pool *tpool; pthread_spinlock_t lock; + node bfd_node; slab *session_slab; HASH(struct bfd_session) session_hash_id; @@ -77,25 +86,31 @@ struct bfd_proto sock *rx_1; sock *rx_m; - list sock_list; + list iface_list; }; -struct bfd_socket +struct bfd_iface { node n; + ip_addr local; + struct iface *iface; + struct bfd_iface_config *cf; + struct bfd_proto *bfd; + sock *sk; u32 uc; + u8 changed; }; struct bfd_session 
{ node n; ip_addr addr; /* Address of session */ + struct bfd_iface *ifa; /* Iface associated with session */ struct bfd_session *next_id; /* Next in bfd.session_hash_id */ struct bfd_session *next_ip; /* Next in bfd.session_hash_ip */ - struct bfd_proto *bfd; - u8 opened; + u8 opened_unused; u8 passive; u8 poll_active; u8 poll_scheduled; @@ -123,7 +138,9 @@ struct bfd_session timer2 *tx_timer; /* Periodic control packet timer */ timer2 *hold_timer; /* Timer for session down detection time */ - struct bfd_socket *bsock; /* Socket associated with session */ + list request_list; /* List of client requests (struct bfd_request) */ + bird_clock_t last_state_change; /* Time of last state change */ + u8 notify_running; /* 1 if notify hooks are running */ }; @@ -168,10 +185,7 @@ void bfd_show_sessions(struct proto *P); /* packets.c */ void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final); sock * bfd_open_rx_sk(struct bfd_proto *p, int multihop); -struct bfd_socket * bfd_get_socket(struct bfd_proto *p, ip_addr local, struct iface *ifa); -void bfd_free_socket(struct bfd_socket *sk); - - +sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa); #endif /* _BIRD_BFD_H_ */ diff --git a/proto/bfd/config.Y b/proto/bfd/config.Y index f1193d70..1bf8764f 100644 --- a/proto/bfd/config.Y +++ b/proto/bfd/config.Y @@ -12,20 +12,21 @@ CF_HDR CF_DEFINES #define BFD_CFG ((struct bfd_config *) this_proto) -#define BFD_SESSION this_bfd_session +#define BFD_IFACE ((struct bfd_iface_config *) this_ipatt) #define BFD_NEIGHBOR this_bfd_neighbor -static struct bfd_session_config *this_bfd_session; static struct bfd_neighbor *this_bfd_neighbor; +extern struct bfd_config *bfd_cf; CF_DECLS -CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, MULTIHOP, PASSIVE, - NEIGHBOR, DEV) +CF_KEYWORDS(BFD, MIN, IDLE, RX, TX, INTERVAL, MULTIPLIER, PASSIVE, + INTERFACE, MULTIHOP, NEIGHBOR, DEV, LOCAL) %type bfd_neigh_iface %type bfd_neigh_local +%type 
bfd_neigh_multihop CF_GRAMMAR @@ -34,12 +35,19 @@ CF_ADDTO(proto, bfd_proto) bfd_proto_start: proto_start BFD { this_proto = proto_config_new(&proto_bfd, sizeof(struct bfd_config), $1); + init_list(&BFD_CFG->patt_list); init_list(&BFD_CFG->neigh_list); + + if (bfd_cf) + cf_error("Only one BFD instance allowed"); + bfd_cf = BFD_CFG; }; bfd_proto_item: proto_item - | bfd_neighbor + | INTERFACE bfd_iface + | MULTIHOP bfd_multihop + | NEIGHBOR bfd_neighbor ; bfd_proto_opts: @@ -51,38 +59,41 @@ bfd_proto: bfd_proto_start proto_name '{' bfd_proto_opts '}'; -bfd_session_start: +bfd_iface_start: { - this_bfd_session = cfg_allocz(sizeof(struct bfd_session_config)); + this_ipatt = cfg_allocz(sizeof(struct bfd_iface_config)); + init_list(&this_ipatt->ipn_list); - BFD_SESSION->min_rx_int = BFD_DEFAULT_MIN_RX_INT; - BFD_SESSION->min_tx_int = BFD_DEFAULT_MIN_TX_INT; - BFD_SESSION->idle_tx_int = BFD_DEFAULT_IDLE_TX_INT; - BFD_SESSION->multiplier = BFD_DEFAULT_MULTIPLIER; + BFD_IFACE->min_rx_int = BFD_DEFAULT_MIN_RX_INT; + BFD_IFACE->min_tx_int = BFD_DEFAULT_MIN_TX_INT; + BFD_IFACE->idle_tx_int = BFD_DEFAULT_IDLE_TX_INT; + BFD_IFACE->multiplier = BFD_DEFAULT_MULTIPLIER; }; -bfd_session_item: - INTERVAL expr_us { BFD_SESSION->min_rx_int = BFD_SESSION->min_tx_int = $2; } - | MIN RX INTERVAL expr_us { BFD_SESSION->min_rx_int = $4; } - | MIN TX INTERVAL expr_us { BFD_SESSION->min_tx_int = $4; } - | IDLE TX INTERVAL expr_us { BFD_SESSION->idle_tx_int = $4; } - | MULTIPLIER expr { BFD_SESSION->multiplier = $2; } - | MULTIHOP bool { BFD_SESSION->multihop = $2; } - | PASSIVE bool { BFD_SESSION->passive = $2; } +bfd_iface_item: + INTERVAL expr_us { BFD_IFACE->min_rx_int = BFD_IFACE->min_tx_int = $2; } + | MIN RX INTERVAL expr_us { BFD_IFACE->min_rx_int = $4; } + | MIN TX INTERVAL expr_us { BFD_IFACE->min_tx_int = $4; } + | IDLE TX INTERVAL expr_us { BFD_IFACE->idle_tx_int = $4; } + | MULTIPLIER expr { BFD_IFACE->multiplier = $2; } + | PASSIVE bool { BFD_IFACE->passive = $2; } ; 
-bfd_session_opts: +bfd_iface_opts: /* empty */ - | bfd_session_opts bfd_session_item ';' + | bfd_iface_opts bfd_iface_item ';' ; -bfd_session_opt_list: +bfd_iface_opt_list: /* empty */ - | '{' bfd_session_opts '}' + | '{' bfd_iface_opts '}' ; -bfd_session: - bfd_session_start bfd_session_opt_list; +bfd_iface: bfd_iface_start iface_patt_list bfd_iface_opt_list +{ add_tail(&BFD_CFG->patt_list, NODE this_ipatt); }; + +bfd_multihop: bfd_iface_start bfd_iface_opt_list +{ BFD_CFG->multihop = BFD_IFACE; }; bfd_neigh_iface: @@ -96,15 +107,26 @@ bfd_neigh_local: | LOCAL ipa { $$ = $2; } ; -bfd_neighbor: NEIGHBOR ipa bfd_neigh_iface bfd_neigh_local bfd_session +bfd_neigh_multihop: + /* empty */ { $$ = 0; } + | MULTIHOP bool { $$ = $2; } + ; + +bfd_neighbor: ipa bfd_neigh_iface bfd_neigh_local bfd_neigh_multihop { this_bfd_neighbor = cfg_allocz(sizeof(struct bfd_neighbor)); add_tail(&BFD_CFG->neigh_list, NODE this_bfd_neighbor); - BFD_NEIGHBOR->addr = $2; - BFD_NEIGHBOR->local = $4; - BFD_NEIGHBOR->iface = $3; - BFD_NEIGHBOR->opts = BFD_SESSION; + BFD_NEIGHBOR->addr = $1; + BFD_NEIGHBOR->local = $3; + BFD_NEIGHBOR->iface = $2; + BFD_NEIGHBOR->multihop = $4; + + if ($4 && $2) + cf_error("Neighbor cannot set both interface and multihop"); + + if ($4 && ipa_zero($3)) + cf_error("Multihop neighbor requires specified local address"); }; diff --git a/proto/bfd/io.c b/proto/bfd/io.c index 2c1f7b03..fb150040 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -52,6 +52,9 @@ struct birdloop }; +/* + * Current thread context + */ static pthread_key_t current_loop_key; @@ -74,6 +77,9 @@ birdloop_init_current(void) } +/* + * Time clock + */ static void times_update_alt(struct birdloop *loop); @@ -163,6 +169,9 @@ current_time(void) } +/* + * Wakeup code for birdloop + */ static void pipe_new(int *pfds) @@ -244,6 +253,9 @@ wakeup_kick(struct birdloop *loop) } +/* + * Events + */ static inline uint events_waiting(struct birdloop *loop) @@ -279,12 +291,14 @@ ev2_schedule(event *e) } +/* + * 
Timers + */ #define TIMER_LESS(a,b) ((a)->expires < (b)->expires) #define TIMER_SWAP(heap,a,b,t) (t = heap[a], heap[a] = heap[b], heap[b] = t, \ heap[a]->index = (a), heap[b]->index = (b)) - static inline uint timers_count(struct birdloop *loop) { return loop->timers.used - 1; } @@ -425,6 +439,9 @@ timers_fire(struct birdloop *loop) } +/* + * Sockets + */ static void sockets_init(struct birdloop *loop) @@ -494,12 +511,16 @@ sk_stop(sock *s) static inline uint sk_want_events(sock *s) { return (s->rx_hook ? POLLIN : 0) | ((s->ttx != s->tpos) ? POLLOUT : 0); } +/* +FIXME: this should be called from sock code + static void sockets_update(struct birdloop *loop, sock *s) { if (s->index >= 0) loop->poll_fd.data[s->index].events = sk_want_events(s); } +*/ static void sockets_prepare(struct birdloop *loop) @@ -594,17 +615,21 @@ sockets_fire(struct birdloop *loop) } +/* + * Birdloop + */ static void * birdloop_main(void *arg); struct birdloop * -birdloop_new(pool *p) +birdloop_new(void) { /* FIXME: this init should be elsewhere and thread-safe */ static int init = 0; if (!init) { birdloop_init_current(); init = 1; } + pool *p = rp_new(NULL, "Birdloop root"); struct birdloop *loop = mb_allocz(p, sizeof(struct birdloop)); loop->pool = p; pthread_mutex_init(&loop->mutex, NULL); @@ -640,6 +665,12 @@ birdloop_stop(struct birdloop *loop) die("pthread_join(): %M", rv); } +void +birdloop_free(struct birdloop *loop) +{ + rfree(loop->pool); +} + void birdloop_enter(struct birdloop *loop) @@ -735,4 +766,3 @@ birdloop_main(void *arg) } - diff --git a/proto/bfd/io.h b/proto/bfd/io.h index 4f7c678d..3f166a47 100644 --- a/proto/bfd/io.h +++ b/proto/bfd/io.h @@ -63,6 +63,13 @@ tm2_new_init(pool *p, void (*hook)(struct timer2 *), void *data, uint rec, uint return t; } +static inline void +tm2_set_max(timer2 *t, btime when) +{ + if (when > t->expires) + tm2_set(t, when); +} + /* static inline void tm2_start_max(timer2 *t, btime after) @@ -78,9 +85,11 @@ void sk_stop(sock *s); -struct birdloop 
*birdloop_new(pool *p); +struct birdloop *birdloop_new(void); void birdloop_start(struct birdloop *loop); void birdloop_stop(struct birdloop *loop); +void birdloop_free(struct birdloop *loop); + void birdloop_enter(struct birdloop *loop); void birdloop_leave(struct birdloop *loop); void birdloop_mask_wakeups(struct birdloop *loop); diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index 0e24114b..fc2616ca 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -62,7 +62,7 @@ bfd_format_flags(u8 flags, char *buf) void bfd_send_ctl(struct bfd_proto *p, struct bfd_session *s, int final) { - sock *sk = s->bsock->sk; + sock *sk = s->ifa->sk; struct bfd_ctl_packet *pkt = (struct bfd_ctl_packet *) sk->tbuf; char fb[8]; @@ -143,7 +143,7 @@ bfd_rx_hook(sock *sk, int len) s = bfd_find_session_by_addr(p, sk->faddr); /* FIXME: better session matching and message */ - if (!s || !s->opened) + if (!s) return 1; } @@ -155,7 +155,7 @@ bfd_rx_hook(sock *sk, int len) u32 old_tx_int = s->des_min_tx_int; u32 old_rx_int = s->rem_min_rx_int; - s->rem_id = ntohl(pkt->snd_id); + s->rem_id= ntohl(pkt->snd_id); s->rem_state = bfd_pkt_get_state(pkt); s->rem_diag = bfd_pkt_get_diag(pkt); s->rem_demand_mode = pkt->flags & BFD_FLAG_DEMAND; @@ -213,7 +213,7 @@ bfd_open_rx_sk(struct bfd_proto *p, int multihop) return NULL; } -static inline sock * +sock * bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) { sock *sk = sk_new(p->tpool); @@ -246,32 +246,3 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) rfree(sk); return NULL; } - -struct bfd_socket * -bfd_get_socket(struct bfd_proto *p, ip_addr local, struct iface *ifa) -{ - struct bfd_socket *sk; - - WALK_LIST(sk, p->sock_list) - if (ipa_equal(sk->sk->saddr, local) && (sk->sk->iface == ifa)) - return sk->uc++, sk; - - sk = mb_allocz(p->tpool, sizeof(struct bfd_socket)); - sk->sk = bfd_open_tx_sk(p, local, ifa); - sk->uc = 1; - add_tail(&p->sock_list, &sk->n); - - return sk; -} - -void 
-bfd_free_socket(struct bfd_socket *sk) -{ - if (!sk || --sk->uc) - return; - - rem_node(&sk->n); - sk_stop(sk->sk); - rfree(sk->sk); - mb_free(sk); -} diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 7cad75df..07ad31f3 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -59,8 +59,9 @@ #include "nest/iface.h" #include "nest/protocol.h" #include "nest/route.h" -#include "nest/locks.h" +#include "nest/bfd.h" #include "nest/cli.h" +#include "nest/locks.h" #include "conf/conf.h" #include "lib/socket.h" #include "lib/resource.h" @@ -76,6 +77,7 @@ static void bgp_close(struct bgp_proto *p, int apply_md5); static void bgp_connect(struct bgp_proto *p); static void bgp_active(struct bgp_proto *p); static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags); +static void bgp_update_bfd(struct bgp_proto *p, int use_bfd); /** @@ -153,8 +155,12 @@ bgp_initiate(struct bgp_proto *p) if (rv < 0) return; + if (p->cf->bfd) + bgp_update_bfd(p, p->cf->bfd); + if (p->startup_delay) { + p->start_state = BSS_DELAY; BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds", p->startup_delay); bgp_start_timer(p->startup_timer, p->startup_delay); } @@ -765,6 +771,37 @@ bgp_neigh_notify(neighbor *n) } } +static void +bgp_bfd_notify(struct bfd_request *req) +{ + struct bgp_proto *p = req->data; + int ps = p->p.proto_state; + + if (req->down && ((ps == PS_START) || (ps == PS_UP))) + { + BGP_TRACE(D_EVENTS, "BFD session down"); + bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN); + if (ps == PS_UP) + bgp_update_startup_delay(p); + bgp_stop(p, 0); + } +} + +static void +bgp_update_bfd(struct bgp_proto *p, int use_bfd) +{ + if (use_bfd && !p->bfd_req) + p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr, + p->cf->multihop ? 
NULL : p->neigh->iface, + bgp_bfd_notify, p); + + if (!use_bfd && p->bfd_req) + { + rfree(p->bfd_req); + p->bfd_req = NULL; + } +} + static int bgp_reload_routes(struct proto *P) { @@ -825,6 +862,7 @@ bgp_start(struct proto *P) p->outgoing_conn.state = BS_IDLE; p->incoming_conn.state = BS_IDLE; p->neigh = NULL; + p->bfd_req = NULL; rt_lock_table(p->igp_table); @@ -992,6 +1030,9 @@ bgp_check_config(struct bgp_config *c) ipa_has_link_scope(c->source_addr))) cf_error("Multihop BGP cannot be used with link-local addresses"); + if (c->multihop && c->bfd && ipa_zero(c->source_addr)) + cf_error("Multihop BGP with BFD requires specified source address"); + /* Different default based on rs_client */ if (!c->missing_lladdr) @@ -1034,6 +1075,9 @@ bgp_reconfigure(struct proto *P, struct proto_config *C) || (old->password && new->password && !strcmp(old->password, new->password))) && (get_igp_table(old) == get_igp_table(new)); + if (same && (p->start_state > BSS_PREPARE)) + bgp_update_bfd(p, new->bfd); + /* We should update our copy of configuration ptr as old configuration will be freed */ if (same) p->cf = new; @@ -1115,7 +1159,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code) static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" }; static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""}; -static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket" }; +static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" }; static char *bgp_auto_errors[] = { "", "Route limit exceeded"}; static const char * diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 77a36715..6da38949 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -14,6 +14,7 @@ struct linpool; struct eattr; +struct 
bfd_request; struct bgp_config { struct proto_config c; @@ -52,8 +53,10 @@ struct bgp_config { unsigned error_delay_time_min; /* Time to wait after an error is detected */ unsigned error_delay_time_max; unsigned disable_after_error; /* Disable the protocol when error is detected */ + char *password; /* Password used for MD5 authentication */ struct rtable_config *igp_table; /* Table used for recursive next hop lookups */ + int bfd; /* Use BFD for liveness detection */ }; #define MLL_SELF 1 @@ -99,6 +102,7 @@ struct bgp_proto { struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */ struct object_lock *lock; /* Lock for neighbor connection */ struct neighbor *neigh; /* Neighbor entry corresponding to remote ip, NULL if multihop */ + struct bfd_request *bfd_req; /* BFD request, if BFD is used */ ip_addr source_addr; /* Local address used as an advertised next hop */ rtable *igp_table; /* Table used for recursive next hop lookups */ struct event *event; /* Event for respawning and shutting process */ @@ -287,6 +291,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi #define BEM_INVALID_NEXT_HOP 2 #define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */ #define BEM_NO_SOCKET 4 +#define BEM_BFD_DOWN 5 /* Automatic shutdown error codes */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index d5e5aaca..0292c234 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, - SECONDARY) + SECONDARY, BFD) CF_GRAMMAR @@ -110,6 +110,7 @@ bgp_proto: | bgp_proto SECONDARY bool ';' { BGP_CFG->secondary = $3; } | bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; } | bgp_proto TTL 
SECURITY bool ';' { BGP_CFG->ttl_security = $4; } + | bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); } ; CF_ADDTO(dynamic_attr, BGP_ORIGIN diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index f042e1aa..f06dd311 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -309,6 +309,7 @@ ospf_iface_item: | TX PRIORITY expr { OSPF_PATT->tx_priority = $3; } | TTL SECURITY bool { OSPF_PATT->ttl_security = $3; } | TTL SECURITY TX ONLY { OSPF_PATT->ttl_security = 2; } + | BFD bool { OSPF_PATT->bfd = $2; cf_check_bfd($2); } | password_list ; diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c index d5aa1b95..bac2a589 100644 --- a/proto/ospf/hello.c +++ b/proto/ospf/hello.c @@ -140,6 +140,9 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, #ifdef OSPFv3 n->iface_id = ntohl(ps->iface_id); #endif + + if (n->ifa->cf->bfd) + ospf_neigh_update_bfd(n, n->ifa->bfd); } ospf_neigh_sm(n, INM_HELLOREC); diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 63c26466..f1409840 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -536,6 +536,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i ifa->check_link = ip->check_link; ifa->ecmp_weight = ip->ecmp_weight; ifa->check_ttl = (ip->ttl_security == 1); + ifa->bfd = ip->bfd; #ifdef OSPFv2 ifa->autype = ip->autype; @@ -840,6 +841,19 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) ifa->ecmp_weight = new->ecmp_weight; } + /* BFD */ + if (ifa->bfd != new->bfd) + { + OSPF_TRACE(D_EVENTS, "%s BFD on interface %s", + new->bfd ? 
"Enabling" : "Disabling", ifname); + ifa->bfd = new->bfd; + + struct ospf_neighbor *n; + WALK_LIST(n, ifa->neigh_list) + ospf_neigh_update_bfd(n, ifa->bfd); + } + + /* instance_id is not updated - it is part of key */ return 1; diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index 26d81dce..61224ec2 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -582,6 +582,36 @@ ospf_neigh_remove(struct ospf_neighbor *n) OSPF_TRACE(D_EVENTS, "Deleting neigbor."); } +static void +ospf_neigh_bfd_hook(struct bfd_request *req) +{ + struct ospf_neighbor *n = req->data; + struct proto *p = &n->ifa->oa->po->proto; + + if (req->down) + { + OSPF_TRACE(D_EVENTS, "BFD session down for %I on %s", + n->ip, n->ifa->iface->name); + + ospf_neigh_remove(n); + } +} + +void +ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd) +{ + if (use_bfd && !n->bfd_req) + n->bfd_req = bfd_request_session(n->pool, n->ip, n->ifa->addr->ip, n->ifa->iface, + ospf_neigh_bfd_hook, n); + + if (!use_bfd && n->bfd_req) + { + rfree(n->bfd_req); + n->bfd_req = NULL; + } +} + + void ospf_sh_neigh_info(struct ospf_neighbor *n) { diff --git a/proto/ospf/neighbor.h b/proto/ospf/neighbor.h index f593faed..e674927d 100644 --- a/proto/ospf/neighbor.h +++ b/proto/ospf/neighbor.h @@ -16,6 +16,7 @@ void bdr_election(struct ospf_iface *ifa); struct ospf_neighbor *find_neigh(struct ospf_iface *ifa, u32 rid); struct ospf_neighbor *find_neigh_by_ip(struct ospf_iface *ifa, ip_addr ip); void ospf_neigh_remove(struct ospf_neighbor *n); +void ospf_neigh_update_bfd(struct ospf_neighbor *n, int use_bfd); void ospf_sh_neigh_info(struct ospf_neighbor *n); #endif /* _BIRD_OSPF_NEIGHBOR_H_ */ diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index f1409af3..46a1c3c1 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -46,6 +46,7 @@ do { if ((p->debug & D_PACKETS) || OSPF_FORCE_DEBUG) \ #include "nest/route.h" #include "nest/cli.h" #include "nest/locks.h" +#include "nest/bfd.h" #include "conf/conf.h" 
#include "lib/string.h" @@ -276,6 +277,7 @@ struct ospf_iface u8 ecmp_weight; /* Weight used for ECMP */ u8 ptp_netmask; /* Send real netmask for P2P */ u8 check_ttl; /* Check incoming packets for TTL 255 */ + u8 bfd; /* Use BFD on iface */ }; struct ospf_md5 @@ -708,6 +710,7 @@ struct ospf_neighbor #define ACKL_DIRECT 0 #define ACKL_DELAY 1 timer *ackd_timer; /* Delayed ack timer */ + struct bfd_request *bfd_req; /* BFD request, if BFD is used */ u32 csn; /* Last received crypt seq number (for MD5) */ }; @@ -818,6 +821,7 @@ struct ospf_iface_patt u8 real_bcast; /* Not really used in OSPFv3 */ u8 ptp_netmask; /* bool + 2 for unspecified */ u8 ttl_security; /* bool + 2 for TX only */ + u8 bfd; #ifdef OSPFv2 list *passwords; diff --git a/proto/radv/radv.c b/proto/radv/radv.c index a6b9b16c..90408536 100644 --- a/proto/radv/radv.c +++ b/proto/radv/radv.c @@ -15,7 +15,7 @@ * The RAdv protocol is implemented in two files: |radv.c| containing * the interface with BIRD core and the protocol logic and |packets.c| * handling low level protocol stuff (RX, TX and packet formats). - * The protocol does not import or export any routes. + * The protocol does not export any routes. 
* * The RAdv is structured in the usual way - for each handled interface * there is a structure &radv_iface that contains a state related to diff --git a/sysdep/autoconf.h.in b/sysdep/autoconf.h.in index ac6f7a87..a9e46e27 100644 --- a/sysdep/autoconf.h.in +++ b/sysdep/autoconf.h.in @@ -39,10 +39,14 @@ #undef CONFIG_STATIC #undef CONFIG_RIP #undef CONFIG_RADV +#undef CONFIG_BFD #undef CONFIG_BGP #undef CONFIG_OSPF #undef CONFIG_PIPE +/* We use multithreading */ +#undef USE_PTHREADS + /* We have and syslog() */ #undef HAVE_SYSLOG diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index f0ec6dae..da8343f9 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -538,6 +538,8 @@ sk_free(resource *r) if (s->fd >= 0) { close(s->fd); + + /* FIXME: we should call sk_stop() for SKF_THREAD sockets */ if (s->flags & SKF_THREAD) return; diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 67b70773..9dd4d66f 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -36,13 +36,20 @@ static const bird_clock_t rate_limit_time = 5; static const int rate_limit_count = 5; -// XXX add ifdef for threads +#ifdef USE_PTHREADS #include static pthread_mutex_t log_mutex; static inline void log_lock(void) { pthread_mutex_lock(&log_mutex); } static inline void log_unlock(void) { pthread_mutex_unlock(&log_mutex); } +#else + +static inline void log_lock(void) { } +static inline void log_unlock(void) { } + +#endif + #ifdef HAVE_SYSLOG #include -- cgit v1.2.3

BFD configuration consists mainly of multiple definitions of interfaces. +Most BFD config options are session specific. When a new session is requested +and dynamically created, it is configured from one of these definitions. For +sessions to directly connected neighbors, Note that to use BFD for other protocols like OSPF or BGP, these protocols +also have to be configured to request BFD sessions, usually by Some of BFD session options require +protocol bfd [<name>] { + interface <interface pattern> { + interval <time>; + min rx interval <time>; + min tx interval <time>; + idle tx interval <time>; + multiplier <num>; + passive <switch>; + }; + multihop { + interval <time>; + min rx interval <time>; + min tx interval <time>; + idle tx interval <time>; + multiplier <num>; + passive <switch>; + }; + neighbor <ip> [dev "<interface>"] [local <ip>] [multihop <switch>]; +} + + + + interface + Interface definitions allow specifying options for sessions associated + with such interfaces and also may contain interface specific options. + See common option for a detailed + description of interface patterns. Note that contrary to the behavior of + multihop { + Multihop definitions allow specifying options for multihop BFD sessions, + in the same manner as neighbor + BFD sessions are usually created on demand as requested by other + protocols (like OSPF or BGP). This option allows explicitly adding + a BFD session to the specified neighbor regardless of such requests. + + The session is identified by the IP address of the neighbor, with + optional specification of used interface and local IP. By default + the neighbor must be directly connected, unless the session is + configured as multihop. Note that local IP must be specified for + multihop sessions. + + +